sys-clk-OC: Separate governor into multiple modules that could be disabled individually
This commit is contained in:
@@ -135,7 +135,7 @@ Copy the `atmosphere`, and `switch` folders at the root of your sdcard, overwrit
|
||||
* sys-clk overlay (accessible from anywhere by invoking the [Tesla menu](https://gbatemp.net/threads/tesla-the-nintendo-switch-overlay-menu.557362/))
|
||||
|
||||
`/switch/.overlays/sys-clk-overlay.ovl`
|
||||
|
||||
|
||||
* sys-clk core sysmodule
|
||||
|
||||
`/atmosphere/contents/00FF0000636C6BFF/exefs.nsp`
|
||||
@@ -143,7 +143,7 @@ Copy the `atmosphere`, and `switch` folders at the root of your sdcard, overwrit
|
||||
|
||||
## Config
|
||||
|
||||
Presets can be customized by adding them to the ini config file located at `/config/sys-clk/config.ini`, using the following template for each app
|
||||
Presets can be customized by adding them to the ini config file located at `/config/sys-clk/config.ini`, using the following template for each app
|
||||
|
||||
```
|
||||
[Application Title ID]
|
||||
@@ -162,14 +162,14 @@ handheld_charging_official_mem=
|
||||
handheld_cpu=
|
||||
handheld_gpu=
|
||||
handheld_mem=
|
||||
governor_disabled=
|
||||
governor_config=
|
||||
```
|
||||
|
||||
* Replace `Application Title ID` with the title id of the game/application you're interested in customizing.
|
||||
A list of game title IDs can be found in the [Switchbrew wiki](https://switchbrew.org/wiki/Title_list/Games).
|
||||
* Frequencies are expressed in MHz and will be scaled to the nearest possible values, described in the clock table below.
|
||||
* If a key is omitted, or its value is empty or set to 0, it is ignored and stock clocks apply.
|
||||
* If charging, sys-clk will look for the frequencies in that order, picking the first found
|
||||
* If charging, sys-clk will look for the frequencies in that order, picking the first found
|
||||
1. Charger specific config (USB or Official) `handheld_charging_usb_X` or `handheld_charging_official_X`
|
||||
2. Non specific charging config `handheld_charging_X`
|
||||
3. Handheld config `handheld_X`
|
||||
|
||||
@@ -63,7 +63,6 @@ typedef struct
|
||||
{
|
||||
bool systemCoreBoostCPU;
|
||||
bool batteryChargingDisabledOverride;
|
||||
bool governor;
|
||||
SysClkProfile realProfile;
|
||||
} SysClkOcExtra;
|
||||
|
||||
@@ -72,7 +71,20 @@ typedef struct
|
||||
uint32_t values[20];
|
||||
} SysClkFrequencyTable;
|
||||
|
||||
uint32_t* GetTable(SysClkModule module);
|
||||
uint32_t* GetModuleFreqTable(SysClkModule module);
|
||||
uint32_t GetModuleMaximumFreq(SysClkModule module);
|
||||
|
||||
/*
 * Governor configuration, stored as a small bit field.
 *
 * The *_Shift enumerators are bit positions; every other enumerator is a
 * bit mask (or a combination of masks). Bit CPU_Shift selects the CPU
 * frequency governor and bit GPU_Shift selects the GPU frequency governor.
 *
 * GPU_Shift is derived as "CPU_Shift + 1" and Default/Mask are derived from
 * the CPU/GPU masks, so the values stay consistent if a position is moved.
 * All numeric values are identical to the previous hard-coded ones.
 */
typedef enum {
    /* No governor bit set. */
    SysClkOcGovernorConfig_AllDisabled = 0,

    /* Bit position and masks for the CPU governor (value 1). */
    SysClkOcGovernorConfig_CPU_Shift = 0,
    SysClkOcGovernorConfig_CPUOnly = 1 << SysClkOcGovernorConfig_CPU_Shift,
    SysClkOcGovernorConfig_CPU = 1 << SysClkOcGovernorConfig_CPU_Shift,

    /* Bit position and masks for the GPU governor (value 2). */
    SysClkOcGovernorConfig_GPU_Shift = SysClkOcGovernorConfig_CPU_Shift + 1,
    SysClkOcGovernorConfig_GPUOnly = 1 << SysClkOcGovernorConfig_GPU_Shift,
    SysClkOcGovernorConfig_GPU = 1 << SysClkOcGovernorConfig_GPU_Shift,

    /* Both governors enabled; also the set of all valid bits (value 3). */
    SysClkOcGovernorConfig_Default = SysClkOcGovernorConfig_CPU | SysClkOcGovernorConfig_GPU,
    SysClkOcGovernorConfig_Mask = SysClkOcGovernorConfig_CPU | SysClkOcGovernorConfig_GPU,
} SysClkOcGovernorConfig;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@@ -80,7 +92,7 @@ typedef struct
|
||||
uint32_t mhz[(size_t)SysClkProfile_EnumMax * (size_t)SysClkModule_EnumMax];
|
||||
uint32_t mhzMap[SysClkProfile_EnumMax][SysClkModule_EnumMax];
|
||||
};
|
||||
bool governorDisabled;
|
||||
SysClkOcGovernorConfig governorConfig;
|
||||
} SysClkTitleProfileList;
|
||||
|
||||
#define SYSCLK_GLOBAL_PROFILE_TID 0xA111111111111111
|
||||
|
||||
@@ -76,7 +76,7 @@ uint32_t g_freq_table_gpu_hz[] = {
|
||||
0,
|
||||
};
|
||||
|
||||
uint32_t* GetTable(SysClkModule module) {
|
||||
uint32_t* GetModuleFreqTable(SysClkModule module) {
|
||||
switch (module) {
|
||||
case SysClkModule_CPU:
|
||||
return &g_freq_table_cpu_hz[0];
|
||||
@@ -88,3 +88,12 @@ uint32_t* GetTable(SysClkModule module) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Returns the last entry that precedes the 0 terminator in the frequency
 * table of `module`, as obtained from GetModuleFreqTable().
 *
 * Returns UINT32_MAX when the module has no table or the table starts with
 * the terminator.
 * NOTE(review): this is the maximum only if the table is sorted ascending -
 * confirm against the table definitions.
 */
uint32_t GetModuleMaximumFreq(SysClkModule module) {
    uint32_t* entry = GetModuleFreqTable(module);

    /* No table for this module, or an empty table. */
    if (entry == NULL || entry[0] == 0) {
        return UINT32_MAX;
    }

    /* Remember each non-zero entry until the terminator is reached. */
    uint32_t last = entry[0];
    for (entry++; *entry != 0; entry++) {
        last = *entry;
    }

    return last;
}
|
||||
|
||||
@@ -73,14 +73,29 @@ void AppProfileGui::listUI()
|
||||
bool globalGovernorEnabled = configList->values[SysClkConfigValue_GovernorExperimental];
|
||||
|
||||
if (globalGovernorEnabled) {
|
||||
tsl::elm::ToggleListItem* governorToggle = new tsl::elm::ToggleListItem("Disable governor", this->profileList->governorDisabled);
|
||||
governorToggle->setStateChangedListener([this](bool state) {
|
||||
this->profileList->governorDisabled = state;
|
||||
tsl::elm::ToggleListItem* cpuGovernorToggle = new tsl::elm::ToggleListItem("CPU Freq Governor",
|
||||
(this->profileList->governorConfig >> SysClkOcGovernorConfig_CPU_Shift) & 1);
|
||||
cpuGovernorToggle->setStateChangedListener([this](bool state) {
|
||||
this->profileList->governorConfig =
|
||||
SysClkOcGovernorConfig((this->profileList->governorConfig & SysClkOcGovernorConfig_GPUOnly) | state << SysClkOcGovernorConfig_CPU_Shift);
|
||||
|
||||
Result rc = sysclkIpcSetProfiles(this->applicationId, this->profileList);
|
||||
if (R_FAILED(rc))
|
||||
FatalGui::openWithResultCode("sysclkIpcSetProfiles", rc);
|
||||
});
|
||||
this->listElement->addItem(governorToggle);
|
||||
this->listElement->addItem(cpuGovernorToggle);
|
||||
|
||||
tsl::elm::ToggleListItem* gpuGovernorToggle = new tsl::elm::ToggleListItem("GPU Freq Governor",
|
||||
(this->profileList->governorConfig >> SysClkOcGovernorConfig_GPU_Shift) & 1);
|
||||
gpuGovernorToggle->setStateChangedListener([this](bool state) {
|
||||
this->profileList->governorConfig =
|
||||
SysClkOcGovernorConfig((this->profileList->governorConfig & SysClkOcGovernorConfig_CPUOnly) | state << SysClkOcGovernorConfig_GPU_Shift);
|
||||
|
||||
Result rc = sysclkIpcSetProfiles(this->applicationId, this->profileList);
|
||||
if (R_FAILED(rc))
|
||||
FatalGui::openWithResultCode("sysclkIpcSetProfiles", rc);
|
||||
});
|
||||
this->listElement->addItem(gpuGovernorToggle);
|
||||
}
|
||||
|
||||
delete configList;
|
||||
|
||||
@@ -59,7 +59,6 @@ ClockManager::ClockManager()
|
||||
this->oc = new SysClkOcExtra;
|
||||
this->oc->systemCoreBoostCPU = false;
|
||||
this->oc->batteryChargingDisabledOverride = false;
|
||||
this->oc->governor = false;
|
||||
this->oc->realProfile = SysClkProfile_Handheld;
|
||||
|
||||
this->rnxSync = new ReverseNXSync;
|
||||
@@ -150,12 +149,12 @@ void ClockManager::Tick()
|
||||
uint32_t hz = GetHz((SysClkModule)module);
|
||||
this->governor->SetMaxHz(hz, (SysClkModule)module);
|
||||
|
||||
bool handledByGovernor = this->oc->governor && (module != SysClkModule_MEM);
|
||||
if (hz && hz != this->context->freqs[module] && !handledByGovernor)
|
||||
if (hz && hz != this->context->freqs[module] && !this->governor->IsHandledByGovernor((SysClkModule)module))
|
||||
{
|
||||
// Skip setting CPU or GPU clocks in CpuBoostMode if CPU <= boostCPUFreq or GPU >= 76.8MHz
|
||||
bool skipBoost = apmExtIsBoostMode(this->context->perfConfId);
|
||||
skipBoost &= (module == SysClkModule_CPU && hz <= Clocks::boostCpuFreq) || module == SysClkModule_GPU;
|
||||
bool skipBoost = apmExtIsBoostMode(this->context->perfConfId) &&
|
||||
((module == SysClkModule_CPU && hz <= Clocks::boostCpuFreq) || module == SysClkModule_GPU);
|
||||
|
||||
if (!skipBoost) {
|
||||
FileUtils::LogLine("[mgr] %s clock set : %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10);
|
||||
Clocks::SetHz((SysClkModule)module, hz);
|
||||
@@ -174,7 +173,7 @@ void ClockManager::WaitForNextTick()
|
||||
/* Self-check system core (#3) usage via idleticks at intervals (Not enabled at higher CPU freq or without charger) */
|
||||
uint64_t tickWaitTimeMs = this->GetConfig()->GetConfigValue(SysClkConfigValue_PollingIntervalMs);
|
||||
|
||||
if (this->oc->governor) {
|
||||
if (this->governor->IsHandledByGovernor(SysClkModule_CPU)) {
|
||||
svcSleepThread(tickWaitTimeMs * 1000'000ULL);
|
||||
return;
|
||||
}
|
||||
@@ -240,21 +239,14 @@ bool ClockManager::RefreshContext()
|
||||
this->rnxSync->Reset(applicationId);
|
||||
}
|
||||
|
||||
bool governor = this->GetConfig()->GetConfigValue(SysClkConfigValue_GovernorExperimental);
|
||||
governor &= !this->GetConfig()->GetTitleGovernorDisabled(applicationId);
|
||||
if (governor != this->oc->governor)
|
||||
{
|
||||
this->oc->governor = governor;
|
||||
FileUtils::LogLine("[mgr] Governor status: %s", governor ? "enabled" : "disabled");
|
||||
hasChanged = true;
|
||||
}
|
||||
|
||||
if (hasChanged) {
|
||||
if (enabled && governor)
|
||||
this->governor->Start();
|
||||
else
|
||||
this->governor->Stop();
|
||||
SysClkOcGovernorConfig governorConfig = SysClkOcGovernorConfig_AllDisabled;
|
||||
if (this->GetConfig()->GetConfigValue(SysClkConfigValue_GovernorExperimental)) {
|
||||
governorConfig = SysClkOcGovernorConfig_Default;
|
||||
SysClkOcGovernorConfig governorConfigTitle = this->GetConfig()->GetTitleGovernorConfig(applicationId);
|
||||
if (governorConfig != governorConfigTitle)
|
||||
governorConfig = governorConfigTitle;
|
||||
}
|
||||
this->governor->SetConfig(governorConfig);
|
||||
|
||||
SysClkProfile realProfile = Clocks::GetCurrentProfile();
|
||||
if (realProfile != this->oc->realProfile)
|
||||
@@ -301,8 +293,7 @@ bool ClockManager::RefreshContext()
|
||||
if (hz != 0 && hz != this->context->freqs[module])
|
||||
{
|
||||
this->context->freqs[module] = hz;
|
||||
bool handledByGovernor = this->oc->governor && (module != SysClkModule_MEM);
|
||||
if (!handledByGovernor) {
|
||||
if (!this->governor->IsHandledByGovernor((SysClkModule)module)) {
|
||||
FileUtils::LogLine("[mgr] %s clock change: %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10);
|
||||
hasChanged = true;
|
||||
}
|
||||
|
||||
@@ -25,7 +25,7 @@ Config::Config(std::string path)
|
||||
this->loaded = false;
|
||||
this->profileMhzMap = std::map<std::tuple<std::uint64_t, SysClkProfile, SysClkModule>, std::uint32_t>();
|
||||
this->profileCountMap = std::map<std::uint64_t, std::uint8_t>();
|
||||
this->profileGovernorDisabled = std::map<std::uint64_t, bool>();
|
||||
this->profileGovernorMap = std::map<std::uint64_t, SysClkOcGovernorConfig>();
|
||||
this->mtime = 0;
|
||||
this->enabled = false;
|
||||
for(unsigned int i = 0; i < SysClkModule_EnumMax; i++)
|
||||
@@ -79,7 +79,7 @@ void Config::Close()
|
||||
this->loaded = false;
|
||||
this->profileMhzMap.clear();
|
||||
this->profileCountMap.clear();
|
||||
this->profileGovernorDisabled.clear();
|
||||
this->profileGovernorMap.clear();
|
||||
|
||||
for(unsigned int i = 0; i < SysClkConfigValue_EnumMax; i++)
|
||||
{
|
||||
@@ -171,18 +171,18 @@ std::uint32_t Config::GetAutoClockHz(std::uint64_t tid, SysClkModule module, Sys
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool Config::GetTitleGovernorDisabled(std::uint64_t tid)
|
||||
SysClkOcGovernorConfig Config::GetTitleGovernorConfig(std::uint64_t tid)
|
||||
{
|
||||
if (this->loaded)
|
||||
{
|
||||
std::map<uint64_t, bool>::const_iterator it = this->profileGovernorDisabled.find(tid);
|
||||
if (it != this->profileGovernorDisabled.end())
|
||||
std::map<uint64_t, SysClkOcGovernorConfig>::const_iterator it = this->profileGovernorMap.find(tid);
|
||||
if (it != this->profileGovernorMap.end())
|
||||
{
|
||||
return it->second;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
return SysClkOcGovernorConfig_Default;
|
||||
}
|
||||
|
||||
void Config::GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles)
|
||||
@@ -197,11 +197,12 @@ void Config::GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles
|
||||
}
|
||||
}
|
||||
|
||||
std::map<uint64_t, bool>::const_iterator it = this->profileGovernorDisabled.find(tid);
|
||||
bool governorDisabled = false;
|
||||
if (it != this->profileGovernorDisabled.end() && it->second)
|
||||
governorDisabled = true;
|
||||
out_profiles->governorDisabled = governorDisabled;
|
||||
std::map<uint64_t, SysClkOcGovernorConfig>::const_iterator it = this->profileGovernorMap.find(tid);
|
||||
SysClkOcGovernorConfig governor = SysClkOcGovernorConfig_Default;
|
||||
// Found
|
||||
if (it != this->profileGovernorMap.end())
|
||||
governor = it->second;
|
||||
out_profiles->governorConfig = governor;
|
||||
}
|
||||
|
||||
bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bool immediate)
|
||||
@@ -254,9 +255,9 @@ bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bo
|
||||
}
|
||||
}
|
||||
|
||||
if (profiles->governorDisabled) {
|
||||
snprintf(sk, 0x40, "%s", CONFIG_KEY_TITLE_GOVERNOR_DISABLED);
|
||||
snprintf(sv, 0x10, "%d", profiles->governorDisabled);
|
||||
if (profiles->governorConfig != SysClkOcGovernorConfig_Default) {
|
||||
snprintf(sk, 0x40, "%s", CONFIG_KEY_TITLE_GOVERNOR_CONFIG);
|
||||
snprintf(sv, 0x10, "%d", profiles->governorConfig);
|
||||
*ik++ = sk;
|
||||
*iv++ = sv;
|
||||
}
|
||||
@@ -290,10 +291,10 @@ bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bo
|
||||
}
|
||||
}
|
||||
|
||||
if (profiles->governorDisabled)
|
||||
this->profileGovernorDisabled[tid] = profiles->governorDisabled;
|
||||
if (profiles->governorConfig == SysClkOcGovernorConfig_Default)
|
||||
this->profileGovernorMap.erase(tid);
|
||||
else
|
||||
this->profileGovernorDisabled.erase(tid);
|
||||
this->profileGovernorMap[tid] = profiles->governorConfig;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -343,13 +344,13 @@ int Config::BrowseIniFunc(const char* section, const char* key, const char* valu
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (!strcmp(key, CONFIG_KEY_TITLE_GOVERNOR_DISABLED)) {
|
||||
if (!strcmp(key, CONFIG_KEY_TITLE_GOVERNOR_CONFIG)) {
|
||||
input = strtoul(value, NULL, 0);
|
||||
if ((input & 0x1) != input) {
|
||||
input = 0;
|
||||
if ((input & SysClkOcGovernorConfig_Mask) != input) {
|
||||
input = SysClkOcGovernorConfig_Default;
|
||||
FileUtils::LogLine("[cfg] Invalid value for key '%s' in section '%s': using default %d", key, section, input);
|
||||
}
|
||||
config->profileGovernorDisabled[tid] = (bool)input;
|
||||
config->profileGovernorMap[tid] = (SysClkOcGovernorConfig)input;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
|
||||
#define CONFIG_VAL_SECTION "values"
|
||||
|
||||
#define CONFIG_KEY_TITLE_GOVERNOR_DISABLED "governor_disabled"
|
||||
#define CONFIG_KEY_TITLE_GOVERNOR_CONFIG "governor_config"
|
||||
|
||||
class Config
|
||||
{
|
||||
@@ -39,7 +39,7 @@ class Config
|
||||
void GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles);
|
||||
bool SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bool immediate);
|
||||
std::uint32_t GetAutoClockHz(std::uint64_t tid, SysClkModule module, SysClkProfile profile);
|
||||
bool GetTitleGovernorDisabled(std::uint64_t tid);
|
||||
SysClkOcGovernorConfig GetTitleGovernorConfig(std::uint64_t tid);
|
||||
|
||||
void SetEnabled(bool enabled);
|
||||
bool Enabled();
|
||||
@@ -61,7 +61,7 @@ class Config
|
||||
|
||||
std::map<std::tuple<std::uint64_t, SysClkProfile, SysClkModule>, std::uint32_t> profileMhzMap;
|
||||
std::map<std::uint64_t, std::uint8_t> profileCountMap;
|
||||
std::map<std::uint64_t, bool> profileGovernorDisabled;
|
||||
std::map<std::uint64_t, SysClkOcGovernorConfig> profileGovernorMap;
|
||||
bool loaded;
|
||||
std::string path;
|
||||
time_t mtime;
|
||||
|
||||
@@ -90,6 +90,7 @@ ReverseNXMode ReverseNXSync::RecheckToolMode() {
|
||||
if (this->m_tool_enabled) {
|
||||
const char* fileName = "_ZN2nn2oe16GetOperationModeEv.asm64"; // or _ZN2nn2oe18GetPerformanceModeEv.asm64
|
||||
const char* filePath = new char[72];
|
||||
SCOPE_EXIT { delete[] filePath; };
|
||||
/* Check per-game patch */
|
||||
snprintf((char*)filePath, 72, "/SaltySD/patches/%016lX/%s", this->m_app_id, fileName);
|
||||
mode = this->GetToolModeFromPatch(filePath);
|
||||
@@ -98,7 +99,6 @@ ReverseNXMode ReverseNXSync::RecheckToolMode() {
|
||||
snprintf((char*)filePath, 72, "/SaltySD/patches/%s", fileName);
|
||||
mode = this->GetToolModeFromPatch(filePath);
|
||||
}
|
||||
delete[] filePath;
|
||||
}
|
||||
|
||||
return mode;
|
||||
@@ -135,157 +135,94 @@ void PsmExt::ChargingHandler(ClockManager* instance) {
|
||||
delete info;
|
||||
}
|
||||
|
||||
|
||||
Governor::Governor() {
|
||||
memset(reinterpret_cast<void*>(&m_cpu_freq), 0, sizeof(m_cpu_freq));
|
||||
memset(reinterpret_cast<void*>(&m_gpu_freq), 0, sizeof(m_gpu_freq));
|
||||
|
||||
m_cpu_freq.module = SysClkModule_CPU;
|
||||
m_gpu_freq.module = SysClkModule_GPU;
|
||||
|
||||
m_cpu_freq.hz_list = GetTable(SysClkModule_CPU);
|
||||
m_gpu_freq.hz_list = GetTable(SysClkModule_GPU);
|
||||
|
||||
m_cpu_freq.boost_hz = Clocks::boostCpuFreq;
|
||||
m_cpu_freq.utilref_hz = 2397'000'000;
|
||||
|
||||
m_gpu_freq.boost_hz = 76'800'000;
|
||||
m_gpu_freq.min_hz = 153'600'000;
|
||||
m_gpu_freq.utilref_hz = 1305'600'000;
|
||||
|
||||
nvInitialize();
|
||||
Result rc = nvOpen(&m_nvgpu_field, "/dev/nvhost-ctrl-gpu");
|
||||
if (R_FAILED(rc)) {
|
||||
ASSERT_RESULT_OK(rc, "nvOpen");
|
||||
nvExit();
|
||||
}
|
||||
}
|
||||
|
||||
Governor::~Governor() {
|
||||
Stop();
|
||||
nvClose(m_nvgpu_field);
|
||||
nvExit();
|
||||
}
|
||||
|
||||
void Governor::Start() {
|
||||
if (m_running)
|
||||
return;
|
||||
|
||||
m_running = true;
|
||||
Result rc = 0;
|
||||
for (int core = 0; core < CORE_NUMS; core++) {
|
||||
s_CoreContext* s = &m_cpu_core_ctx[core];
|
||||
s->self = this;
|
||||
s->id = core;
|
||||
int prio = (core == CORE_NUMS - 1) ? 0x3F : 0x3B; // Pre-emptive MT
|
||||
rc = threadCreate(&m_t_cpuworker[core], &CpuUtilWorker, (void*)s, NULL, 0x400, prio, core);
|
||||
ASSERT_RESULT_OK(rc, "threadCreate");
|
||||
rc = threadStart(&m_t_cpuworker[core]);
|
||||
ASSERT_RESULT_OK(rc, "threadStart");
|
||||
}
|
||||
rc = threadCreate(&m_t_main, &Main, (void*)this, NULL, 0x400, 0x3F, 3);
|
||||
ASSERT_RESULT_OK(rc, "threadCreate");
|
||||
rc = threadStart(&m_t_main);
|
||||
ASSERT_RESULT_OK(rc, "threadStart");
|
||||
}
|
||||
|
||||
void Governor::Stop() {
|
||||
if (!m_running)
|
||||
return;
|
||||
|
||||
m_running = false;
|
||||
svcSleepThread(TICK_TIME_NS);
|
||||
|
||||
threadWaitForExit(&m_t_main);
|
||||
threadClose(&m_t_main);
|
||||
|
||||
for (int core = 0; core < CORE_NUMS; core++) {
|
||||
threadWaitForExit(&m_t_cpuworker[core]);
|
||||
threadClose(&m_t_cpuworker[core]);
|
||||
}
|
||||
}
|
||||
|
||||
void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) {
|
||||
if (!max_hz) // Fallback to apm configuration
|
||||
max_hz = Clocks::GetStockClock(m_apm_conf, (SysClkModule)module);
|
||||
|
||||
switch (module) {
|
||||
case SysClkModule_CPU:
|
||||
m_cpu_freq.max_hz = max_hz;
|
||||
break;
|
||||
case SysClkModule_GPU:
|
||||
m_gpu_freq.max_hz = max_hz;
|
||||
m_gpu_freq.min_hz = (m_gpu_freq.max_hz <= 153'600'000) ? max_hz : 153'600'000;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void Governor::SetPerfConf(uint32_t id) {
|
||||
m_perf_conf_id = id;
|
||||
m_apm_conf = Clocks::GetEmbeddedApmConfig(id);
|
||||
}
|
||||
|
||||
uint32_t Governor::s_FreqContext::GetNormalizedUtil(uint32_t raw_util) {
|
||||
return ((uint64_t)raw_util * target_hz / utilref_hz);
|
||||
}
|
||||
namespace GovernorImpl {
|
||||
|
||||
// Schedutil: https://github.com/torvalds/linux/blob/master/kernel/sched/cpufreq_schedutil.c
|
||||
// C = 1.25, tipping-point 80.0% (used in Linux schedutil), 1.25 -> 1 + (1 >> 2)
|
||||
// C = 1.5, tipping-point 66.7%, 1.5 -> 1 + (1 >> 1)
|
||||
// Utilization is frequency-invariant (normalized):
|
||||
// next_freq = C * max_freq(ref_freq) * util / max
|
||||
void Governor::s_FreqContext::SetNextFreq(uint32_t norm_util) {
|
||||
uint32_t prev_hz = target_hz;
|
||||
|
||||
auto FindHzInTable = [](uint32_t* hz_list, uint32_t in_hz) {
|
||||
uint32_t* p = hz_list;
|
||||
// target_freq = C * max_freq(ref_freq) * util / max
|
||||
void BaseGovernor::ApplyNewFreqFromNormUtil(uint32_t normUtil) {
|
||||
auto FindHzInTable = [](uint32_t* list, uint32_t hz) -> uint32_t {
|
||||
uint32_t* p = list;
|
||||
for (; *p != 0; p++) {
|
||||
if (in_hz <= *p)
|
||||
return p;
|
||||
if (hz <= *p)
|
||||
return *p;
|
||||
}
|
||||
return (--p);
|
||||
return *(--p);
|
||||
};
|
||||
|
||||
uint32_t next_freq = utilref_hz / UTIL_MAX * norm_util;
|
||||
uint32_t next_freq = m_ref_hz / UTIL_MAX * normUtil;
|
||||
next_freq += next_freq >> 1;
|
||||
|
||||
uint32_t new_hz;
|
||||
if (next_freq >= max_hz)
|
||||
target_hz = max_hz;
|
||||
new_hz = max_hz;
|
||||
else if (next_freq <= min_hz)
|
||||
target_hz = min_hz;
|
||||
new_hz = min_hz;
|
||||
else
|
||||
target_hz = *FindHzInTable(hz_list, next_freq);
|
||||
new_hz = FindHzInTable(m_hz_list, next_freq);
|
||||
|
||||
bool changed = target_hz != prev_hz;
|
||||
if (changed)
|
||||
SetHz();
|
||||
ApplyTargetFreq(new_hz);
|
||||
}
|
||||
|
||||
void Governor::s_FreqContext::SetHz() {
|
||||
if (target_hz)
|
||||
Clocks::SetHz(module, target_hz);
|
||||
void CpuGovernor::GovernorWorker::Start() {
|
||||
if (this->running)
|
||||
return;
|
||||
|
||||
this->running = true;
|
||||
Result rc = 0;
|
||||
for (int id = 0; id < CORE_NUMS; id++) {
|
||||
WorkerContext* s = &contexts[id];
|
||||
s->super = this->super;
|
||||
s->id = id;
|
||||
int prio = (id == CORE_NUMS - 1) ? 0x3F : 0x3B; // Pre-emptive MT
|
||||
rc = threadCreate(&threads[id], &WorkerContext::Loop, (void*)s, NULL, 0x400, prio, id);
|
||||
ASSERT_RESULT_OK(rc, "threadCreate");
|
||||
rc = threadStart(&threads[id]);
|
||||
ASSERT_RESULT_OK(rc, "threadStart");
|
||||
}
|
||||
}
|
||||
|
||||
void Governor::s_FreqContext::Boost() {
|
||||
target_hz = boost_hz;
|
||||
if (module == SysClkModule_CPU && max_hz > boost_hz)
|
||||
target_hz = max_hz;
|
||||
SetHz();
|
||||
void CpuGovernor::GovernorWorker::Stop() {
|
||||
if (!this->running)
|
||||
return;
|
||||
|
||||
this->running = false;
|
||||
svcSleepThread(TICK_TIME_NS);
|
||||
|
||||
for (auto &t : threads) {
|
||||
threadWaitForExit(&t);
|
||||
threadClose(&t);
|
||||
}
|
||||
}
|
||||
|
||||
void Governor::CpuUtilWorker(void* args) {
|
||||
s_CoreContext* s = static_cast<s_CoreContext*>(args);
|
||||
void CpuGovernor::Apply() {
|
||||
uint32_t util = 0;
|
||||
for (auto& ctx : this->m_worker.contexts) {
|
||||
uint32_t core_util = ctx.util;
|
||||
if (util < core_util)
|
||||
util = core_util;
|
||||
}
|
||||
|
||||
this->m_util.Update(util);
|
||||
if (this->auto_boost && this->m_worker.contexts[SYS_CORE_ID].util > BOOST_THRESHOLD)
|
||||
this->ApplyBoost();
|
||||
else
|
||||
this->ApplyNewFreqFromNormUtil(this->m_util.Get());
|
||||
}
|
||||
|
||||
void CpuGovernor::WorkerContext::Loop(void* args) {
|
||||
WorkerContext* s = static_cast<WorkerContext*>(args);
|
||||
CpuGovernor* self = s->super;
|
||||
GovernorWorker* worker = &(self->m_worker);
|
||||
int coreid = s->id;
|
||||
Governor* self = s->self;
|
||||
|
||||
while (self->m_running) {
|
||||
while (worker->running) {
|
||||
uint64_t tick = s->tick = armGetSystemTick();
|
||||
s->util = self->m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get());
|
||||
s->util = self->CalcNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get());
|
||||
|
||||
bool CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id);
|
||||
if (CPUBoosted) {
|
||||
if (apmExtIsCPUBoosted(self->m_manager->GetPerfConf())) {
|
||||
svcSleepThread(TICK_TIME_NS);
|
||||
continue;
|
||||
}
|
||||
@@ -295,88 +232,128 @@ void Governor::CpuUtilWorker(void* args) {
|
||||
if (id == coreid)
|
||||
continue;
|
||||
|
||||
uint64_t diff = std::abs((int64_t)self->m_cpu_core_ctx[id].tick - (int64_t)tick);
|
||||
uint64_t diff = std::abs((int64_t)worker->contexts[id].tick - (int64_t)tick);
|
||||
if (diff < SYSTICK_HZ / SAMPLE_RATE * 10)
|
||||
continue;
|
||||
|
||||
if (id == SYS_CORE_ID && self->m_syscore_autoboost) {
|
||||
self->m_cpu_freq.Boost();
|
||||
// Stuck on system core and auto boost enabled, apply boost
|
||||
if (id == SYS_CORE_ID && self->auto_boost) {
|
||||
self->ApplyBoost();
|
||||
break;
|
||||
}
|
||||
|
||||
self->m_cpu_freq.target_hz = self->m_cpu_freq.max_hz;
|
||||
self->m_cpu_freq.SetHz();
|
||||
// Stuck on other cores or auto boost disabled, apply max hz
|
||||
self->ApplyTargetFreq(self->max_hz);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Governor::Main(void* args) {
|
||||
void GpuGovernor::Apply() {
|
||||
uint32_t util = this->CalcNormalizedUtil(GpuCoreUtil(m_nvgpu_field).Get());
|
||||
this->m_util.Update(util);
|
||||
this->ApplyNewFreqFromNormUtil(this->m_util.Get());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Tells whether the frequency of `module` is driven by the governor under the
// current configuration.
//   - MEM: never.
//   - CPU / GPU: when the matching bit of the configuration is set.
//   - any other value: when the configuration is not AllDisabled.
bool Governor::IsHandledByGovernor(SysClkModule module) {
    if (module == SysClkModule_MEM) {
        return false;
    }

    const auto config = this->GetConfig();

    if (module == SysClkModule_CPU) {
        return ((config >> SysClkOcGovernorConfig_CPU_Shift) & 1) != 0;
    }
    if (module == SysClkModule_GPU) {
        return ((config >> SysClkOcGovernorConfig_GPU_Shift) & 1) != 0;
    }

    return config != SysClkOcGovernorConfig_AllDisabled;
}
|
||||
|
||||
// Stores a new governor configuration and notifies the CPU worker and the
// manager through their onConfigUpdated() hooks. Does nothing when `config`
// equals the stored configuration.
void Governor::SetConfig(SysClkOcGovernorConfig config) {
    if (m_config != config) {
        m_config = config;
        m_cpu_gov->m_worker.onConfigUpdated(config);
        m_manager.onConfigUpdated(config);
    }
}
|
||||
|
||||
void Governor::SetPerfConf(uint32_t id) {
|
||||
m_perf_conf_id = id;
|
||||
m_apm_conf = Clocks::GetEmbeddedApmConfig(id);
|
||||
}
|
||||
|
||||
void Governor::SetMaxHz(uint32_t maxHz, SysClkModule module) {
|
||||
if (!maxHz) // Fallback to apm configuration
|
||||
maxHz = Clocks::GetStockClock(m_apm_conf, (SysClkModule)module);
|
||||
|
||||
switch (module) {
|
||||
case SysClkModule_CPU:
|
||||
m_cpu_gov->max_hz = maxHz;
|
||||
break;
|
||||
case SysClkModule_GPU:
|
||||
m_gpu_gov->max_hz = maxHz;
|
||||
m_gpu_gov->min_hz = (maxHz <= 153'600'000) ? maxHz : 153'600'000;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Marks the manager as running and launches the ContextManager thread
// (stack size 0x400, priority 0x3F, core 3). Does nothing if already running.
//
// NOTE(review): the thread argument is `this` (a GovernorManager), while
// ContextManager casts its argument to Governor* - confirm that the pointer
// passed here is the one ContextManager expects.
void Governor::GovernorManager::Start() {
    if (!this->running) {
        this->running = true;

        Result rc = threadCreate(&this->thread, &ContextManager, (void*)this, NULL, 0x400, 0x3F, 3);
        ASSERT_RESULT_OK(rc, "threadCreate");

        rc = threadStart(&this->thread);
        ASSERT_RESULT_OK(rc, "threadStart");
    }
}
|
||||
|
||||
// Clears the running flag, sleeps for one TICK_TIME_NS, then waits for the
// manager thread to exit and closes it. Does nothing if not running.
void Governor::GovernorManager::Stop() {
    if (this->running) {
        this->running = false;

        // Presumably gives the thread loop one tick to observe the flag.
        svcSleepThread(TICK_TIME_NS);

        threadWaitForExit(&this->thread);
        threadClose(&this->thread);
    }
}
|
||||
|
||||
void Governor::GovernorManager::ContextManager(void* args) {
|
||||
Governor* self = static_cast<Governor*>(args);
|
||||
s_FreqContext* cpu_ctx = &self->m_cpu_freq;
|
||||
s_FreqContext* gpu_ctx = &self->m_gpu_freq;
|
||||
uint32_t nvgpu_field = self->m_nvgpu_field;
|
||||
|
||||
s_CpuUtil *cpu_util = new s_CpuUtil;
|
||||
s_GpuUtil *gpu_util = new s_GpuUtil;
|
||||
auto SetCpuFreq = [self, cpu_ctx, cpu_util]() mutable {
|
||||
uint32_t util = self->m_cpu_core_ctx[0].util;
|
||||
for (size_t i = 1; i < CORE_NUMS; i++) {
|
||||
if (util < self->m_cpu_core_ctx[i].util)
|
||||
util = self->m_cpu_core_ctx[i].util;
|
||||
}
|
||||
cpu_util->Update(util);
|
||||
if (self->m_cpu_core_ctx[SYS_CORE_ID].util > BOOST_THRESHOLD && self->m_syscore_autoboost)
|
||||
cpu_ctx->Boost();
|
||||
else
|
||||
cpu_ctx->SetNextFreq(cpu_util->Get());
|
||||
};
|
||||
|
||||
auto SetGpuFreq = [gpu_ctx, nvgpu_field, gpu_util]() mutable {
|
||||
uint32_t util = gpu_ctx->GetNormalizedUtil(GpuCoreUtil(nvgpu_field).Get());
|
||||
gpu_util->Update(util);
|
||||
util = gpu_util->Get();
|
||||
gpu_ctx->SetNextFreq(util);
|
||||
};
|
||||
|
||||
constexpr uint64_t UPDATE_CONTEXT_RATE = SAMPLE_RATE / 2;
|
||||
uint64_t update_ticks = UPDATE_CONTEXT_RATE;
|
||||
bool CPUBoosted = false;
|
||||
bool GPUThrottled = false;
|
||||
bool cpuBoosted = false, gpuThrottled = false;
|
||||
|
||||
while (self->m_running) {
|
||||
while (self->m_manager.running) {
|
||||
bool shouldUpdateContext = ++update_ticks >= UPDATE_CONTEXT_RATE;
|
||||
if (shouldUpdateContext) {
|
||||
update_ticks = 0;
|
||||
uint32_t hz = Clocks::GetCurrentHz(SysClkModule_GPU);
|
||||
|
||||
uint32_t hz = self->m_gpu_gov->RefreshContext();
|
||||
// Sleep mode detected, wait 10 ticks
|
||||
while (!hz) {
|
||||
svcSleepThread(10 * TICK_TIME_NS);
|
||||
hz = Clocks::GetCurrentHz(SysClkModule_GPU);
|
||||
hz = self->m_gpu_gov->RefreshContext();
|
||||
}
|
||||
|
||||
GPUThrottled = apmExtIsBoostMode(self->m_perf_conf_id);
|
||||
CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id);
|
||||
uint32_t perf_conf = self->GetPerfConf();
|
||||
if ((gpuThrottled = apmExtIsBoostMode(perf_conf)) && (self->GetConfig() & SysClkOcGovernorConfig_GPU))
|
||||
self->m_gpu_gov->ApplyBoost();
|
||||
|
||||
gpu_ctx->target_hz = hz;
|
||||
if (GPUThrottled)
|
||||
gpu_ctx->Boost();
|
||||
|
||||
hz = Clocks::GetCurrentHz(SysClkModule_CPU);
|
||||
cpu_ctx->target_hz = hz;
|
||||
if (CPUBoosted)
|
||||
cpu_ctx->Boost();
|
||||
if ((cpuBoosted = apmExtIsCPUBoosted(perf_conf)) && (self->GetConfig() & SysClkOcGovernorConfig_CPU))
|
||||
self->m_cpu_gov->ApplyBoost();
|
||||
}
|
||||
|
||||
if (!GPUThrottled)
|
||||
SetGpuFreq();
|
||||
if (!CPUBoosted)
|
||||
SetCpuFreq();
|
||||
if (!gpuThrottled && (self->GetConfig() & SysClkOcGovernorConfig_GPU))
|
||||
self->m_gpu_gov->Apply();
|
||||
if (!cpuBoosted && (self->GetConfig() & SysClkOcGovernorConfig_CPU))
|
||||
self->m_cpu_gov->Apply();
|
||||
|
||||
svcSleepThread(TICK_TIME_NS);
|
||||
}
|
||||
|
||||
delete cpu_util;
|
||||
delete gpu_util;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
@@ -12,8 +12,10 @@
|
||||
|
||||
// Forward declaration
|
||||
class ClockManager;
|
||||
class Governor;
|
||||
#include "clock_manager.h"
|
||||
|
||||
|
||||
class CpuCoreUtil {
|
||||
public:
|
||||
CpuCoreUtil (int coreid, uint64_t ns);
|
||||
@@ -28,6 +30,7 @@ protected:
|
||||
uint64_t GetIdleTickCount();
|
||||
};
|
||||
|
||||
|
||||
class GpuCoreUtil {
|
||||
public:
|
||||
GpuCoreUtil (uint32_t nvgpu_field);
|
||||
@@ -38,6 +41,7 @@ protected:
|
||||
static constexpr uint64_t NVGPU_GPU_IOCTL_PMU_GET_GPU_LOAD = 0x80044715;
|
||||
};
|
||||
|
||||
|
||||
class ReverseNXSync {
|
||||
public:
|
||||
ReverseNXSync ();
|
||||
@@ -61,182 +65,301 @@ protected:
|
||||
ReverseNXMode RecheckToolMode();
|
||||
};
|
||||
|
||||
|
||||
namespace PsmExt {
|
||||
void ChargingHandler(ClockManager* instance);
|
||||
}
|
||||
|
||||
class Governor {
|
||||
public:
|
||||
Governor();
|
||||
~Governor();
|
||||
|
||||
void Start();
|
||||
void Stop();
|
||||
void SetMaxHz(uint32_t max_hz, SysClkModule module);
|
||||
void SetAutoCPUBoost(bool enabled) { m_syscore_autoboost = enabled; };
|
||||
void SetCPUBoostHz(uint32_t boost_hz) { m_cpu_freq.boost_hz = boost_hz; };
|
||||
void SetPerfConf(uint32_t id);
|
||||
constexpr uint64_t SAMPLE_RATE = 200;
|
||||
constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE;
|
||||
constexpr uint64_t SYSTICK_HZ = 19200000;
|
||||
|
||||
protected:
|
||||
// Parameters for sampling
|
||||
static constexpr uint64_t SAMPLE_RATE = 200;
|
||||
static constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE;
|
||||
static constexpr uint64_t SYSTICK_HZ = 19200000;
|
||||
namespace GovernorImpl {
|
||||
constexpr uint32_t UTIL_MAX = 1000;
|
||||
|
||||
static constexpr int CORE_NUMS = 4;
|
||||
static constexpr int SYS_CORE_ID = (CORE_NUMS - 1);
|
||||
|
||||
bool m_running = false;
|
||||
bool m_syscore_autoboost = false;
|
||||
Thread m_t_cpuworker[CORE_NUMS], m_t_main;
|
||||
|
||||
uint32_t m_nvgpu_field;
|
||||
uint32_t m_perf_conf_id;
|
||||
SysClkApmConfiguration *m_apm_conf;
|
||||
|
||||
typedef struct {
|
||||
SysClkModule module;
|
||||
uint32_t* hz_list;
|
||||
uint32_t target_hz;
|
||||
uint32_t min_hz;
|
||||
uint32_t max_hz;
|
||||
uint32_t boost_hz;
|
||||
uint32_t utilref_hz;
|
||||
|
||||
uint32_t GetNormalizedUtil(uint32_t raw_util);
|
||||
void SetNextFreq(uint32_t norm_util);
|
||||
void SetHz();
|
||||
void Boost();
|
||||
} s_FreqContext;
|
||||
s_FreqContext m_cpu_freq, m_gpu_freq;
|
||||
|
||||
typedef struct {
|
||||
Governor* self;
|
||||
int id;
|
||||
uint32_t util;
|
||||
uint64_t tick;
|
||||
} s_CoreContext;
|
||||
s_CoreContext m_cpu_core_ctx[CORE_NUMS];
|
||||
|
||||
// PELT: https://github.com/torvalds/linux/blob/master/kernel/sched/pelt.c
|
||||
// Util_acc_n = Util_0 + Util_1 * D + Util_2 * D^2 + ... + Util_n * D^n
|
||||
// To approximate D (decay multiplier):
|
||||
// After 50 ms (if SAMPLE_RATE == 200, 10 samples)
|
||||
// UTIL_MAX * D^10 ≈ 1 (UTIL_MAX decayed to 1)
|
||||
// D = 4129 / 8192
|
||||
// Util_acc_max = Util_acc_inf = 2012
|
||||
static constexpr uint32_t UTIL_MAX = 100'0;
|
||||
struct s_CpuUtil {
|
||||
uint32_t util_acc = 0;
|
||||
|
||||
static constexpr uint32_t DECAY_DIVIDENT = 4129;
|
||||
static constexpr uint32_t DECAY_DIVISOR = 8192;
|
||||
static constexpr uint32_t UTIL_ACC_MAX = 2012;
|
||||
|
||||
uint32_t Get() { return (util_acc * UTIL_MAX / UTIL_ACC_MAX); };
|
||||
void Update(uint32_t util) { util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; };
|
||||
};
|
||||
|
||||
static void CpuUtilWorker(void* args);
|
||||
static void Main(void* args);
|
||||
|
||||
// Get max value from a sliding window in O(1)
|
||||
template <typename T, size_t WINDOW_SIZE>
|
||||
class SWindowMax {
|
||||
protected:
|
||||
typedef struct {
|
||||
T item;
|
||||
T max;
|
||||
} s_Entry;
|
||||
|
||||
struct s_Stack {
|
||||
s_Entry m_stack[WINDOW_SIZE] = {};
|
||||
size_t m_next = WINDOW_SIZE;
|
||||
|
||||
bool empty() { return m_next == 0; };
|
||||
s_Entry top() { return m_stack[m_next-1]; };
|
||||
s_Entry pop() { return m_stack[--m_next]; };
|
||||
void push(s_Entry item) {
|
||||
if (m_next == WINDOW_SIZE)
|
||||
return;
|
||||
m_stack[m_next++] = item;
|
||||
};
|
||||
class BaseGovernor {
|
||||
public:
|
||||
/**
 * Bind this governor to `module` and cache its frequency table and the
 * reference (maximum) frequency used to normalise utilisation.
 *
 * All frequency fields start at 0 so that nothing reads an indeterminate
 * value before RefreshContext() or the owning governor fills them in;
 * ApplyTargetFreq() ignores a 0 Hz request.
 */
BaseGovernor(SysClkModule module) : m_module(module) {
    m_hz_list   = GetModuleFreqTable(module);
    m_ref_hz    = GetModuleMaximumFreq(module);
    m_target_hz = 0;
    min_hz      = 0;
    max_hz      = 0;
    boost_hz    = 0;
}
|
||||
|
||||
s_Stack enqStack;
|
||||
s_Stack deqStack;
|
||||
// Re-read this module's current clock via Clocks::GetCurrentHz, cache it as
// the target frequency, and return the cached value.
uint32_t RefreshContext() {
    this->m_target_hz = Clocks::GetCurrentHz(this->m_module);
    return this->m_target_hz;
}
|
||||
|
||||
void Push(s_Stack& stack, T item) {
|
||||
s_Entry n = {
|
||||
.item = item,
|
||||
.max = enqStack.empty() ? item : std::max(item, enqStack.top().max)
|
||||
};
|
||||
stack.push(n);
|
||||
}
|
||||
|
||||
T Pop() {
|
||||
if (deqStack.empty()) {
|
||||
while (!enqStack.empty())
|
||||
Push(deqStack, enqStack.pop().max);
|
||||
}
|
||||
return deqStack.pop().item;
|
||||
}
|
||||
|
||||
public:
|
||||
SWindowMax() {}
|
||||
|
||||
void Add(T item) { Pop(); Push(enqStack, item); }
|
||||
|
||||
T Get() {
|
||||
if (!enqStack.empty()) {
|
||||
T enqMax = enqStack.top().max;
|
||||
if (!deqStack.empty()) {
|
||||
T deqMax = deqStack.top().max;
|
||||
return std::max(deqMax, enqMax);
|
||||
}
|
||||
return enqMax;
|
||||
}
|
||||
if (!deqStack.empty())
|
||||
return deqStack.top().max;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Get average value from a sliding window in O(1)
|
||||
template <typename T, size_t WINDOW_SIZE>
|
||||
class SWindowAvg {
|
||||
public:
|
||||
SWindowAvg() {}
|
||||
|
||||
void Add(T item) {
|
||||
T pop = m_queue[m_next];
|
||||
m_queue[m_next] = item;
|
||||
m_next = (m_next + 1) % WINDOW_SIZE;
|
||||
m_sum -= pop;
|
||||
m_sum += item;
|
||||
}
|
||||
|
||||
T Get() { return m_sum / WINDOW_SIZE; }
|
||||
uint32_t min_hz, max_hz, boost_hz;
|
||||
|
||||
protected:
|
||||
size_t m_next = 0;
|
||||
T m_sum = 0;
|
||||
T m_queue[WINDOW_SIZE] = {};
|
||||
uint32_t CalcNormalizedUtil(uint32_t rawUtil) {
|
||||
return ((uint64_t)rawUtil * m_target_hz / m_ref_hz);
|
||||
};
|
||||
|
||||
void ApplyNewFreqFromNormUtil(uint32_t norm);
|
||||
|
||||
// Switch the module to `hz` via Clocks::SetHz and remember it as the target.
// A zero request, or one equal to the cached target, is a no-op.
void ApplyTargetFreq(uint32_t hz) {
    const bool needsChange = (hz != 0) && (hz != m_target_hz);
    if (needsChange) {
        m_target_hz = hz;
        Clocks::SetHz(m_module, hz);
    }
}
|
||||
|
||||
// Apply the boost frequency. For the CPU module only, max_hz is used instead
// when it is higher than boost_hz.
void ApplyBoost() {
    uint32_t boostTarget = boost_hz;
    if (m_module == SysClkModule_CPU && max_hz > boost_hz)
        boostTarget = max_hz;
    ApplyTargetFreq(boostTarget);
}
|
||||
|
||||
SysClkModule m_module;
|
||||
uint32_t* m_hz_list;
|
||||
uint32_t m_target_hz, m_ref_hz;
|
||||
|
||||
friend Governor;
|
||||
};
|
||||
|
||||
struct s_GpuUtil {
|
||||
SWindowMax<uint32_t, 32> window {};
|
||||
class CpuGovernor : public BaseGovernor {
|
||||
public:
|
||||
/**
 * Create the CPU governor owned by `manager`.
 *
 * The boost frequency starts from Clocks::boostCpuFreq and the worker is
 * given a back-pointer to this governor. `auto_boost` and the worker's
 * `running` flag are explicitly cleared: both are read later
 * (GovernorWorker::onConfigUpdated compares against `running`) and would
 * otherwise hold indeterminate values.
 */
CpuGovernor(Governor* manager)
    : BaseGovernor(SysClkModule_CPU), m_manager(manager) {
    boost_hz = Clocks::boostCpuFreq;
    auto_boost = false;
    m_worker.running = false;
    m_worker.super = this;
}
|
||||
|
||||
uint32_t util_acc = 0;
|
||||
// After 160 ms (if SAMPLE_RATE == 200, 32 samples)
|
||||
// UTIL_MAX * D^32 ≈ 1 (UTIL_MAX decayed to 1)
|
||||
// D = 6880 / 8192
|
||||
// Util_acc_max = Util_acc_inf = 6145
|
||||
static constexpr uint32_t DECAY_DIVIDENT = 6880;
|
||||
static constexpr uint32_t DECAY_DIVISOR = 8192;
|
||||
static constexpr uint32_t UTIL_ACC_MAX = 6145;
|
||||
~CpuGovernor() { this->m_worker.Stop(); };
|
||||
|
||||
uint32_t Get() { return ((util_acc * UTIL_MAX / UTIL_ACC_MAX) + window.Get()) / 2; };
|
||||
void Update(uint32_t util) { window.Add(util); util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; };
|
||||
void Apply();
|
||||
|
||||
bool auto_boost;
|
||||
|
||||
protected:
|
||||
static constexpr int CORE_NUMS = 4;
|
||||
static constexpr int SYS_CORE_ID = CORE_NUMS - 1;
|
||||
|
||||
// PELT: https://github.com/torvalds/linux/blob/master/kernel/sched/pelt.c
|
||||
// Util_acc_n = Util_0 + Util_1 * D + Util_2 * D^2 + ... + Util_n * D^n
|
||||
// To approximate D (decay multiplier):
|
||||
// After 50 ms (if SAMPLE_RATE == 200, 10 samples)
|
||||
// UTIL_MAX * D^10 ≈ 1 (UTIL_MAX decayed to 1)
|
||||
// D = 4129 / 8192
|
||||
// Util_acc_max = Util_acc_inf = 2012
|
||||
typedef struct PeltUtil {
|
||||
uint32_t util_acc = 0;
|
||||
|
||||
static constexpr uint32_t DECAY_DIVIDENT = 4129;
|
||||
static constexpr uint32_t DECAY_DIVISOR = 8192;
|
||||
static constexpr uint32_t UTIL_ACC_MAX = 2012;
|
||||
|
||||
uint32_t Get() { return (util_acc * UTIL_MAX / UTIL_ACC_MAX); };
|
||||
void Update(uint32_t util) { util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; };
|
||||
} PeltUtil;
|
||||
PeltUtil m_util;
|
||||
|
||||
typedef struct {
|
||||
CpuGovernor*super;
|
||||
int id;
|
||||
uint32_t util;
|
||||
uint64_t tick;
|
||||
|
||||
static void Loop(void* args);
|
||||
} WorkerContext;
|
||||
|
||||
typedef struct GovernorWorker {
|
||||
Thread threads[CORE_NUMS];
|
||||
WorkerContext contexts[CORE_NUMS];
|
||||
bool running;
|
||||
CpuGovernor* super;
|
||||
|
||||
void Start();
|
||||
void Stop();
|
||||
|
||||
void onConfigUpdated(SysClkOcGovernorConfig config) {
|
||||
bool expected = (config >> SysClkOcGovernorConfig_CPU_Shift) & 1;
|
||||
if (expected != running)
|
||||
expected ? Start() : Stop();
|
||||
};
|
||||
} GovernorWorker;
|
||||
GovernorWorker m_worker;
|
||||
|
||||
Governor* m_manager;
|
||||
|
||||
friend Governor;
|
||||
};
|
||||
|
||||
class GpuGovernor : public BaseGovernor {
|
||||
public:
|
||||
GpuGovernor() : BaseGovernor(SysClkModule_GPU) {
|
||||
min_hz = 153'600'000;
|
||||
boost_hz = 76'800'000;
|
||||
|
||||
nvInitialize();
|
||||
Result rc = nvOpen(&m_nvgpu_field, "/dev/nvhost-ctrl-gpu");
|
||||
if (R_FAILED(rc)) {
|
||||
ASSERT_RESULT_OK(rc, "nvOpen");
|
||||
nvExit();
|
||||
}
|
||||
};
|
||||
|
||||
// Close the handle stored in m_nvgpu_field, then call nvExit() to balance the
// nvInitialize() made in the constructor.
~GpuGovernor() {
    nvClose(m_nvgpu_field);
    nvExit();
}
|
||||
|
||||
void SetMaxHz(uint32_t maxHz);
|
||||
|
||||
void Apply();
|
||||
|
||||
protected:
|
||||
// Sliding-window mean over the last WINDOW_SIZE samples, O(1) per update.
// Slots that have not been written yet count as zero, so the mean ramps up
// while the window is filling.
template <typename T, size_t WINDOW_SIZE>
class SWindowAvg {
public:
    SWindowAvg() {}

    // Overwrite the oldest slot with `item` and keep the running sum in step.
    void Add(T item) {
        T& slot = m_queue[m_next];
        const T evicted = slot;
        slot = item;
        m_next = (m_next + 1) % WINDOW_SIZE;
        m_sum -= evicted;
        m_sum += item;
    }

    // Running sum divided by the full window size.
    T Get() { return m_sum / WINDOW_SIZE; }

protected:
    size_t m_next = 0;            // index of the oldest slot (next to overwrite)
    T m_sum = 0;                  // sum of everything currently in m_queue
    T m_queue[WINDOW_SIZE] = {};  // ring buffer of samples
};
|
||||
|
||||
// Maximum over the last WINDOW_SIZE samples in amortised O(1).
//
// Implemented as a queue built from two stacks. Every stack entry stores the
// sample together with the maximum of that stack from the bottom up to and
// including the entry, so a stack's maximum is always top().max. New samples
// go onto enqStack; the oldest sample is removed from deqStack. When deqStack
// runs dry, enqStack is emptied into it, which reverses the order so the
// oldest sample ends up on top.
//
// The window starts out holding WINDOW_SIZE zero samples (deqStack full,
// enqStack empty). The two stacks therefore always hold WINDOW_SIZE entries
// in total and every Add() evicts exactly one old sample.
template <typename T, size_t WINDOW_SIZE>
class SWindowMax {
protected:
    static_assert(WINDOW_SIZE > 0, "SWindowMax needs a non-empty window");

    struct s_Entry {
        T item;  // the sample itself
        T max;   // max of this stack from the bottom up to this entry
    };

    struct s_Stack {
        s_Entry m_stack[WINDOW_SIZE] = {};
        size_t m_next = 0;  // number of entries in use

        bool empty() { return m_next == 0; }
        s_Entry top() { return m_stack[m_next - 1]; }
        s_Entry pop() { return m_stack[--m_next]; }
        void push(s_Entry item) {
            if (m_next == WINDOW_SIZE)
                return;
            m_stack[m_next++] = item;
        }
    };

    s_Stack enqStack;
    s_Stack deqStack;

    // Push `item` onto `stack`, recording the running maximum of THAT stack.
    void Push(s_Stack& stack, T item) {
        s_Entry n;
        n.item = item;
        n.max = stack.empty() ? item : std::max(item, stack.top().max);
        stack.push(n);
    }

    // Remove and return the oldest sample in the window.
    T Pop() {
        if (deqStack.empty()) {
            // Move the samples themselves (not their cached maxima) so the
            // maxima get recomputed relative to deqStack.
            while (!enqStack.empty())
                Push(deqStack, enqStack.pop().item);
        }
        if (deqStack.empty())
            return T{};  // unreachable while the size invariant holds
        return deqStack.pop().item;
    }

public:
    // Pre-fill the window with WINDOW_SIZE zero-valued samples.
    SWindowMax() { deqStack.m_next = WINDOW_SIZE; }

    // Slide the window: drop the oldest sample, append `item`.
    void Add(T item) { Pop(); Push(enqStack, item); }

    // Largest sample currently inside the window (0 if it is empty).
    T Get() {
        if (!enqStack.empty()) {
            T enqMax = enqStack.top().max;
            if (!deqStack.empty())
                return std::max(deqStack.top().max, enqMax);
            return enqMax;
        }
        if (!deqStack.empty())
            return deqStack.top().max;
        return 0;
    }
};
|
||||
|
||||
typedef struct MaxWindow {
|
||||
SWindowMax<uint32_t, 32> window {};
|
||||
uint32_t util_acc = 0;
|
||||
|
||||
// After 160 ms (if SAMPLE_RATE == 200, 32 samples)
|
||||
// UTIL_MAX * D^32 ≈ 1 (UTIL_MAX decayed to 1)
|
||||
// D = 6880 / 8192
|
||||
// Util_acc_max = Util_acc_inf = 6145
|
||||
static constexpr uint32_t DECAY_DIVIDENT = 6880;
|
||||
static constexpr uint32_t DECAY_DIVISOR = 8192;
|
||||
static constexpr uint32_t UTIL_ACC_MAX = 6145;
|
||||
|
||||
uint32_t Get() { return ((util_acc * UTIL_MAX / UTIL_ACC_MAX) + window.Get()) / 2; };
|
||||
void Update(uint32_t util) { window.Add(util); util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; };
|
||||
} MaxWindow;
|
||||
MaxWindow m_util;
|
||||
|
||||
uint32_t m_nvgpu_field;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
class Governor {
|
||||
public:
|
||||
Governor() {
|
||||
m_cpu_gov = new GovernorImpl::CpuGovernor(this);
|
||||
m_gpu_gov = new GovernorImpl::GpuGovernor();
|
||||
};
|
||||
|
||||
~Governor() {
|
||||
m_manager.Stop();
|
||||
delete m_cpu_gov;
|
||||
delete m_gpu_gov;
|
||||
};
|
||||
|
||||
SysClkOcGovernorConfig GetConfig() { return m_config; };
|
||||
bool IsHandledByGovernor(SysClkModule module = SysClkModule_EnumMax);
|
||||
void SetConfig(SysClkOcGovernorConfig config);
|
||||
|
||||
void SetPerfConf(uint32_t id);
|
||||
uint32_t GetPerfConf() { return m_perf_conf_id; };
|
||||
|
||||
void SetMaxHz(uint32_t maxHz, SysClkModule module);
|
||||
|
||||
void SetAutoCPUBoost(bool enabled) { m_cpu_gov->auto_boost = enabled; };
|
||||
void SetCPUBoostHz(uint32_t boostHz) { m_cpu_gov->boost_hz = boostHz; };
|
||||
|
||||
protected:
|
||||
typedef struct GovernorManager {
|
||||
bool running = false;
|
||||
Thread thread;
|
||||
|
||||
void Start();
|
||||
void Stop();
|
||||
void onConfigUpdated(SysClkOcGovernorConfig config) {
|
||||
bool shouldRun = (config != SysClkOcGovernorConfig_AllDisabled);
|
||||
shouldRun ? Start() : Stop();
|
||||
};
|
||||
static void ContextManager(void* args);
|
||||
} GovernorManager;
|
||||
GovernorManager m_manager;
|
||||
|
||||
SysClkOcGovernorConfig m_config = SysClkOcGovernorConfig_AllDisabled;
|
||||
|
||||
uint32_t m_perf_conf_id;
|
||||
SysClkApmConfiguration* m_apm_conf;
|
||||
|
||||
GovernorImpl::CpuGovernor* m_cpu_gov;
|
||||
GovernorImpl::GpuGovernor* m_gpu_gov;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user