[Sys-clk-OC] Added a per-title governor toggle in the overlay (default: governor enabled) (#37); fixed an issue where the governor did not immediately bump the CPU frequency to max when a non-system core (Core0/1/2) is at 100% utilization.

This commit is contained in:
KazushiM
2022-10-28 22:00:44 +08:00
parent 851839be0a
commit a6dcf1da5c
13 changed files with 151 additions and 113 deletions

View File

@@ -241,9 +241,8 @@ bool ClockManager::RefreshContext()
uint32_t chargingLimit = this->GetConfig()->GetConfigValue(SysClkConfigValue_ChargingLimitPercentage);
PsmExt::ChargingHandler(fastChargingEnabled, chargingLimit);
bool configUpdated = this->config->Refresh();
bool hasChanged = false;
if (configUpdated) {
bool hasChanged = this->config->Refresh();
if (hasChanged) {
this->rnxSync->ToggleSync(this->GetConfig()->GetConfigValue(SysClkConfigValue_SyncReverseNXMode));
this->oc->allowUnsafeFreq = this->GetConfig()->GetConfigValue(SysClkConfigValue_AllowUnsafeFrequencies);
}
@@ -264,15 +263,6 @@ bool ClockManager::RefreshContext()
hasChanged = true;
}
if (hasChanged) {
if (enabled && governor)
this->governor->Start();
else
this->governor->Stop();
}
hasChanged |= configUpdated;
std::uint64_t applicationId = ProcessManagement::GetCurrentApplicationId();
if (applicationId != this->context->applicationId)
{
@@ -281,10 +271,16 @@ bool ClockManager::RefreshContext()
hasChanged = true;
/* Clear ReverseNX state */
this->GetConfig()->SetReverseNXRTMode(ReverseNX_NotFound);
this->rnxSync->Reset(applicationId);
}
if (hasChanged) {
if (enabled && governor && !this->GetConfig()->GetTitleGovernorDisabled(applicationId))
this->governor->Start();
else
this->governor->Stop();
}
SysClkProfile profile = Clocks::GetCurrentProfile();
if (profile != this->oc->realProfile)
{
@@ -308,7 +304,6 @@ bool ClockManager::RefreshContext()
}
{
this->rnxSync->SetRTMode(this->GetConfig()->GetReverseNXRTMode());
SysClkProfile current = this->context->profile;
SysClkProfile expected = this->rnxSync->GetProfile(this->oc->realProfile);
this->context->profile = expected;
@@ -383,6 +378,10 @@ bool ClockManager::RefreshContext()
return hasChanged;
}
// Forwards the given ReverseNX runtime mode to the ReverseNX sync object
// owned by this manager.
void ClockManager::SetRNXRTMode(ReverseNXMode mode)
{
    auto& sync = *this->rnxSync;
    sync.SetRTMode(mode);
}
SysClkContext ClockManager::GetCurrentContext()
{
std::scoped_lock lock{this->contextMutex};

View File

@@ -22,7 +22,6 @@
class ClockManager
{
public:
static ClockManager* GetInstance();
static void Initialize();
static void Exit();
@@ -31,6 +30,7 @@ class ClockManager
bool Running();
void Tick();
void WaitForNextTick();
void SetRNXRTMode(ReverseNXMode mode);
SysClkContext GetCurrentContext();
Config* GetConfig();

View File

@@ -24,6 +24,7 @@ Config::Config(std::string path)
this->loaded = false;
this->profileMhzMap = std::map<std::tuple<std::uint64_t, SysClkProfile, SysClkModule>, std::uint32_t>();
this->profileCountMap = std::map<std::uint64_t, std::uint8_t>();
this->profileGovernorDisabled = std::map<std::uint64_t, bool>();
this->mtime = 0;
this->enabled = false;
for(unsigned int i = 0; i < SysClkModule_EnumMax; i++)
@@ -35,8 +36,6 @@ Config::Config(std::string path)
{
this->configValues[i] = sysclkDefaultConfigValue((SysClkConfigValue)i);
}
this->reverseNXRTMode = ReverseNX_NotFound;
}
Config::~Config()
@@ -73,6 +72,7 @@ void Config::Close()
this->loaded = false;
this->profileMhzMap.clear();
this->profileCountMap.clear();
this->profileGovernorDisabled.clear();
for(unsigned int i = 0; i < SysClkConfigValue_EnumMax; i++)
{
@@ -164,6 +164,20 @@ std::uint32_t Config::GetAutoClockHz(std::uint64_t tid, SysClkModule module, Sys
return 0;
}
/**
 * Reports whether the governor has been disabled for a given title.
 *
 * @param tid Title (application) id to look up.
 * @return The flag stored for `tid` in profileGovernorDisabled; false when the
 *         config is not loaded or no entry exists for the title.
 */
bool Config::GetTitleGovernorDisabled(std::uint64_t tid)
{
    // profileGovernorDisabled is modified by SetProfiles and by config
    // (re)loading; take configMutex here just like GetProfiles does so the
    // lookup never runs concurrently with a mutation of the map.
    std::scoped_lock lock{this->configMutex};

    if (!this->loaded)
    {
        return false;
    }

    const auto it = this->profileGovernorDisabled.find(tid);
    return it != this->profileGovernorDisabled.end() && it->second;
}
void Config::GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles)
{
std::scoped_lock lock{this->configMutex};
@@ -175,6 +189,12 @@ void Config::GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles
out_profiles->mhzMap[profile][module] = FindClockMhz(tid, (SysClkModule)module, (SysClkProfile)profile);
}
}
std::map<uint64_t, bool>::const_iterator it = this->profileGovernorDisabled.find(tid);
bool governorDisabled = false;
if (it != this->profileGovernorDisabled.end() && it->second)
governorDisabled = true;
out_profiles->governorDisabled = governorDisabled;
}
bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bool immediate)
@@ -183,8 +203,8 @@ bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bo
uint8_t numProfiles = 0;
// String pointer array passed to ini
char* iniKeys[static_cast<int>(SysClkProfile_EnumMax) * static_cast<int>(SysClkModule_EnumMax) + 1];
char* iniValues[static_cast<int>(SysClkProfile_EnumMax) * static_cast<int>(SysClkModule_EnumMax) + 1];
char* iniKeys[static_cast<int>(SysClkProfile_EnumMax) * static_cast<int>(SysClkModule_EnumMax) + 1 + 1];
char* iniValues[static_cast<int>(SysClkProfile_EnumMax) * static_cast<int>(SysClkModule_EnumMax) + 1 + 1];
// Char arrays to build strings
char keysStr[static_cast<int>(SysClkProfile_EnumMax) * static_cast<int>(SysClkModule_EnumMax) * 0x40];
@@ -227,6 +247,13 @@ bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bo
}
}
if (profiles->governorDisabled) {
snprintf(sk, 0x40, "%s", CONFIG_KEY_TITLE_GOVERNOR_DISABLED);
snprintf(sv, 0x10, "%d", profiles->governorDisabled);
*ik++ = sk;
*iv++ = sv;
}
*ik = NULL;
*iv = NULL;
@@ -255,6 +282,11 @@ bool Config::SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bo
mhz++;
}
}
if (profiles->governorDisabled)
this->profileGovernorDisabled[tid] = profiles->governorDisabled;
else
this->profileGovernorDisabled.erase(tid);
}
return true;
@@ -304,6 +336,16 @@ int Config::BrowseIniFunc(const char* section, const char* key, const char* valu
return 1;
}
if (!strcmp(key, CONFIG_KEY_TITLE_GOVERNOR_DISABLED)) {
input = strtoul(value, NULL, 0);
if ((input & 0x1) != input) {
input = 0;
FileUtils::LogLine("[cfg] Invalid value for key '%s' in section '%s': using default %d", key, section, input);
}
config->profileGovernorDisabled[tid] = (bool)input;
return 1;
}
SysClkProfile parsedProfile = SysClkProfile_EnumMax;
SysClkModule parsedModule = SysClkModule_EnumMax;
@@ -481,14 +523,3 @@ bool Config::SetConfigValues(SysClkConfigValueList* configValues, bool immediate
return true;
}
// Returns the stored ReverseNX runtime mode; the read is performed while
// holding reverseNXRTMutex.
ReverseNXMode Config::GetReverseNXRTMode()
{
    std::scoped_lock guard(this->reverseNXRTMutex);
    return this->reverseNXRTMode;
}
// Stores the given ReverseNX runtime mode; the write is performed while
// holding reverseNXRTMutex.
void Config::SetReverseNXRTMode(ReverseNXMode mode)
{
    std::scoped_lock guard(this->reverseNXRTMutex);
    this->reverseNXRTMode = mode;
}

View File

@@ -21,6 +21,8 @@
#define CONFIG_VAL_SECTION "values"
#define CONFIG_KEY_TITLE_GOVERNOR_DISABLED "governor_disabled"
class Config
{
public:
@@ -37,6 +39,7 @@ class Config
void GetProfiles(std::uint64_t tid, SysClkTitleProfileList* out_profiles);
bool SetProfiles(std::uint64_t tid, SysClkTitleProfileList* profiles, bool immediate);
std::uint32_t GetAutoClockHz(std::uint64_t tid, SysClkModule module, SysClkProfile profile);
bool GetTitleGovernorDisabled(std::uint64_t tid);
void SetEnabled(bool enabled);
bool Enabled();
@@ -47,8 +50,6 @@ class Config
const char* GetConfigValueName(SysClkConfigValue val, bool pretty);
void GetConfigValues(SysClkConfigValueList* out_configValues);
bool SetConfigValues(SysClkConfigValueList* configValues, bool immediate);
ReverseNXMode GetReverseNXRTMode();
void SetReverseNXRTMode(ReverseNXMode);
protected:
void Load();
void Close();
@@ -60,14 +61,13 @@ class Config
std::map<std::tuple<std::uint64_t, SysClkProfile, SysClkModule>, std::uint32_t> profileMhzMap;
std::map<std::uint64_t, std::uint8_t> profileCountMap;
std::map<std::uint64_t, bool> profileGovernorDisabled;
bool loaded;
std::string path;
time_t mtime;
LockableMutex configMutex;
LockableMutex overrideMutex;
LockableMutex reverseNXRTMutex;
std::atomic_bool enabled;
std::uint32_t overrideFreqs[SysClkModule_EnumMax];
std::uint64_t configValues[SysClkConfigValue_EnumMax];
ReverseNXMode reverseNXRTMode;
};

View File

@@ -298,7 +298,6 @@ Result IpcService::SetConfigValues(SysClkConfigValueList* configValues)
}
Result IpcService::SetReverseNXRTMode(ReverseNXMode mode) {
Config* config = ClockManager::GetInstance()->GetConfig();
config->SetReverseNXRTMode(mode);
ClockManager::GetInstance()->SetRNXRTMode(mode);
return 0;
}

View File

@@ -38,6 +38,7 @@ uint32_t GpuCoreUtil::Get() {
return load;
}
ReverseNXSync::ReverseNXSync()
: m_rt_mode(ReverseNX_NotFound), m_tool_mode(ReverseNX_NotFound) {
FILE *fp = fopen("/atmosphere/contents/0000000000534C56/flags/boot2.flag", "r");
@@ -103,6 +104,7 @@ ReverseNXMode ReverseNXSync::RecheckToolMode() {
return mode;
}
void PsmExt::ChargingHandler(bool fastChargingEnabled, uint32_t chargingLimit) {
PsmChargeInfo* info = new PsmChargeInfo;
Service* session = psmGetServiceSession();
@@ -125,6 +127,7 @@ void PsmExt::ChargingHandler(bool fastChargingEnabled, uint32_t chargingLimit) {
delete info;
}
Governor::Governor() {
memset(reinterpret_cast<void*>(&m_cpu_freq), 0, sizeof(m_cpu_freq));
memset(reinterpret_cast<void*>(&m_gpu_freq), 0, sizeof(m_gpu_freq));
@@ -166,9 +169,11 @@ void Governor::Start() {
m_running = true;
Result rc = 0;
for (int core = 0; core < CORE_NUMS; core++) {
s_CoreContext* s = InitCoreContext(&m_cpu_core_ctx[core], this, core);
s_CoreContext* s = &m_cpu_core_ctx[core];
s->self = this;
s->id = core;
int prio = (core == CORE_NUMS - 1) ? 0x3F : 0x3B; // Pre-emptive MT
rc = threadCreate(&m_t_cpuworker[core], &CheckCpuUtilWorker, (void*)s, NULL, 0x1000, prio, core);
rc = threadCreate(&m_t_cpuworker[core], &CpuUtilWorker, (void*)s, NULL, 0x1000, prio, core);
ASSERT_RESULT_OK(rc, "threadCreate");
rc = threadStart(&m_t_cpuworker[core]);
ASSERT_RESULT_OK(rc, "threadStart");
@@ -205,7 +210,7 @@ void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) {
break;
case SysClkModule_GPU:
m_gpu_freq.max_hz = max_hz;
m_gpu_freq.min_hz = (m_gpu_freq.max_hz == 76'800'000) ? 76'800'000 : 153'600'000;
m_gpu_freq.min_hz = (m_gpu_freq.max_hz <= 153'600'000) ? max_hz : 153'600'000;
break;
case SysClkModule_MEM:
m_mem_freq = max_hz;
@@ -234,19 +239,19 @@ void Governor::s_FreqContext::SetNextFreq(uint32_t norm_util) {
uint32_t prev_hz = target_hz;
uint32_t next_freq = (uint64_t)(norm_util + (norm_util >> 1)) * utilref_hz / UTIL_MAX;
uint32_t adj_next_freq;
uint32_t adj_next_freq = target_hz;
if (next_freq > max_hz) {
adj_next_freq = max_hz;
} else if (next_freq < min_hz) {
adj_next_freq = min_hz;
} else {
uint32_t* p = hz_list;
while (*p) {
if (*p > next_freq)
do {
if (*p > next_freq) {
adj_next_freq = *p;
break;
p++;
}
adj_next_freq = *p;
}
} while (*p++);
}
target_hz = adj_next_freq;
@@ -267,47 +272,35 @@ void Governor::s_FreqContext::SetBoostHz() {
SetHz();
}
// Zero-fills the given core context, then records the owning Governor and the
// core id in it. Returns the same pointer that was passed in.
Governor::s_CoreContext* Governor::InitCoreContext(s_CoreContext* context, Governor* self, int64_t id)
{
    memset(static_cast<void*>(context), 0, sizeof(*context));

    context->id = id;
    context->self = self;

    return context;
}
void Governor::CheckCpuUtilWorker(void* args) {
void Governor::CpuUtilWorker(void* args) {
s_CoreContext* s = static_cast<s_CoreContext*>(args);
int64_t coreid = s->id;
int coreid = s->id;
constexpr int SYS_CORE_ID = (CORE_NUMS - 1);
Governor* self = s->self;
bool isSystemCore = (coreid == CORE_NUMS - 1);
if (isSystemCore)
self->CheckCpuUtilWorkerSysCore();
else
self->CheckCpuUtilWorkerAppCore(coreid);
}
void Governor::CheckCpuUtilWorkerAppCore(int64_t coreid) {
constexpr uint64_t STUCK_TICKS = SAMPLE_RATE / 10;
while (m_running) {
bool isBusy = m_core3_stuck_cnt > STUCK_TICKS * (CORE_NUMS - 1);
if (isBusy) {
m_core3_stuck_cnt = 0;
m_cpu_freq.SetBoostHz();
svcSleepThread(STUCK_TICKS * TICK_TIME_NS);
} else {
m_core3_stuck_cnt++;
while (self->m_running) {
bool CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id);
if (CPUBoosted) {
svcSleepThread(TICK_TIME_NS);
continue;
}
m_cpu_core_ctx[coreid].util = m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get());
}
}
uint64_t timestamp = armTicksToNs(armGetSystemTick());
s->timestamp = timestamp;
for (int id = 0; id < CORE_NUMS; id++) {
if (abs(self->m_cpu_core_ctx[id].timestamp - timestamp) < TICK_TIME_NS * 10)
continue;
void Governor::CheckCpuUtilWorkerSysCore() {
int64_t coreid = CORE_NUMS - 1;
while (m_running) {
m_cpu_core_ctx[coreid].util = m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get());
if (id == SYS_CORE_ID) {
self->m_cpu_freq.SetBoostHz();
} else {
self->m_cpu_freq.target_hz = self->m_cpu_freq.max_hz;
self->m_cpu_freq.SetHz();
}
break;
}
s->util = self->m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get());
}
}
@@ -340,16 +333,13 @@ void Governor::Main(void* args) {
bool GPUThrottled = false;
while (self->m_running) {
self->m_core3_stuck_cnt = 0;
bool shouldUpdateContext = update_ticks++ >= UPDATE_CONTEXT_RATE;
if (shouldUpdateContext) {
update_ticks = 0;
uint32_t hz = Clocks::GetCurrentHz(SysClkModule_GPU);
// Sleep mode detected, wait 1 tick
// Sleep mode detected, wait 10 ticks
while (!hz) {
self->m_core3_stuck_cnt = 0;
svcSleepThread(TICK_TIME_NS);
svcSleepThread(10 * TICK_TIME_NS);
hz = Clocks::GetCurrentHz(SysClkModule_GPU);
}
@@ -370,13 +360,13 @@ void Governor::Main(void* args) {
self->m_mem_freq = hz;
if (hz != self->m_mem_freq)
Clocks::SetHz(SysClkModule_MEM, self->m_mem_freq);
} else {
if (!GPUThrottled)
gpu_ctx->SetNextFreq(GetAdjGpuUtil());
if (!CPUBoosted)
cpu_ctx->SetNextFreq(GetAdjCpuUtil());
}
if (!GPUThrottled)
gpu_ctx->SetNextFreq(GetAdjGpuUtil());
if (!CPUBoosted)
cpu_ctx->SetNextFreq(GetAdjCpuUtil());
svcSleepThread(TICK_TIME_NS);
}
}

View File

@@ -48,7 +48,8 @@ public:
ReverseNXMode GetMode();
protected:
ReverseNXMode m_rt_mode, m_tool_mode;
std::atomic<ReverseNXMode> m_rt_mode;
ReverseNXMode m_tool_mode;
uint64_t m_app_id = 0;
bool m_tool_enabled;
bool m_sync_enabled;
@@ -81,7 +82,6 @@ protected:
bool m_running = false;
Thread m_t_cpuworker[CORE_NUMS], m_t_main;
std::atomic<uint64_t> m_core3_stuck_cnt = 0;
uint32_t m_nvgpu_field;
uint32_t m_mem_freq;
@@ -108,32 +108,29 @@ protected:
Governor* self;
int id;
uint32_t util;
uint64_t timestamp;
} s_CoreContext;
s_CoreContext m_cpu_core_ctx[CORE_NUMS];
s_CoreContext* InitCoreContext(s_CoreContext* context, Governor* self, int64_t id = 0);
// PELT: https://github.com/torvalds/linux/blob/master/kernel/sched/pelt.c
// Util_acc_n = Util_0 + Util_1 * D + Util_2 * D^2 + ... + Util_n * D^n
// To approximate D (decay multiplier):
// After 100 ms (if SAMPLE_RATE == 200, 20 samples)
// UTIL_MAX * D^20 ≈ 1 (UTIL_MAX decayed to 1)
// D = 0.7079457843841379... ≈ 725 / 1024
// Util_acc_20 ≈ 3421, Util_acc_40 ≈ 3424, Util_acc_inf ≈ 3424
// D = 0.707946... ≈ 5799 / 8192 (epsilon < 0.0001)
// Util_acc_20 ≈ 3419, Util_acc_40 ≈ 3420, Util_acc_inf ≈ 3420
static constexpr uint32_t UTIL_MAX = 100'0;
struct s_Util {
uint32_t util_acc = 0;
static constexpr uint32_t DECAY_DIVIDENT = 725;
static constexpr uint32_t DECAY_DIVISOR = 1024;
static constexpr uint32_t UTIL_ACC_MAX = 3424;
static constexpr uint32_t DECAY_DIVIDENT = 5799;
static constexpr uint32_t DECAY_DIVISOR = 8192;
static constexpr uint32_t UTIL_ACC_MAX = 3420;
uint32_t Get() { return (util_acc * UTIL_MAX / UTIL_ACC_MAX); };
void Update(uint32_t util) { util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; };
};
static void CheckCpuUtilWorker(void* args);
void CheckCpuUtilWorkerAppCore(int64_t coreid);
void CheckCpuUtilWorkerSysCore();
static void CpuUtilWorker(void* args);
static void Main(void* args);
};