Schedutil-like governor with proper load_avg calculation; Fixed #36
This commit is contained in:
@@ -178,12 +178,9 @@ void ClockManager::Tick()
|
||||
{
|
||||
uint32_t hz = GetHz((SysClkModule)module);
|
||||
|
||||
if (this->oc->governor) {
|
||||
this->governor->SetMaxHz(hz, (SysClkModule)module);
|
||||
continue;
|
||||
}
|
||||
this->governor->SetMaxHz(hz, (SysClkModule)module);
|
||||
|
||||
if (hz && hz != this->context->freqs[module])
|
||||
if (hz && hz != this->context->freqs[module] && !this->oc->governor)
|
||||
{
|
||||
// Skip setting CPU or GPU clocks in CpuBoostMode if CPU <= boostCPUFreq or GPU >= 76.8MHz
|
||||
bool skipBoost = IsBoostMode() && ((module == SysClkModule_CPU && hz <= this->oc->boostCPUFreq) || module == SysClkModule_GPU);
|
||||
@@ -217,9 +214,9 @@ void ClockManager::WaitForNextTick()
|
||||
this->context->freqs[SysClkModule_CPU] <= this->oc->boostCPUFreq;
|
||||
|
||||
if (boostOK) {
|
||||
uint64_t core3Util = CpuCoreUtil(3, tickWaitTimeMs).Get();
|
||||
uint32_t core3Util = CpuCoreUtil(3, tickWaitTimeMs * 1000'000ULL).Get();
|
||||
bool lastBoost = this->oc->systemCoreBoostCPU;
|
||||
constexpr uint8_t BOOST_THRESHOLD = 95;
|
||||
constexpr uint32_t BOOST_THRESHOLD = 95'0;
|
||||
this->oc->systemCoreBoostCPU = (core3Util >= BOOST_THRESHOLD);
|
||||
|
||||
if (lastBoost && !this->oc->systemCoreBoostCPU)
|
||||
@@ -244,9 +241,12 @@ bool ClockManager::RefreshContext()
|
||||
uint32_t chargingLimit = this->GetConfig()->GetConfigValue(SysClkConfigValue_ChargingLimitPercentage);
|
||||
PsmExt::ChargingHandler(fastChargingEnabled, chargingLimit);
|
||||
|
||||
bool hasChanged = this->config->Refresh();
|
||||
this->rnxSync->ToggleSync(this->GetConfig()->GetConfigValue(SysClkConfigValue_SyncReverseNXMode));
|
||||
this->oc->allowUnsafeFreq = this->GetConfig()->GetConfigValue(SysClkConfigValue_AllowUnsafeFrequencies);
|
||||
bool configUpdated = this->config->Refresh();
|
||||
bool hasChanged = false;
|
||||
if (configUpdated) {
|
||||
this->rnxSync->ToggleSync(this->GetConfig()->GetConfigValue(SysClkConfigValue_SyncReverseNXMode));
|
||||
this->oc->allowUnsafeFreq = this->GetConfig()->GetConfigValue(SysClkConfigValue_AllowUnsafeFrequencies);
|
||||
}
|
||||
|
||||
bool enabled = this->GetConfig()->Enabled();
|
||||
if(enabled != this->context->enabled)
|
||||
@@ -261,13 +261,18 @@ bool ClockManager::RefreshContext()
|
||||
{
|
||||
this->oc->governor = governor;
|
||||
FileUtils::LogLine("[mgr] Governor status: %s", governor ? "enabled" : "disabled");
|
||||
if (governor)
|
||||
hasChanged = true;
|
||||
}
|
||||
|
||||
if (hasChanged) {
|
||||
if (enabled && governor)
|
||||
this->governor->Start();
|
||||
else
|
||||
this->governor->Stop();
|
||||
hasChanged = true;
|
||||
}
|
||||
|
||||
hasChanged |= configUpdated;
|
||||
|
||||
std::uint64_t applicationId = ProcessManagement::GetCurrentApplicationId();
|
||||
if (applicationId != this->context->applicationId)
|
||||
{
|
||||
|
||||
@@ -31,12 +31,12 @@ class Clocks
|
||||
static const char* GetThermalSensorName(SysClkThermalSensor sensor, bool pretty);
|
||||
static std::uint32_t GetNearestHz(SysClkModule module, SysClkProfile profile, std::uint32_t inHz, bool allowUnsafe);
|
||||
static std::uint32_t GetTemperatureMilli(SysClkThermalSensor sensor);
|
||||
static void GetList(SysClkModule module, std::uint32_t **outClocks);
|
||||
|
||||
protected:
|
||||
static std::int32_t GetTsTemperatureMilli(TsLocation location);
|
||||
static PcvModule GetPcvModule(SysClkModule sysclkModule);
|
||||
static PcvModuleId GetPcvModuleId(SysClkModule sysclkModule);
|
||||
static std::uint32_t GetNearestHz(SysClkModule module, std::uint32_t inHz);
|
||||
static void GetList(SysClkModule module, std::uint32_t **outClocks);
|
||||
static std::uint32_t GetMaxAllowedHz(SysClkModule module, SysClkProfile profile, bool allowUnsafe);
|
||||
};
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
#include "ipc_service.h"
|
||||
#include "oc_extra.h"
|
||||
|
||||
#define INNER_HEAP_SIZE 0x30000
|
||||
#define INNER_HEAP_SIZE 0x38000
|
||||
|
||||
extern "C"
|
||||
{
|
||||
|
||||
@@ -1,5 +1,51 @@
|
||||
#include "oc_extra.h"
|
||||
|
||||
/**
 * Builds a utilization sampler for one CPU core.
 *
 * @param coreid Core id forwarded to svcGetInfo(InfoType_IdleTickCount) when sampling.
 * @param ns     Length of one sampling window in nanoseconds (how long Get() sleeps).
 *
 * No default arguments are given here on purpose: adding defaults for every
 * parameter on an out-of-class definition would turn this into a default
 * constructor, which the C++ standard declares ill-formed ([dcl.fct.default]).
 * If defaults are wanted they belong on the in-class declaration.
 */
CpuCoreUtil::CpuCoreUtil(int coreid, uint64_t ns)
    : m_core_id(coreid), m_wait_time_ns(ns) { }
|
||||
|
||||
uint32_t CpuCoreUtil::Get() {
|
||||
struct _ctx {
|
||||
uint64_t timestamp;
|
||||
uint64_t idletick;
|
||||
} begin, end;
|
||||
|
||||
begin.timestamp = armTicksToNs(armGetSystemTick());
|
||||
begin.idletick = GetIdleTickCount();
|
||||
|
||||
svcSleepThread(m_wait_time_ns);
|
||||
|
||||
end.timestamp = armTicksToNs(armGetSystemTick());
|
||||
end.idletick = GetIdleTickCount();
|
||||
|
||||
uint64_t diff_idletick = end.idletick - begin.idletick;
|
||||
uint64_t real_elapsed_ns = end.timestamp - begin.timestamp;
|
||||
return UTIL_MAX - diff_idletick * 10 * 1000'000ULL / (TICKS_PER_MS * real_elapsed_ns);
|
||||
}
|
||||
|
||||
/**
 * Queries the kernel for the idle tick counter of core m_core_id.
 *
 * @return The counter value written by svcGetInfo. The result code of the
 *         call is not inspected; the local starts at 0, so 0 is returned if
 *         the call leaves it untouched.
 */
uint64_t CpuCoreUtil::GetIdleTickCount() {
    uint64_t ticks = 0;
    svcGetInfo(&ticks, InfoType_IdleTickCount, INVALID_HANDLE, m_core_id);
    return ticks;
}
|
||||
|
||||
|
||||
GpuCoreUtil::GpuCoreUtil(uint32_t nvgpu_field)
|
||||
: m_nvgpu_field(nvgpu_field) { }
|
||||
|
||||
uint32_t GpuCoreUtil::Get() {
|
||||
uint32_t load;
|
||||
nvIoctl(m_nvgpu_field, NVGPU_GPU_IOCTL_PMU_GET_GPU_LOAD, &load);
|
||||
return load;
|
||||
}
|
||||
|
||||
/**
 * Initialises both cached modes to ReverseNX_NotFound and records in
 * m_tool_enabled whether the boot2 flag file below can be opened for reading.
 */
ReverseNXSync::ReverseNXSync()
    : m_rt_mode(ReverseNX_NotFound), m_tool_mode(ReverseNX_NotFound)
{
    FILE* flag = fopen("/atmosphere/contents/0000000000534C56/flags/boot2.flag", "r");

    // The file content is irrelevant; only its presence is checked.
    m_tool_enabled = (flag != nullptr);
    if (m_tool_enabled)
        fclose(flag);
}
|
||||
|
||||
SysClkProfile ReverseNXSync::GetProfile(SysClkProfile real) {
|
||||
switch (this->GetMode()) {
|
||||
case ReverseNX_Docked:
|
||||
@@ -20,17 +66,6 @@ ReverseNXMode ReverseNXSync::GetMode() {
|
||||
return this->m_tool_mode;
|
||||
}
|
||||
|
||||
bool ReverseNXSync::CheckToolEnabled() {
|
||||
FILE *fp = fopen("/atmosphere/contents/0000000000534C56/flags/boot2.flag", "r");
|
||||
if (fp) {
|
||||
this->m_tool_enabled = true;
|
||||
fclose(fp);
|
||||
} else {
|
||||
this->m_tool_enabled = false;
|
||||
}
|
||||
return this->m_tool_enabled;
|
||||
}
|
||||
|
||||
ReverseNXMode ReverseNXSync::GetToolModeFromPatch(const char* patch_path) {
|
||||
constexpr uint32_t DOCKED_MAGIC = 0x320003E0;
|
||||
constexpr uint32_t HANDHELD_MAGIC = 0x52A00000;
|
||||
@@ -90,41 +125,66 @@ void PsmExt::ChargingHandler(bool fastChargingEnabled, uint32_t chargingLimit) {
|
||||
delete info;
|
||||
}
|
||||
|
||||
void Governor::Start() {
|
||||
m_stop_threads = false;
|
||||
svcSleepThread(8 * TICK_TIME_NS);
|
||||
Result rc = 0;
|
||||
Governor::Governor() {
|
||||
memset(reinterpret_cast<void*>(&m_cpu_freq), 0, sizeof(m_cpu_freq));
|
||||
memset(reinterpret_cast<void*>(&m_gpu_freq), 0, sizeof(m_gpu_freq));
|
||||
|
||||
for (int core = 0; core < CORE_NUMS; core++) {
|
||||
if (m_t_cpuworker[core].handle)
|
||||
continue;
|
||||
s_CoreContext* s = InitCoreContext(&m_cpu_core_ctx[core], this, core);
|
||||
rc = threadCreate(&m_t_cpuworker[core], &CheckCpuUtilWorker, (void*)s, NULL, 0x1000, 0x20, core);
|
||||
if (rc) {
|
||||
ERROR_THROW("Cannot create thread m_t_cpuworker[%d]: %u", core, rc);
|
||||
return;
|
||||
}
|
||||
rc = threadStart(&m_t_cpuworker[core]);
|
||||
if (rc) {
|
||||
ERROR_THROW("Cannot start thread m_t_cpuworker[%d]: %u", core, rc);
|
||||
return;
|
||||
}
|
||||
}
|
||||
rc = threadCreate(&m_t_main, &Main, (void*)this, NULL, 0x1000, 0x3F, 3);
|
||||
if (rc) {
|
||||
ERROR_THROW("Cannot create thread m_t_main: %u", rc);
|
||||
return;
|
||||
}
|
||||
rc = threadStart(&m_t_main);
|
||||
if (rc) {
|
||||
ERROR_THROW("Cannot start thread m_t_main: %u", rc);
|
||||
return;
|
||||
m_cpu_freq.module = SysClkModule_CPU;
|
||||
m_gpu_freq.module = SysClkModule_GPU;
|
||||
|
||||
uint32_t* list = NULL;
|
||||
Clocks::GetList(SysClkModule_CPU, &list);
|
||||
m_cpu_freq.hz_list = list;
|
||||
Clocks::GetList(SysClkModule_GPU, &list);
|
||||
m_gpu_freq.hz_list = list;
|
||||
|
||||
m_cpu_freq.boost_hz = 1785'000'000;
|
||||
m_cpu_freq.utilref_hz = 2397'000'000;
|
||||
|
||||
m_gpu_freq.boost_hz = 76'800'000;
|
||||
m_gpu_freq.min_hz = 153'600'000;
|
||||
m_gpu_freq.utilref_hz = 1305'600'000;
|
||||
|
||||
nvInitialize();
|
||||
Result rc = nvOpen(&m_nvgpu_field, "/dev/nvhost-ctrl-gpu");
|
||||
if (R_FAILED(rc)) {
|
||||
ASSERT_RESULT_OK(rc, "nvOpen");
|
||||
nvExit();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Tears the governor down: stops it first, then closes the nvgpu descriptor
 * held in m_nvgpu_field and shuts the nv service down.
 */
Governor::~Governor()
{
    // Stop the governor before the descriptor it samples from goes away.
    this->Stop();

    nvClose(this->m_nvgpu_field);
    nvExit();
}
|
||||
|
||||
void Governor::Start() {
|
||||
if (m_running)
|
||||
return;
|
||||
|
||||
m_running = true;
|
||||
Result rc = 0;
|
||||
for (int core = 0; core < CORE_NUMS; core++) {
|
||||
s_CoreContext* s = InitCoreContext(&m_cpu_core_ctx[core], this, core);
|
||||
int prio = (core == CORE_NUMS - 1) ? 0x3F : 0x3B; // Pre-emptive MT
|
||||
rc = threadCreate(&m_t_cpuworker[core], &CheckCpuUtilWorker, (void*)s, NULL, 0x1000, prio, core);
|
||||
ASSERT_RESULT_OK(rc, "threadCreate");
|
||||
rc = threadStart(&m_t_cpuworker[core]);
|
||||
ASSERT_RESULT_OK(rc, "threadStart");
|
||||
}
|
||||
rc = threadCreate(&m_t_main, &Main, (void*)this, NULL, 0x1000, 0x3F, 3);
|
||||
ASSERT_RESULT_OK(rc, "threadCreate");
|
||||
rc = threadStart(&m_t_main);
|
||||
ASSERT_RESULT_OK(rc, "threadStart");
|
||||
}
|
||||
|
||||
void Governor::Stop() {
|
||||
m_stop_threads = true;
|
||||
svcSleepThread(8 * TICK_TIME_NS);
|
||||
if (!m_running)
|
||||
return;
|
||||
|
||||
m_running = false;
|
||||
svcSleepThread(TICK_TIME_NS);
|
||||
|
||||
threadWaitForExit(&m_t_main);
|
||||
threadClose(&m_t_main);
|
||||
@@ -141,10 +201,11 @@ void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) {
|
||||
|
||||
switch (module) {
|
||||
case SysClkModule_CPU:
|
||||
m_cpu_freq.idx_max_hz = FindIndex(&m_cpu_freq, max_hz);
|
||||
m_cpu_freq.max_hz = max_hz;
|
||||
break;
|
||||
case SysClkModule_GPU:
|
||||
m_gpu_freq.idx_max_hz = FindIndex(&m_gpu_freq, max_hz);
|
||||
m_gpu_freq.max_hz = max_hz;
|
||||
m_gpu_freq.min_hz = (m_gpu_freq.max_hz == 76'800'000) ? 76'800'000 : 153'600'000;
|
||||
break;
|
||||
case SysClkModule_MEM:
|
||||
m_mem_freq = max_hz;
|
||||
@@ -160,59 +221,50 @@ void Governor::SetPerfConf(uint32_t id) {
|
||||
m_apm_conf = Clocks::GetEmbeddedApmConfig(id);
|
||||
}
|
||||
|
||||
uint32_t Governor::FindIndex(s_Freq* f, uint32_t hz) {
|
||||
uint32_t idx = 0, hz_in_list;
|
||||
while ((hz_in_list = f->hz_list[idx]) != 0) {
|
||||
if (hz == hz_in_list)
|
||||
return idx;
|
||||
idx++;
|
||||
}
|
||||
ERROR_THROW("[mgr] Cannot find hz: %lu", hz);
|
||||
return 0;
|
||||
uint32_t Governor::s_FreqContext::GetNormalizedUtil(uint32_t raw_util) {
|
||||
return ((uint64_t)raw_util * target_hz / utilref_hz);
|
||||
}
|
||||
|
||||
bool Governor::TargetRamp(s_Freq* f, FREQ_RAMP_DIRECTION dir) {
|
||||
uint8_t idx_old = f->idx_target_hz;
|
||||
// Schedutil: https://github.com/torvalds/linux/blob/master/kernel/sched/cpufreq_schedutil.c
|
||||
// C = 1.25, tipping-point 80.0% (used in Linux schedutil): C * util == util + (util >> 2)
|
||||
// C = 1.5, tipping-point 66.7%: C * util == util + (util >> 1)
|
||||
// Utilization is frequency-invariant (normalized):
|
||||
// next_freq = C * max_freq(ref_freq) * util / max
|
||||
void Governor::s_FreqContext::SetNextFreq(uint32_t norm_util) {
|
||||
uint32_t prev_hz = target_hz;
|
||||
uint32_t next_freq = (uint64_t)(norm_util + (norm_util >> 1)) * utilref_hz / UTIL_MAX;
|
||||
|
||||
switch (dir) {
|
||||
case RAMP_UP:
|
||||
f->idx_target_hz++;
|
||||
if (f->idx_target_hz > f->idx_max_hz)
|
||||
f->idx_target_hz = f->idx_max_hz;
|
||||
break;
|
||||
case RAMP_DOWN:
|
||||
if (f->idx_target_hz > 0)
|
||||
f->idx_target_hz--;
|
||||
if (f->idx_target_hz < f->idx_min_hz)
|
||||
f->idx_target_hz = f->idx_min_hz;
|
||||
break;
|
||||
case RAMP_MAX:
|
||||
f->idx_target_hz = f->idx_max_hz;
|
||||
break;
|
||||
case RAMP_MIN:
|
||||
f->idx_target_hz = f->idx_min_hz;
|
||||
break;
|
||||
case RAMP_BOOST:
|
||||
f->idx_target_hz = f->idx_boost_hz;
|
||||
break;
|
||||
uint32_t adj_next_freq;
|
||||
if (next_freq > max_hz) {
|
||||
adj_next_freq = max_hz;
|
||||
} else if (next_freq < min_hz) {
|
||||
adj_next_freq = min_hz;
|
||||
} else {
|
||||
uint32_t* p = hz_list;
|
||||
while (*p) {
|
||||
if (*p > next_freq)
|
||||
break;
|
||||
p++;
|
||||
}
|
||||
adj_next_freq = *p;
|
||||
}
|
||||
|
||||
uint8_t idx_new = f->idx_target_hz;
|
||||
bool changed = idx_old != idx_new;
|
||||
return changed;
|
||||
target_hz = adj_next_freq;
|
||||
bool changed = target_hz != prev_hz;
|
||||
if (changed)
|
||||
SetHz();
|
||||
}
|
||||
|
||||
void Governor::SetHz(s_Freq* f) {
|
||||
uint32_t hz = f->hz_list[f->idx_target_hz];
|
||||
if (hz)
|
||||
Clocks::SetHz(f->module, hz);
|
||||
void Governor::s_FreqContext::SetHz() {
|
||||
if (target_hz)
|
||||
Clocks::SetHz(module, target_hz);
|
||||
}
|
||||
|
||||
void Governor::SetBoostHz(s_Freq* f) {
|
||||
f->idx_target_hz = f->idx_boost_hz;
|
||||
if (f->module == SysClkModule_CPU && f->idx_max_hz > f->idx_boost_hz)
|
||||
f->idx_target_hz = f->idx_max_hz;
|
||||
SetHz(f);
|
||||
void Governor::s_FreqContext::SetBoostHz() {
|
||||
target_hz = boost_hz;
|
||||
if (module == SysClkModule_CPU && max_hz > boost_hz)
|
||||
target_hz = max_hz;
|
||||
SetHz();
|
||||
}
|
||||
|
||||
Governor::s_CoreContext* Governor::InitCoreContext(
|
||||
@@ -237,68 +289,49 @@ void Governor::CheckCpuUtilWorker(void* args) {
|
||||
}
|
||||
|
||||
void Governor::CheckCpuUtilWorkerAppCore(int64_t coreid) {
|
||||
constexpr uint64_t STUCK_TICKS = 5;
|
||||
s_Queue<uint64_t> q;
|
||||
while (!m_stop_threads) {
|
||||
constexpr uint64_t STUCK_TICKS = SAMPLE_RATE / 10;
|
||||
while (m_running) {
|
||||
bool isBusy = m_core3_stuck_cnt > STUCK_TICKS * (CORE_NUMS - 1);
|
||||
if (isBusy) {
|
||||
m_core3_stuck_cnt = 0;
|
||||
SetBoostHz(&m_cpu_freq);
|
||||
m_cpu_freq.SetBoostHz();
|
||||
svcSleepThread(STUCK_TICKS * TICK_TIME_NS);
|
||||
} else {
|
||||
m_core3_stuck_cnt++;
|
||||
}
|
||||
|
||||
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_NS).Get();
|
||||
q.PopAndPush(load);
|
||||
m_cpu_core_ctx[coreid].util = q.GetAvg();
|
||||
m_cpu_core_ctx[coreid].util = m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get());
|
||||
}
|
||||
}
|
||||
|
||||
void Governor::CheckCpuUtilWorkerSysCore() {
|
||||
s_Queue<uint64_t> q;
|
||||
int64_t coreid = CORE_NUMS - 1;
|
||||
while (!m_stop_threads) {
|
||||
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_NS).Get();
|
||||
q.PopAndPush(load);
|
||||
m_cpu_core_ctx[coreid].util = q.GetAvg() * 7 / 8; // Adjusted, Multipler: 0.875
|
||||
while (m_running) {
|
||||
m_cpu_core_ctx[coreid].util = m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get());
|
||||
}
|
||||
}
|
||||
|
||||
void Governor::Main(void* args) {
|
||||
Governor* self = static_cast<Governor*>(args);
|
||||
s_FreqContext* cpu_ctx = &self->m_cpu_freq;
|
||||
s_FreqContext* gpu_ctx = &self->m_gpu_freq;
|
||||
uint32_t nvgpu_field = self->m_nvgpu_field;
|
||||
|
||||
auto GetCpuUtil = [self]() {
|
||||
uint64_t cpu_util = self->m_cpu_core_ctx[0].util;
|
||||
s_Util cpu_util, gpu_util;
|
||||
auto GetAdjCpuUtil = [self, cpu_util]() mutable {
|
||||
uint64_t util = self->m_cpu_core_ctx[0].util;
|
||||
for (size_t i = 1; i < CORE_NUMS; i++) {
|
||||
if (cpu_util < self->m_cpu_core_ctx[i].util)
|
||||
cpu_util = self->m_cpu_core_ctx[i].util;
|
||||
if (util < self->m_cpu_core_ctx[i].util)
|
||||
util = self->m_cpu_core_ctx[i].util;
|
||||
}
|
||||
return cpu_util;
|
||||
cpu_util.Update(util);
|
||||
return cpu_util.Get();
|
||||
};
|
||||
|
||||
struct s_MaxQueue {
|
||||
uint32_t queue[QUEUE_SIZE] = { 0 };
|
||||
size_t pos = 0;
|
||||
} q;
|
||||
|
||||
auto GetGpuUtil = [nvgpu_field, q]() mutable {
|
||||
uint32_t load = GpuCoreUtil(nvgpu_field, TICK_TIME_NS).Get();
|
||||
if (load > 20) { // Ignore load <= 2.0%
|
||||
q.queue[q.pos % QUEUE_SIZE] = load;
|
||||
q.pos++;
|
||||
} else {
|
||||
load = q.queue[(q.pos - 1) % QUEUE_SIZE];
|
||||
}
|
||||
// Get max of the queue
|
||||
for (size_t i = 1; i < QUEUE_SIZE; i++) {
|
||||
size_t p = (q.pos + i - 1) % QUEUE_SIZE;
|
||||
if (load < q.queue[p])
|
||||
load = q.queue[p];
|
||||
}
|
||||
|
||||
return load;
|
||||
auto GetAdjGpuUtil = [gpu_ctx, nvgpu_field, gpu_util]() mutable {
|
||||
uint32_t util = gpu_ctx->GetNormalizedUtil(GpuCoreUtil(nvgpu_field).Get());
|
||||
gpu_util.Update(util);
|
||||
return gpu_util.Get();
|
||||
};
|
||||
|
||||
constexpr uint64_t UPDATE_CONTEXT_RATE = SAMPLE_RATE / 2;
|
||||
@@ -306,7 +339,7 @@ void Governor::Main(void* args) {
|
||||
bool CPUBoosted = false;
|
||||
bool GPUThrottled = false;
|
||||
|
||||
while (!self->m_stop_threads) {
|
||||
while (self->m_running) {
|
||||
self->m_core3_stuck_cnt = 0;
|
||||
|
||||
bool shouldUpdateContext = update_ticks++ >= UPDATE_CONTEXT_RATE;
|
||||
@@ -323,14 +356,14 @@ void Governor::Main(void* args) {
|
||||
GPUThrottled = apmExtIsBoostMode(self->m_perf_conf_id);
|
||||
CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id);
|
||||
|
||||
self->m_gpu_freq.idx_target_hz = FindIndex(&self->m_gpu_freq, hz);
|
||||
gpu_ctx->target_hz = hz;
|
||||
if (GPUThrottled)
|
||||
SetBoostHz(&self->m_gpu_freq);
|
||||
gpu_ctx->SetBoostHz();
|
||||
|
||||
hz = Clocks::GetCurrentHz(SysClkModule_CPU);
|
||||
self->m_cpu_freq.idx_target_hz = FindIndex(&self->m_cpu_freq, hz);
|
||||
cpu_ctx->target_hz = hz;
|
||||
if (CPUBoosted)
|
||||
SetBoostHz(&self->m_cpu_freq);
|
||||
cpu_ctx->SetBoostHz();
|
||||
|
||||
hz = Clocks::GetCurrentHz(SysClkModule_MEM);
|
||||
if (!self->m_mem_freq)
|
||||
@@ -338,29 +371,10 @@ void Governor::Main(void* args) {
|
||||
if (hz != self->m_mem_freq)
|
||||
Clocks::SetHz(SysClkModule_MEM, self->m_mem_freq);
|
||||
} else {
|
||||
if (!GPUThrottled) {
|
||||
uint32_t gpu_util = GetGpuUtil();
|
||||
if (gpu_util > GPU_THR_RAMP_MAX) {
|
||||
if (TargetRamp(&self->m_gpu_freq, RAMP_MAX))
|
||||
SetHz(&self->m_gpu_freq);
|
||||
} else if (gpu_util > GPU_THR_RAMP_UP) {
|
||||
if (TargetRamp(&self->m_gpu_freq, RAMP_UP))
|
||||
SetHz(&self->m_gpu_freq);
|
||||
} else if (gpu_util < GPU_THR_RAMP_DOWN) {
|
||||
if (TargetRamp(&self->m_gpu_freq, RAMP_DOWN))
|
||||
SetHz(&self->m_gpu_freq);
|
||||
}
|
||||
}
|
||||
if (!CPUBoosted) {
|
||||
uint64_t cpu_util = GetCpuUtil();
|
||||
if (cpu_util > CPU_THR_RAMP_UP) {
|
||||
if (TargetRamp(&self->m_cpu_freq, RAMP_UP))
|
||||
SetHz(&self->m_cpu_freq);
|
||||
} else if (cpu_util < CPU_THR_RAMP_DOWN) {
|
||||
if (TargetRamp(&self->m_cpu_freq, RAMP_DOWN))
|
||||
SetHz(&self->m_cpu_freq);
|
||||
}
|
||||
}
|
||||
if (!GPUThrottled)
|
||||
gpu_ctx->SetNextFreq(GetAdjGpuUtil());
|
||||
if (!CPUBoosted)
|
||||
cpu_ctx->SetNextFreq(GetAdjCpuUtil());
|
||||
}
|
||||
|
||||
svcSleepThread(TICK_TIME_NS);
|
||||
|
||||
@@ -13,57 +13,31 @@
|
||||
|
||||
class CpuCoreUtil {
|
||||
public:
|
||||
CpuCoreUtil (int coreid = -2, uint64_t ns = 1000'000ULL):
|
||||
m_core_id(coreid), m_wait_time_ns(ns) {};
|
||||
|
||||
inline uint64_t Get() { Start(); WaitForStop(); Stop(); return Calculate(); };
|
||||
inline void Start() { m_idletick = GetIdleTickCount(); };
|
||||
inline void WaitForStop() { svcSleepThread(m_wait_time_ns); };
|
||||
inline void Stop() { m_idletick = GetIdleTickCount() - m_idletick; };
|
||||
|
||||
static constexpr uint64_t TICKS_PER_MS = 192;
|
||||
inline uint64_t Calculate() { return 100'0 - m_idletick * 10 * 1000'000ULL / (TICKS_PER_MS * m_wait_time_ns); };
|
||||
CpuCoreUtil (int coreid, uint64_t ns);
|
||||
uint32_t Get();
|
||||
|
||||
protected:
|
||||
const int m_core_id;
|
||||
const uint64_t m_wait_time_ns;
|
||||
uint64_t m_idletick;
|
||||
static constexpr uint64_t TICKS_PER_MS = 192;
|
||||
static constexpr uint32_t UTIL_MAX = 100'0;
|
||||
|
||||
inline uint64_t GetIdleTickCount() {
|
||||
uint64_t idletick = 0;
|
||||
svcGetInfo(&idletick, InfoType_IdleTickCount, INVALID_HANDLE, m_core_id);
|
||||
return idletick;
|
||||
};
|
||||
uint64_t GetIdleTickCount();
|
||||
};
|
||||
|
||||
class GpuCoreUtil {
|
||||
public:
|
||||
GpuCoreUtil (uint32_t nvgpu_field, uint64_t ns = 1000'000ULL):
|
||||
m_nvgpu_field(nvgpu_field), m_wait_time_ns(ns) {};
|
||||
|
||||
inline uint64_t Get() { Wait(); return GetLoad(); };
|
||||
inline void Wait() { svcSleepThread(m_wait_time_ns); };
|
||||
inline uint32_t GetLoad() {
|
||||
uint32_t load;
|
||||
nvIoctl(m_nvgpu_field, NVGPU_GPU_IOCTL_PMU_GET_GPU_LOAD, &load);
|
||||
// if (R_FAILED(rc)) {
|
||||
// ERROR_THROW("[mgr] nvIoctl() failed: 0x%lX", rc);
|
||||
// }
|
||||
return load;
|
||||
};
|
||||
GpuCoreUtil (uint32_t nvgpu_field);
|
||||
uint32_t Get();
|
||||
|
||||
protected:
|
||||
uint32_t m_nvgpu_field;
|
||||
const uint64_t m_wait_time_ns;
|
||||
static constexpr uint64_t NVGPU_GPU_IOCTL_PMU_GET_GPU_LOAD = 0x80044715;
|
||||
};
|
||||
|
||||
class ReverseNXSync {
|
||||
public:
|
||||
ReverseNXSync ()
|
||||
: m_rt_mode(ReverseNX_NotFound), m_tool_mode(ReverseNX_NotFound) {
|
||||
CheckToolEnabled();
|
||||
};
|
||||
ReverseNXSync ();
|
||||
|
||||
void ToggleSync(bool enable) { m_sync_enabled = enable; };
|
||||
void Reset(uint64_t app_id) { m_app_id = app_id; SetRTMode(ReverseNX_NotFound); GetToolMode(); }
|
||||
@@ -79,7 +53,6 @@ protected:
|
||||
bool m_tool_enabled;
|
||||
bool m_sync_enabled;
|
||||
|
||||
bool CheckToolEnabled();
|
||||
ReverseNXMode GetToolModeFromPatch(const char* patch_path);
|
||||
ReverseNXMode RecheckToolMode();
|
||||
};
|
||||
@@ -90,57 +63,23 @@ namespace PsmExt {
|
||||
|
||||
class Governor {
|
||||
public:
|
||||
Governor() {
|
||||
memset(reinterpret_cast<void*>(&m_cpu_freq), 0, sizeof(m_cpu_freq));
|
||||
memset(reinterpret_cast<void*>(&m_gpu_freq), 0, sizeof(m_gpu_freq));
|
||||
|
||||
m_cpu_freq.module = SysClkModule_CPU;
|
||||
m_gpu_freq.module = SysClkModule_GPU;
|
||||
|
||||
m_cpu_freq.hz_list = &sysclk_g_freq_table_cpu_hz[0];
|
||||
m_gpu_freq.hz_list = &sysclk_g_freq_table_gpu_hz[0];
|
||||
|
||||
m_cpu_freq.idx_boost_hz = FindIndex(&m_cpu_freq, 1785'000'000);
|
||||
|
||||
m_gpu_freq.idx_boost_hz = FindIndex(&m_gpu_freq, 76'800'000);
|
||||
m_gpu_freq.idx_min_hz = FindIndex(&m_gpu_freq, 153'600'000);
|
||||
|
||||
nvInitialize();
|
||||
Result rc = nvOpen(&m_nvgpu_field, "/dev/nvhost-ctrl-gpu");
|
||||
if (R_FAILED(rc)) {
|
||||
ERROR_THROW("[mgr] nvOpen() failed: 0x%lX", rc);
|
||||
nvExit();
|
||||
}
|
||||
};
|
||||
|
||||
~Governor() {
|
||||
Stop();
|
||||
nvClose(m_nvgpu_field);
|
||||
nvExit();
|
||||
};
|
||||
Governor();
|
||||
~Governor();
|
||||
|
||||
void Start();
|
||||
void Stop();
|
||||
void SetMaxHz(uint32_t max_hz, SysClkModule module);
|
||||
void SetCPUBoostHz(uint32_t hz) { m_cpu_freq.idx_boost_hz = FindIndex(&m_cpu_freq, hz); };
|
||||
void SetCPUBoostHz(uint32_t hz) { m_cpu_freq.boost_hz = hz; };
|
||||
void SetPerfConf(uint32_t id);
|
||||
|
||||
protected:
|
||||
// Parameters for sampling
|
||||
static constexpr uint64_t SAMPLE_RATE = 200;
|
||||
static constexpr uint64_t TICK_TIME_MS = 1000 / SAMPLE_RATE;
|
||||
static constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE;
|
||||
|
||||
// Parameters for frequency ramp threshold
|
||||
static constexpr uint64_t CPU_THR_RAMP_DOWN = 70'0;
|
||||
static constexpr uint64_t CPU_THR_RAMP_UP = 90'0;
|
||||
static constexpr uint64_t GPU_THR_RAMP_DOWN = 70'0;
|
||||
static constexpr uint64_t GPU_THR_RAMP_UP = 80'0;
|
||||
static constexpr uint64_t GPU_THR_RAMP_MAX = 90'0;
|
||||
|
||||
static constexpr int CORE_NUMS = 4;
|
||||
|
||||
bool m_stop_threads = false;
|
||||
bool m_running = false;
|
||||
Thread m_t_cpuworker[CORE_NUMS], m_t_main;
|
||||
std::atomic<uint64_t> m_core3_stuck_cnt = 0;
|
||||
|
||||
@@ -149,62 +88,52 @@ protected:
|
||||
uint32_t m_perf_conf_id;
|
||||
SysClkApmConfiguration *m_apm_conf;
|
||||
|
||||
typedef enum {
|
||||
RAMP_UP,
|
||||
RAMP_DOWN,
|
||||
RAMP_MAX,
|
||||
RAMP_MIN,
|
||||
RAMP_BOOST,
|
||||
} FREQ_RAMP_DIRECTION;
|
||||
|
||||
typedef struct {
|
||||
SysClkModule module;
|
||||
uint32_t* hz_list;
|
||||
uint8_t idx_target_hz;
|
||||
uint8_t idx_min_hz;
|
||||
uint8_t idx_max_hz;
|
||||
uint8_t idx_boost_hz;
|
||||
} s_Freq;
|
||||
s_Freq m_cpu_freq, m_gpu_freq;
|
||||
uint32_t target_hz;
|
||||
uint32_t min_hz;
|
||||
uint32_t max_hz;
|
||||
uint32_t boost_hz;
|
||||
uint32_t utilref_hz;
|
||||
|
||||
static uint32_t FindIndex(s_Freq* f, uint32_t hz);
|
||||
static bool TargetRamp(s_Freq* f, FREQ_RAMP_DIRECTION dir);
|
||||
static void SetHz(s_Freq* f);
|
||||
static void SetBoostHz(s_Freq* f);
|
||||
uint32_t GetNormalizedUtil(uint32_t raw_util);
|
||||
void SetNextFreq(uint32_t norm_util);
|
||||
void SetHz();
|
||||
void SetBoostHz();
|
||||
} s_FreqContext;
|
||||
s_FreqContext m_cpu_freq, m_gpu_freq;
|
||||
|
||||
typedef struct {
|
||||
Governor* self;
|
||||
int64_t id;
|
||||
uint64_t util;
|
||||
int id;
|
||||
uint32_t util;
|
||||
} s_CoreContext;
|
||||
s_CoreContext m_cpu_core_ctx[CORE_NUMS];
|
||||
|
||||
s_CoreContext* InitCoreContext(s_CoreContext* context, Governor* self, int64_t id = 0);
|
||||
|
||||
static void CheckCpuUtilWorker(void* args);
|
||||
static void Main(void* args);
|
||||
// PELT: https://github.com/torvalds/linux/blob/master/kernel/sched/pelt.c
|
||||
// Util_acc_n = Util_0 + Util_1 * D + Util_2 * D^2 + ... + Util_n * D^n
|
||||
// To approximate D (decay multiplier):
|
||||
// After 100 ms (if SAMPLE_RATE == 200, 20 samples)
|
||||
// UTIL_MAX * D^20 ≈ 1 (UTIL_MAX decayed to 1)
|
||||
// D = 0.7079457843841379... ≈ 725 / 1024
|
||||
// Util_acc_20 ≈ 3421, Util_acc_40 ≈ 3424, Util_acc_inf ≈ 3424
|
||||
static constexpr uint32_t UTIL_MAX = 100'0;
|
||||
struct s_Util {
|
||||
uint32_t util_acc = 0;
|
||||
|
||||
private:
|
||||
static constexpr size_t QUEUE_SIZE = 8;
|
||||
template <typename T>
|
||||
struct s_Queue {
|
||||
// Much faster than <queue> from stl
|
||||
T queue[QUEUE_SIZE] = { 0 };
|
||||
T sum = 0;
|
||||
size_t pos = 0;
|
||||
static constexpr uint32_t DECAY_DIVIDENT = 725;
|
||||
static constexpr uint32_t DECAY_DIVISOR = 1024;
|
||||
static constexpr uint32_t UTIL_ACC_MAX = 3424;
|
||||
|
||||
T GetAvg() { return sum / QUEUE_SIZE; };
|
||||
T GetFirst() { return queue[pos % QUEUE_SIZE]; };
|
||||
T GetLast() { return queue[(pos - 1) % QUEUE_SIZE]; };
|
||||
T PopAndPush(T val_to_push) {
|
||||
T val_to_pop;
|
||||
sum -= (val_to_pop = GetFirst()); // Pop and subtract from sum
|
||||
sum += (queue[pos % QUEUE_SIZE] = val_to_push); // Push and add to sum
|
||||
pos++;
|
||||
return val_to_pop;
|
||||
}
|
||||
uint32_t Get() { return (util_acc * UTIL_MAX / UTIL_ACC_MAX); };
|
||||
void Update(uint32_t util) { util_acc = util_acc * DECAY_DIVIDENT / DECAY_DIVISOR + util; };
|
||||
};
|
||||
|
||||
void CheckCpuUtilWorkerSysCore();
|
||||
void CheckCpuUtilWorkerAppCore(int64_t coreid);
|
||||
static void CheckCpuUtilWorker(void* args);
|
||||
void CheckCpuUtilWorkerAppCore(int64_t coreid);
|
||||
void CheckCpuUtilWorkerSysCore();
|
||||
static void Main(void* args);
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user