Fix maxMemFreq detection; Add some other alternative governor logic
This commit is contained in:
@@ -149,10 +149,10 @@ void ClockManager::Tick()
|
||||
for (unsigned int module = 0; module < SysClkModule_EnumMax; module++)
|
||||
{
|
||||
uint32_t hz = GetHz((SysClkModule)module);
|
||||
|
||||
this->governor->SetMaxHz(hz, (SysClkModule)module);
|
||||
|
||||
if (hz && hz != this->context->freqs[module] && !this->oc->governor)
|
||||
bool handledByGovernor = this->oc->governor && (module != SysClkModule_MEM);
|
||||
if (hz && hz != this->context->freqs[module] && !handledByGovernor)
|
||||
{
|
||||
// Skip setting CPU or GPU clocks in CpuBoostMode if CPU <= boostCPUFreq or GPU >= 76.8MHz
|
||||
bool skipBoost = apmExtIsBoostMode(this->context->perfConfId);
|
||||
@@ -219,6 +219,7 @@ bool ClockManager::RefreshContext()
|
||||
if (Clocks::GetIsMariko()) {
|
||||
bool allowUnsafe = this->GetConfig()->GetConfigValue(SysClkConfigValue_AllowUnsafeFrequencies);
|
||||
Clocks::SetAllowUnsafe(allowUnsafe);
|
||||
this->governor->SetCPUBoostHz(Clocks::GetNearestHz(SysClkModule_CPU, SysClkProfile_EnumMax, Clocks::boostCpuFreq));
|
||||
this->governor->SetAutoCPUBoost(this->GetConfig()->GetConfigValue(SysClkConfigValue_AutoCPUBoost));
|
||||
}
|
||||
}
|
||||
@@ -301,7 +302,8 @@ bool ClockManager::RefreshContext()
|
||||
if (hz != 0 && hz != this->context->freqs[module])
|
||||
{
|
||||
this->context->freqs[module] = hz;
|
||||
if (!this->oc->governor) {
|
||||
bool handledByGovernor = this->oc->governor && (module != SysClkModule_MEM);
|
||||
if (!handledByGovernor) {
|
||||
FileUtils::LogLine("[mgr] %s clock change: %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10);
|
||||
hasChanged = true;
|
||||
}
|
||||
|
||||
@@ -372,7 +372,8 @@ std::uint32_t Clocks::GetCurrentHz(SysClkModule module)
|
||||
|
||||
std::uint32_t Clocks::GetNearestHz(SysClkModule module, SysClkProfile profile, std::uint32_t inHz)
|
||||
{
|
||||
if (module == SysClkModule_MEM && inHz == MAX_MEM_CLOCK)
|
||||
uint32_t inMHz = inHz / 1000000U;
|
||||
if (module == SysClkModule_MEM && inMHz == MAX_MEM_CLOCK / 1000'000)
|
||||
return Clocks::maxMemFreq;
|
||||
|
||||
uint32_t* min = NULL;
|
||||
@@ -382,9 +383,7 @@ std::uint32_t Clocks::GetNearestHz(SysClkModule module, SysClkProfile profile, s
|
||||
if (!min || !max)
|
||||
ERROR_THROW("table lookup failed for SysClkModule: %u", module);
|
||||
|
||||
uint32_t inMHz = inHz / 1000000U;
|
||||
uint32_t* p = min;
|
||||
|
||||
while(p <= max) {
|
||||
if (inMHz == *p / 1000000U)
|
||||
return *p;
|
||||
|
||||
@@ -5,21 +5,21 @@ CpuCoreUtil::CpuCoreUtil(int coreid = -2, uint64_t ns = 1000'000ULL)
|
||||
|
||||
// Samples the system tick and the idle tick counter, blocks for
// m_wait_time_ns, samples both again, and returns UTIL_MAX minus the scaled
// share of the interval that was counted as idle.
//
// NOTE(review): this listing appears to contain two variants of the same
// calculation (one based on nanosecond timestamps, one based on raw system
// ticks). As listed, the first `return` wins and everything after it is
// unreachable -- confirm which variant is meant to remain.
uint32_t CpuCoreUtil::Get() {
|
||||
// Snapshot of the counters taken at one point in time.
struct _ctx {
|
||||
uint64_t timestamp;
|
||||
uint64_t systick;
|
||||
uint64_t idletick;
|
||||
} begin, end;
|
||||
|
||||
// Snapshot before the wait: system tick converted to ns, raw system tick, idle ticks.
begin.timestamp = armTicksToNs(armGetSystemTick());
|
||||
begin.systick = armGetSystemTick();
|
||||
begin.idletick = GetIdleTickCount();
|
||||
|
||||
// Block for the configured interval between the two snapshots.
svcSleepThread(m_wait_time_ns);
|
||||
|
||||
// Snapshot after the wait, taken in the same order as the first one.
end.timestamp = armTicksToNs(armGetSystemTick());
|
||||
end.systick = armGetSystemTick();
|
||||
end.idletick = GetIdleTickCount();
|
||||
|
||||
uint64_t diff_idletick = end.idletick - begin.idletick;
|
||||
uint64_t real_elapsed_ns = end.timestamp - begin.timestamp;
|
||||
// Idle share scaled by elapsed nanoseconds and TICKS_PER_MS, subtracted from UTIL_MAX.
// NOTE(review): there is no clamp; if the scaled idle share ever exceeded
// UTIL_MAX the unsigned subtraction would wrap -- confirm this cannot happen.
return UTIL_MAX - diff_idletick * 10 * 1000'000ULL / (TICKS_PER_MS * real_elapsed_ns);
|
||||
// Unreachable as listed (see the note at the top): the same ratio expressed
// directly in system ticks instead of nanoseconds.
uint64_t diff_systick = end.systick - begin.systick;
|
||||
return UTIL_MAX - diff_idletick * 10 * 100ULL / diff_systick;
|
||||
}
|
||||
|
||||
uint64_t CpuCoreUtil::GetIdleTickCount() {
|
||||
@@ -209,10 +209,6 @@ void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) {
|
||||
m_gpu_freq.max_hz = max_hz;
|
||||
m_gpu_freq.min_hz = (m_gpu_freq.max_hz <= 153'600'000) ? max_hz : 153'600'000;
|
||||
break;
|
||||
case SysClkModule_MEM:
|
||||
m_mem_freq = max_hz;
|
||||
Clocks::SetHz(SysClkModule_MEM, max_hz);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@@ -234,24 +230,103 @@ uint32_t Governor::s_FreqContext::GetNormalizedUtil(uint32_t raw_util) {
|
||||
// next_freq = C * max_freq(ref_freq) * util / max
|
||||
void Governor::s_FreqContext::SetNextFreq(uint32_t norm_util) {
|
||||
uint32_t prev_hz = target_hz;
|
||||
uint32_t next_freq = (uint64_t)(norm_util + (norm_util >> 1)) * utilref_hz / UTIL_MAX;
|
||||
|
||||
uint32_t adj_next_freq = target_hz;
|
||||
if (next_freq > max_hz) {
|
||||
adj_next_freq = max_hz;
|
||||
} else if (next_freq < min_hz) {
|
||||
adj_next_freq = min_hz;
|
||||
} else {
|
||||
// === Add a non-linear coefficient to tipping-point ===
|
||||
// float nonlinear_coeff = (float)max_hz / target_hz; // Always non-negative
|
||||
// #ifdef __aarch64__
|
||||
// asm ("FSQRT %s0, %s0"
|
||||
// : "=w" (nonlinear_coeff)
|
||||
// : "w" (nonlinear_coeff));
|
||||
// asm ("FSQRT %s0, %s0"
|
||||
// : "=w" (nonlinear_coeff)
|
||||
// : "w" (nonlinear_coeff));
|
||||
// #else
|
||||
// nonlinear_coeff = sqrt(sqrt(nonlinear_coeff));
|
||||
// #endif
|
||||
|
||||
// === Tipping-point look-up table for all frequencies ===
|
||||
// typedef struct {
|
||||
// uint16_t numerator;
|
||||
// uint16_t denom_shift;
|
||||
// } lut_entry;
|
||||
|
||||
// static constexpr auto apply_cpu_nonlinear_coeff = [](uint32_t input) {
|
||||
// lut_entry lut[] = {
|
||||
// { 4645, 12 }, // 1963500000
|
||||
// {},
|
||||
// { 4505, 12 }, // 2091000000
|
||||
// { 8971, 13 }, // 2193000000
|
||||
// { 1117, 10 }, // 2295000000
|
||||
// { 1117, 10 }, // 2397000000
|
||||
// {},
|
||||
// { 5575, 12 }, // 612000000
|
||||
// { 10699, 13 }, // 714000000
|
||||
// {},
|
||||
// { 81, 6 }, // 816000000
|
||||
// { 10113, 13 }, // 918000000
|
||||
// { 1239, 10 }, // 1020000000
|
||||
// {},
|
||||
// { 9749, 13 }, // 1122000000
|
||||
// { 4807, 12 }, // 1224000000
|
||||
// { 2375, 11 }, // 1326000000
|
||||
// { 10041, 13 }, // 1428000000
|
||||
// {},
|
||||
// { 9283, 13 }, // 1581000000
|
||||
// {},
|
||||
// { 9215, 13 }, // 1683000000
|
||||
// { 18309, 14 }, // 1785000000
|
||||
// };
|
||||
// size_t idx = (input >> 20) % 24;
|
||||
// lut_entry entry = lut[idx];
|
||||
// return (input >> entry.denom_shift) * entry.numerator;
|
||||
// };
|
||||
|
||||
// static constexpr auto apply_gpu_nonlinear_coeff = [](uint32_t input) {
|
||||
// lut_entry lut[] = {
|
||||
// { 1087, 10 }, // 1305600000
|
||||
// { 2351, 11 }, // 1075200000
|
||||
// { 9749, 13 }, // 844800000
|
||||
// { 81, 6 }, // 614400000
|
||||
// { 2949, 11 }, // 384000000
|
||||
// { 9, 2 }, // 153600000
|
||||
// {},
|
||||
// { 1089, 10 }, // 1228800000
|
||||
// { 2375, 11 }, // 998400000
|
||||
// { 1239, 10 }, // 768000000
|
||||
// { 10699, 13 }, // 537600000
|
||||
// { 25, 4 }, // 307200000
|
||||
// { 4, 0 }, // 76800000
|
||||
// { 1087, 10 }, // 1267200000
|
||||
// { 1165, 10 }, // 1152000000
|
||||
// { 4807, 12 }, // 921600000
|
||||
// { 10113, 13 }, // 691200000
|
||||
// { 5575, 12 }, // 460800000
|
||||
// };
|
||||
// size_t idx = (input >> 18) % 20;
|
||||
// lut_entry entry = lut[idx];
|
||||
// return (input >> entry.denom_shift) * entry.numerator;
|
||||
// };
|
||||
|
||||
auto FindHzInTable = [](uint32_t* hz_list, uint32_t in_hz) {
|
||||
uint32_t* p = hz_list;
|
||||
do {
|
||||
if (*p >= next_freq) {
|
||||
adj_next_freq = *p;
|
||||
break;
|
||||
}
|
||||
} while (*p++);
|
||||
}
|
||||
while (*p) {
|
||||
if (in_hz <= *p)
|
||||
return p;
|
||||
p++;
|
||||
}
|
||||
return (--p);
|
||||
};
|
||||
|
||||
uint32_t next_freq = utilref_hz / UTIL_MAX * norm_util;
|
||||
next_freq += next_freq >> 1;
|
||||
|
||||
if (next_freq >= max_hz)
|
||||
target_hz = max_hz;
|
||||
else if (next_freq <= min_hz)
|
||||
target_hz = min_hz;
|
||||
else
|
||||
target_hz = *FindHzInTable(hz_list, next_freq);
|
||||
|
||||
target_hz = adj_next_freq;
|
||||
bool changed = target_hz != prev_hz;
|
||||
if (changed)
|
||||
SetHz();
|
||||
@@ -262,7 +337,7 @@ void Governor::s_FreqContext::SetHz() {
|
||||
Clocks::SetHz(module, target_hz);
|
||||
}
|
||||
|
||||
void Governor::s_FreqContext::SetBoostHz() {
|
||||
void Governor::s_FreqContext::Boost() {
|
||||
target_hz = boost_hz;
|
||||
if (module == SysClkModule_CPU && max_hz > boost_hz)
|
||||
target_hz = max_hz;
|
||||
@@ -275,9 +350,7 @@ void Governor::CpuUtilWorker(void* args) {
|
||||
Governor* self = s->self;
|
||||
|
||||
while (self->m_running) {
|
||||
uint64_t timestamp = armTicksToNs(armGetSystemTick());
|
||||
s->timestamp = timestamp;
|
||||
|
||||
uint64_t tick = s->tick = armGetSystemTick();
|
||||
s->util = self->m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get());
|
||||
|
||||
bool CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id);
|
||||
@@ -287,11 +360,15 @@ void Governor::CpuUtilWorker(void* args) {
|
||||
}
|
||||
|
||||
for (int id = 0; id < CORE_NUMS; id++) {
|
||||
if (abs(self->m_cpu_core_ctx[id].timestamp - timestamp) < TICK_TIME_NS * 10)
|
||||
if (id == coreid)
|
||||
continue;
|
||||
|
||||
uint64_t diff = std::abs((int64_t)self->m_cpu_core_ctx[id].tick - (int64_t)tick);
|
||||
if (diff < SYSTICK_HZ / SAMPLE_RATE * 10)
|
||||
continue;
|
||||
|
||||
if (id == SYS_CORE_ID && self->m_syscore_autoboost) {
|
||||
self->m_cpu_freq.SetBoostHz();
|
||||
self->m_cpu_freq.Boost();
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -316,8 +393,8 @@ void Governor::Main(void* args) {
|
||||
util = self->m_cpu_core_ctx[i].util;
|
||||
}
|
||||
cpu_util.Update(util);
|
||||
if (self->m_cpu_core_ctx[SYS_CORE_ID].util > 95'0 && self->m_syscore_autoboost)
|
||||
cpu_ctx->SetBoostHz();
|
||||
if (self->m_cpu_core_ctx[SYS_CORE_ID].util > BOOST_THRESHOLD && self->m_syscore_autoboost)
|
||||
cpu_ctx->Boost();
|
||||
else
|
||||
cpu_ctx->SetNextFreq(cpu_util.Get());
|
||||
};
|
||||
@@ -350,18 +427,12 @@ void Governor::Main(void* args) {
|
||||
|
||||
gpu_ctx->target_hz = hz;
|
||||
if (GPUThrottled)
|
||||
gpu_ctx->SetBoostHz();
|
||||
gpu_ctx->Boost();
|
||||
|
||||
hz = Clocks::GetCurrentHz(SysClkModule_CPU);
|
||||
cpu_ctx->target_hz = hz;
|
||||
if (CPUBoosted)
|
||||
cpu_ctx->SetBoostHz();
|
||||
|
||||
hz = Clocks::GetCurrentHz(SysClkModule_MEM);
|
||||
if (!self->m_mem_freq)
|
||||
self->m_mem_freq = hz;
|
||||
if (hz != self->m_mem_freq)
|
||||
Clocks::SetHz(SysClkModule_MEM, self->m_mem_freq);
|
||||
cpu_ctx->Boost();
|
||||
}
|
||||
|
||||
if (!GPUThrottled)
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <stack>
|
||||
#include <nxExt.h>
|
||||
#include <sysclk.h>
|
||||
#include <switch.h>
|
||||
@@ -19,7 +18,7 @@ public:
|
||||
protected:
|
||||
const int m_core_id;
|
||||
const uint64_t m_wait_time_ns;
|
||||
static constexpr uint64_t TICKS_PER_MS = 192;
|
||||
static constexpr uint64_t IDLETICKS_PER_MS = 192;
|
||||
static constexpr uint32_t UTIL_MAX = 100'0;
|
||||
|
||||
uint64_t GetIdleTickCount();
|
||||
@@ -71,12 +70,14 @@ public:
|
||||
void Stop();
|
||||
void SetMaxHz(uint32_t max_hz, SysClkModule module);
|
||||
void SetAutoCPUBoost(bool enabled) { m_syscore_autoboost = enabled; };
|
||||
void SetCPUBoostHz(uint32_t boost_hz) { m_cpu_freq.boost_hz = boost_hz; };
|
||||
void SetPerfConf(uint32_t id);
|
||||
|
||||
protected:
|
||||
// Parameters for sampling
|
||||
static constexpr uint64_t SAMPLE_RATE = 200;
|
||||
static constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE;
|
||||
static constexpr uint64_t SYSTICK_HZ = 19200000;
|
||||
|
||||
static constexpr int CORE_NUMS = 4;
|
||||
static constexpr int SYS_CORE_ID = (CORE_NUMS - 1);
|
||||
@@ -86,7 +87,6 @@ protected:
|
||||
Thread m_t_cpuworker[CORE_NUMS], m_t_main;
|
||||
|
||||
uint32_t m_nvgpu_field;
|
||||
uint32_t m_mem_freq;
|
||||
uint32_t m_perf_conf_id;
|
||||
SysClkApmConfiguration *m_apm_conf;
|
||||
|
||||
@@ -102,7 +102,7 @@ protected:
|
||||
uint32_t GetNormalizedUtil(uint32_t raw_util);
|
||||
void SetNextFreq(uint32_t norm_util);
|
||||
void SetHz();
|
||||
void SetBoostHz();
|
||||
void Boost();
|
||||
} s_FreqContext;
|
||||
s_FreqContext m_cpu_freq, m_gpu_freq;
|
||||
|
||||
@@ -110,7 +110,7 @@ protected:
|
||||
Governor* self;
|
||||
int id;
|
||||
uint32_t util;
|
||||
uint64_t timestamp;
|
||||
uint64_t tick;
|
||||
} s_CoreContext;
|
||||
s_CoreContext m_cpu_core_ctx[CORE_NUMS];
|
||||
|
||||
@@ -135,4 +135,66 @@ protected:
|
||||
|
||||
static void CpuUtilWorker(void* args);
|
||||
static void Main(void* args);
|
||||
|
||||
// Get max from a sliding window in O(1)
|
||||
static constexpr size_t WINDOW_SIZE = SAMPLE_RATE / 10;
|
||||
template <typename T>
|
||||
class SWindowMax {
|
||||
protected:
|
||||
typedef struct {
|
||||
T item;
|
||||
T max;
|
||||
} s_Entry;
|
||||
|
||||
struct s_Stack {
|
||||
s_Entry m_stack[WINDOW_SIZE] = {};
|
||||
size_t m_next = 0;
|
||||
|
||||
bool empty() { return m_next == 0; };
|
||||
s_Entry top() { return m_stack[m_next-1]; };
|
||||
s_Entry pop() { return m_stack[--m_next]; };
|
||||
void push(s_Entry item) {
|
||||
if (m_next == WINDOW_SIZE)
|
||||
return;
|
||||
m_stack[m_next++] = item;
|
||||
};
|
||||
};
|
||||
|
||||
s_Stack enqStack;
|
||||
s_Stack deqStack;
|
||||
|
||||
void Push(s_Stack& stack, T item) {
|
||||
s_Entry n;
|
||||
n.item = item;
|
||||
n.max = enqStack.empty() ? item : std::max(item, enqStack.top().max);
|
||||
stack.push(n);
|
||||
}
|
||||
|
||||
void Pop() {
|
||||
if (deqStack.empty()) {
|
||||
while (!enqStack.empty())
|
||||
Push(deqStack, enqStack.pop().max);
|
||||
}
|
||||
deqStack.pop();
|
||||
}
|
||||
|
||||
public:
|
||||
SWindowMax() { deqStack.m_next = WINDOW_SIZE; }
|
||||
|
||||
void Add(T item) { Pop(); Push(enqStack, item); }
|
||||
|
||||
T Max() {
|
||||
if (!enqStack.empty()) {
|
||||
T enqMax = enqStack.top().max;
|
||||
if (!deqStack.empty()) {
|
||||
T deqMax = deqStack.top().max;
|
||||
return std::max(deqMax, enqMax);
|
||||
}
|
||||
return enqMax;
|
||||
}
|
||||
if (!deqStack.empty())
|
||||
return deqStack.top().max;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user