diff --git a/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp b/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp index b2d41c8c..dfa8457c 100644 --- a/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp +++ b/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp @@ -149,10 +149,10 @@ void ClockManager::Tick() for (unsigned int module = 0; module < SysClkModule_EnumMax; module++) { uint32_t hz = GetHz((SysClkModule)module); - this->governor->SetMaxHz(hz, (SysClkModule)module); - if (hz && hz != this->context->freqs[module] && !this->oc->governor) + bool handledByGovernor = this->oc->governor && (module != SysClkModule_MEM); + if (hz && hz != this->context->freqs[module] && !handledByGovernor) { // Skip setting CPU or GPU clocks in CpuBoostMode if CPU <= boostCPUFreq or GPU >= 76.8MHz bool skipBoost = apmExtIsBoostMode(this->context->perfConfId); @@ -219,6 +219,7 @@ bool ClockManager::RefreshContext() if (Clocks::GetIsMariko()) { bool allowUnsafe = this->GetConfig()->GetConfigValue(SysClkConfigValue_AllowUnsafeFrequencies); Clocks::SetAllowUnsafe(allowUnsafe); + this->governor->SetCPUBoostHz(Clocks::GetNearestHz(SysClkModule_CPU, SysClkProfile_EnumMax, Clocks::boostCpuFreq)); this->governor->SetAutoCPUBoost(this->GetConfig()->GetConfigValue(SysClkConfigValue_AutoCPUBoost)); } } @@ -301,7 +302,8 @@ bool ClockManager::RefreshContext() if (hz != 0 && hz != this->context->freqs[module]) { this->context->freqs[module] = hz; - if (!this->oc->governor) { + bool handledByGovernor = this->oc->governor && (module != SysClkModule_MEM); + if (!handledByGovernor) { FileUtils::LogLine("[mgr] %s clock change: %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10); hasChanged = true; } diff --git a/Source/sys-clk-OC/sysmodule/src/clocks.cpp b/Source/sys-clk-OC/sysmodule/src/clocks.cpp index 0d5b109d..bc3e92cd 100644 --- a/Source/sys-clk-OC/sysmodule/src/clocks.cpp +++ b/Source/sys-clk-OC/sysmodule/src/clocks.cpp @@ -372,7 +372,8 @@ std::uint32_t Clocks::GetCurrentHz(SysClkModule module) std::uint32_t Clocks::GetNearestHz(SysClkModule module, SysClkProfile profile, std::uint32_t inHz) { - if (module == SysClkModule_MEM && inHz == MAX_MEM_CLOCK) + uint32_t inMHz = inHz / 1000000U; + if (module == SysClkModule_MEM && inMHz == MAX_MEM_CLOCK / 1000'000) return Clocks::maxMemFreq; uint32_t* min = NULL; @@ -382,9 +383,7 @@ std::uint32_t Clocks::GetNearestHz(SysClkModule module, SysClkProfile profile, s if (!min || !max) ERROR_THROW("table lookup failed for SysClkModule: %u", module); - uint32_t inMHz = inHz / 1000000U; uint32_t* p = min; - while(p <= max) { if (inMHz == *p / 1000000U) return *p; diff --git a/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp b/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp index 6bb801d1..94f8c4a0 100644 --- a/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp +++ b/Source/sys-clk-OC/sysmodule/src/oc_extra.cpp @@ -5,21 +5,21 @@ CpuCoreUtil::CpuCoreUtil(int coreid = -2, uint64_t ns = 1000'000ULL) uint32_t CpuCoreUtil::Get() { struct _ctx { - uint64_t timestamp; + uint64_t systick; uint64_t idletick; } begin, end; - begin.timestamp = armTicksToNs(armGetSystemTick()); + begin.systick = armGetSystemTick(); begin.idletick = GetIdleTickCount(); svcSleepThread(m_wait_time_ns); - end.timestamp = armTicksToNs(armGetSystemTick()); + end.systick = armGetSystemTick(); end.idletick = GetIdleTickCount(); uint64_t diff_idletick = end.idletick - begin.idletick; - uint64_t real_elapsed_ns = end.timestamp - begin.timestamp; - return UTIL_MAX - diff_idletick * 10 * 1000'000ULL / (TICKS_PER_MS * real_elapsed_ns); + uint64_t diff_systick = end.systick - begin.systick; + return UTIL_MAX - diff_idletick * 10 * 100ULL / diff_systick; } uint64_t CpuCoreUtil::GetIdleTickCount() { @@ -209,10 +209,6 @@ void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) { m_gpu_freq.max_hz = max_hz; m_gpu_freq.min_hz = (m_gpu_freq.max_hz <= 153'600'000) ? max_hz : 153'600'000; break; - case SysClkModule_MEM: - m_mem_freq = max_hz; - Clocks::SetHz(SysClkModule_MEM, max_hz); - break; default: break; } @@ -234,24 +230,103 @@ uint32_t Governor::s_FreqContext::GetNormalizedUtil(uint32_t raw_util) { // next_freq = C * max_freq(ref_freq) * util / max void Governor::s_FreqContext::SetNextFreq(uint32_t norm_util) { uint32_t prev_hz = target_hz; - uint32_t next_freq = (uint64_t)(norm_util + (norm_util >> 1)) * utilref_hz / UTIL_MAX; - uint32_t adj_next_freq = target_hz; - if (next_freq > max_hz) { - adj_next_freq = max_hz; - } else if (next_freq < min_hz) { - adj_next_freq = min_hz; - } else { + // === Add a non-linear coefficient to tipping-point === + // float nonlinear_coeff = (float)max_hz / target_hz; // Always non-negative + // #ifdef __aarch64__ + // asm ("FSQRT %s0, %s0" + // : "=w" (nonlinear_coeff) + // : "w" (nonlinear_coeff)); + // asm ("FSQRT %s0, %s0" + // : "=w" (nonlinear_coeff) + // : "w" (nonlinear_coeff)); + // #else + // nonlinear_coeff = sqrt(sqrt(nonlinear_coeff)); + // #endif + + // === Tipping-point look-up table for all frequencies === + // typedef struct { + // uint16_t numerator; + // uint16_t denom_shift; + // } lut_entry; + + // static constexpr auto apply_cpu_nonlinear_coeff = [](uint32_t input) { + // lut_entry lut[] = { + // { 4645, 12 }, // 1963500000 + // {}, + // { 4505, 12 }, // 2091000000 + // { 8971, 13 }, // 2193000000 + // { 1117, 10 }, // 2295000000 + // { 1117, 10 }, // 2397000000 + // {}, + // { 5575, 12 }, // 612000000 + // { 10699, 13 }, // 714000000 + // {}, + // { 81, 6 }, // 816000000 + // { 10113, 13 }, // 918000000 + // { 1239, 10 }, // 1020000000 + // {}, + // { 9749, 13 }, // 1122000000 + // { 4807, 12 }, // 1224000000 + // { 2375, 11 }, // 1326000000 + // { 10041, 13 }, // 1428000000 + // {}, + // { 9283, 13 }, // 1581000000 + // {}, + // { 9215, 13 }, // 1683000000 + // { 18309, 14 }, // 1785000000 + // }; + // size_t idx = (input >> 20) % 24; + // lut_entry entry = lut[idx]; + // return (input >> entry.denom_shift) * entry.numerator; + // }; + + // static constexpr auto apply_gpu_nonlinear_coeff = [](uint32_t input) { + // lut_entry lut[] = { + // { 1087, 10 }, // 1305600000 + // { 2351, 11 }, // 1075200000 + // { 9749, 13 }, // 844800000 + // { 81, 6 }, // 614400000 + // { 2949, 11 }, // 384000000 + // { 9, 2 }, // 153600000 + // {}, + // { 1089, 10 }, // 1228800000 + // { 2375, 11 }, // 998400000 + // { 1239, 10 }, // 768000000 + // { 10699, 13 }, // 537600000 + // { 25, 4 }, // 307200000 + // { 4, 0 }, // 76800000 + // { 1087, 10 }, // 1267200000 + // { 1165, 10 }, // 1152000000 + // { 4807, 12 }, // 921600000 + // { 10113, 13 }, // 691200000 + // { 5575, 12 }, // 460800000 + // }; + // size_t idx = (input >> 18) % 20; + // lut_entry entry = lut[idx]; + // return (input >> entry.denom_shift) * entry.numerator; + // }; + + auto FindHzInTable = [](uint32_t* hz_list, uint32_t in_hz) { uint32_t* p = hz_list; - do { - if (*p >= next_freq) { - adj_next_freq = *p; - break; - } - } while (*p++); - } + while (*p) { + if (in_hz <= *p) + return p; + p++; + } + return (--p); + }; + + uint32_t next_freq = utilref_hz / UTIL_MAX * norm_util; + next_freq += next_freq >> 1; + + if (next_freq >= max_hz) + target_hz = max_hz; + else if (next_freq <= min_hz) + target_hz = min_hz; + else + target_hz = *FindHzInTable(hz_list, next_freq); - target_hz = adj_next_freq; bool changed = target_hz != prev_hz; if (changed) SetHz(); @@ -262,7 +337,7 @@ void Governor::s_FreqContext::SetHz() { Clocks::SetHz(module, target_hz); } -void Governor::s_FreqContext::SetBoostHz() { +void Governor::s_FreqContext::Boost() { target_hz = boost_hz; if (module == SysClkModule_CPU && max_hz > boost_hz) target_hz = max_hz; @@ -275,9 +350,7 @@ void Governor::CpuUtilWorker(void* args) { Governor* self = s->self; while (self->m_running) { - uint64_t timestamp = armTicksToNs(armGetSystemTick()); - s->timestamp = timestamp; - + uint64_t tick = s->tick = armGetSystemTick(); s->util = self->m_cpu_freq.GetNormalizedUtil(CpuCoreUtil(coreid, TICK_TIME_NS).Get()); bool CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id); @@ -287,11 +360,15 @@ void Governor::CpuUtilWorker(void* args) { } for (int id = 0; id < CORE_NUMS; id++) { - if (abs(self->m_cpu_core_ctx[id].timestamp - timestamp) < TICK_TIME_NS * 10) + if (id == coreid) + continue; + + uint64_t diff = std::abs((int64_t)self->m_cpu_core_ctx[id].tick - (int64_t)tick); + if (diff < SYSTICK_HZ / SAMPLE_RATE * 10) continue; if (id == SYS_CORE_ID && self->m_syscore_autoboost) { - self->m_cpu_freq.SetBoostHz(); + self->m_cpu_freq.Boost(); break; } @@ -316,8 +393,8 @@ void Governor::Main(void* args) { util = self->m_cpu_core_ctx[i].util; } cpu_util.Update(util); - if (self->m_cpu_core_ctx[SYS_CORE_ID].util > 95'0 && self->m_syscore_autoboost) - cpu_ctx->SetBoostHz(); + if (self->m_cpu_core_ctx[SYS_CORE_ID].util > BOOST_THRESHOLD && self->m_syscore_autoboost) + cpu_ctx->Boost(); else cpu_ctx->SetNextFreq(cpu_util.Get()); }; @@ -350,18 +427,12 @@ void Governor::Main(void* args) { gpu_ctx->target_hz = hz; if (GPUThrottled) - gpu_ctx->SetBoostHz(); + gpu_ctx->Boost(); hz = Clocks::GetCurrentHz(SysClkModule_CPU); cpu_ctx->target_hz = hz; if (CPUBoosted) - cpu_ctx->SetBoostHz(); - - hz = Clocks::GetCurrentHz(SysClkModule_MEM); - if (!self->m_mem_freq) - self->m_mem_freq = hz; - if (hz != self->m_mem_freq) - Clocks::SetHz(SysClkModule_MEM, self->m_mem_freq); + cpu_ctx->Boost(); } if (!GPUThrottled) diff --git a/Source/sys-clk-OC/sysmodule/src/oc_extra.h b/Source/sys-clk-OC/sysmodule/src/oc_extra.h index 52fa90e5..a3af2ec2 100644 --- a/Source/sys-clk-OC/sysmodule/src/oc_extra.h +++ b/Source/sys-clk-OC/sysmodule/src/oc_extra.h @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -19,7 +18,7 @@ public: protected: const int m_core_id; const uint64_t m_wait_time_ns; - static constexpr uint64_t TICKS_PER_MS = 192; + static constexpr uint64_t IDLETICKS_PER_MS = 192; static constexpr uint32_t UTIL_MAX = 100'0; uint64_t GetIdleTickCount(); @@ -71,12 +70,14 @@ public: void Stop(); void SetMaxHz(uint32_t max_hz, SysClkModule module); void SetAutoCPUBoost(bool enabled) { m_syscore_autoboost = enabled; }; + void SetCPUBoostHz(uint32_t boost_hz) { m_cpu_freq.boost_hz = boost_hz; }; void SetPerfConf(uint32_t id); protected: // Parameters for sampling static constexpr uint64_t SAMPLE_RATE = 200; static constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE; + static constexpr uint64_t SYSTICK_HZ = 19200000; static constexpr int CORE_NUMS = 4; static constexpr int SYS_CORE_ID = (CORE_NUMS - 1); @@ -86,7 +87,6 @@ protected: Thread m_t_cpuworker[CORE_NUMS], m_t_main; uint32_t m_nvgpu_field; - uint32_t m_mem_freq; uint32_t m_perf_conf_id; SysClkApmConfiguration *m_apm_conf; @@ -102,7 +102,7 @@ protected: uint32_t GetNormalizedUtil(uint32_t raw_util); void SetNextFreq(uint32_t norm_util); void SetHz(); - void SetBoostHz(); + void Boost(); } s_FreqContext; s_FreqContext m_cpu_freq, m_gpu_freq; @@ -110,7 +110,7 @@ protected: Governor* self; int id; uint32_t util; - uint64_t timestamp; + uint64_t tick; } s_CoreContext; s_CoreContext m_cpu_core_ctx[CORE_NUMS]; @@ -135,4 +135,66 @@ protected: static void CpuUtilWorker(void* args); static void Main(void* args); + + // Get max from a sliding window in O(1) + static constexpr size_t WINDOW_SIZE = SAMPLE_RATE / 10; + template + class SWindowMax { + protected: + typedef struct { + T item; + T max; + } s_Entry; + + struct s_Stack { + s_Entry m_stack[WINDOW_SIZE] = {}; + size_t m_next = 0; + + bool empty() { return m_next == 0; }; + s_Entry top() { return m_stack[m_next-1]; }; + s_Entry pop() { return m_stack[--m_next]; }; + void push(s_Entry item) { + if (m_next == WINDOW_SIZE) + return; + m_stack[m_next++] = item; + }; + }; + + s_Stack enqStack; + s_Stack deqStack; + + void Push(s_Stack& stack, T item) { + s_Entry n; + n.item = item; + n.max = enqStack.empty() ? item : std::max(item, enqStack.top().max); + stack.push(n); + } + + void Pop() { + if (deqStack.empty()) { + while (!enqStack.empty()) + Push(deqStack, enqStack.pop().max); + } + deqStack.pop(); + } + + public: + SWindowMax() { deqStack.m_next = WINDOW_SIZE; } + + void Add(T item) { Pop(); Push(enqStack, item); } + + T Max() { + if (!enqStack.empty()) { + T enqMax = enqStack.top().max; + if (!deqStack.empty()) { + T deqMax = deqStack.top().max; + return std::max(deqMax, enqMax); + } + return enqMax; + } + if (!deqStack.empty()) + return deqStack.top().max; + return 0; + } + }; };