Fix boost mode typo; raise sample rate to 200/s; rename GPU boost mode to GPU throttled mode
This commit is contained in:
@@ -39,7 +39,7 @@ DEFINES := -DDISABLE_IPC -DTARGET="\"$(TARGET)\"" -DTARGET_VERSION="\"$(TARGET_V
|
||||
|
||||
ARCH := -march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
|
||||
|
||||
CFLAGS := -g -Wall -O2 -ffunction-sections \
|
||||
CFLAGS := -g -Wall -O3 -ffunction-sections \
|
||||
$(ARCH) $(DEFINES)
|
||||
|
||||
CFLAGS += $(INCLUDE) -D__SWITCH__
|
||||
|
||||
@@ -23,7 +23,13 @@ void apmExtExit(void);
|
||||
Result apmExtGetPerformanceMode(u32 *out_mode);
|
||||
Result apmExtSysRequestPerformanceMode(u32 mode);
|
||||
Result apmExtGetCurrentPerformanceConfiguration(u32 *out_conf);
|
||||
bool apmExtIsBoostMode(u32 conf_id, bool allow_cpu_limited);
|
||||
|
||||
/**
 * Reports whether a performance configuration has the CPU boosted to 1785 MHz.
 *
 * Declared `static inline` so that every translation unit including this
 * header (C or C++) gets its own definition and never depends on an
 * out-of-line copy being emitted elsewhere.
 *
 * @param conf_id Performance configuration ID to classify.
 * @return true when conf_id is 0x92220009 or 0x9222000A, false otherwise.
 */
static inline bool apmExtIsCPUBoosted(u32 conf_id) {
    return (conf_id == 0x92220009 || conf_id == 0x9222000A);
}
|
||||
/**
 * Reports whether a performance configuration is a boost mode, i.e. one in
 * which the GPU is throttled to 76.8 MHz.
 *
 * Declared `static inline` so that every translation unit including this
 * header (C or C++) gets its own definition and never depends on an
 * out-of-line copy being emitted elsewhere.
 *
 * @param conf_id Performance configuration ID to classify.
 * @return true when conf_id lies in the inclusive range
 *         0x92220009..0x9222000C, false otherwise.
 */
static inline bool apmExtIsBoostMode(u32 conf_id) {
    return (conf_id >= 0x92220009 && conf_id <= 0x9222000C);
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
||||
@@ -64,9 +64,3 @@ Result apmExtGetCurrentPerformanceConfiguration(u32 *out_conf)
|
||||
{
|
||||
return serviceDispatchOut(&g_apmSysSrv, 7, *out_conf);
|
||||
}
|
||||
|
||||
/**
 * Classifies a performance configuration ID as a boost mode.
 *
 * @param conf_id           Performance configuration ID to test.
 * @param allow_cpu_limited When true, accept the whole inclusive range
 *                          0x92220009..0x9222000C; when false, accept only
 *                          0x92220009 and 0x9222000A.
 * @return true if conf_id matches the selected set of configurations.
 */
bool apmExtIsBoostMode(u32 conf_id, bool allow_cpu_limited) {
    // Configuration IDs are eight hex digits wide. The earlier seven-digit
    // literals (0x922200C / 0x922200A) put the upper bound below the lower
    // bound, so the range test could never succeed, and made the equality
    // test miss the second ID.
    if (allow_cpu_limited)
        return (conf_id >= 0x92220009 && conf_id <= 0x9222000C);
    return (conf_id == 0x92220009 || conf_id == 0x9222000A);
}
|
||||
@@ -77,15 +77,15 @@ ClockManager::~ClockManager()
|
||||
delete this->config;
|
||||
}
|
||||
|
||||
bool ClockManager::IsCpuBoostMode()
|
||||
bool ClockManager::IsBoostMode()
|
||||
{
|
||||
std::uint32_t confId = this->context->perfConfId;
|
||||
bool isCpuBoostMode = apmExtIsBoostMode(confId, false);
|
||||
if (isCpuBoostMode && !this->oc->boostCPUFreq) {
|
||||
bool isBoostMode = apmExtIsBoostMode(confId);
|
||||
if (apmExtIsCPUBoosted(confId) && !this->oc->boostCPUFreq) {
|
||||
this->oc->boostCPUFreq = std::max(this->context->freqs[SysClkModule_CPU], 1785'000'000U);
|
||||
this->governor->SetCPUBoostHz(this->oc->boostCPUFreq);
|
||||
}
|
||||
return isCpuBoostMode;
|
||||
return isBoostMode;
|
||||
}
|
||||
|
||||
void ClockManager::SetRunning(bool running)
|
||||
@@ -186,7 +186,7 @@ void ClockManager::Tick()
|
||||
if (hz && hz != this->context->freqs[module])
|
||||
{
|
||||
// Skip setting CPU or GPU clocks in CpuBoostMode if CPU <= boostCPUFreq or GPU >= 76.8MHz
|
||||
bool skipBoost = IsCpuBoostMode() && ((module == SysClkModule_CPU && hz <= this->oc->boostCPUFreq) || module == SysClkModule_GPU);
|
||||
bool skipBoost = IsBoostMode() && ((module == SysClkModule_CPU && hz <= this->oc->boostCPUFreq) || module == SysClkModule_GPU);
|
||||
if (!skipBoost) {
|
||||
FileUtils::LogLine("[mgr] %s clock set : %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10);
|
||||
Clocks::SetHz((SysClkModule)module, hz);
|
||||
@@ -312,7 +312,7 @@ bool ClockManager::RefreshContext()
|
||||
}
|
||||
|
||||
// let ptm module handle boost clocks rather than resetting
|
||||
if (hasChanged && !IsCpuBoostMode()) {
|
||||
if (hasChanged && !IsBoostMode()) {
|
||||
Clocks::ResetToStock();
|
||||
}
|
||||
|
||||
|
||||
@@ -52,7 +52,7 @@ class ClockManager
|
||||
ReverseNXSync *rnxSync;
|
||||
Governor *governor;
|
||||
|
||||
bool IsCpuBoostMode();
|
||||
bool IsBoostMode();
|
||||
|
||||
uint32_t GetHz(SysClkModule);
|
||||
};
|
||||
|
||||
@@ -92,7 +92,7 @@ void PsmExt::ChargingHandler(bool fastChargingEnabled, uint32_t chargingLimit) {
|
||||
|
||||
void Governor::Start() {
|
||||
m_stop_threads = false;
|
||||
svcSleepThread(8 * TICK_TIME_MAIN_NS);
|
||||
svcSleepThread(8 * TICK_TIME_NS);
|
||||
Result rc = 0;
|
||||
|
||||
for (int core = 0; core < CORE_NUMS; core++) {
|
||||
@@ -124,7 +124,7 @@ void Governor::Start() {
|
||||
|
||||
void Governor::Stop() {
|
||||
m_stop_threads = true;
|
||||
svcSleepThread(8 * TICK_TIME_MAIN_NS);
|
||||
svcSleepThread(8 * TICK_TIME_NS);
|
||||
|
||||
threadWaitForExit(&m_t_main);
|
||||
threadClose(&m_t_main);
|
||||
@@ -144,7 +144,7 @@ void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) {
|
||||
m_cpu_freq.idx_max_hz = FindIndex(&m_cpu_freq, max_hz);
|
||||
break;
|
||||
case SysClkModule_GPU:
|
||||
m_gpu_freq.idx_boost_hz = m_gpu_freq.idx_max_hz = FindIndex(&m_gpu_freq, max_hz);
|
||||
m_gpu_freq.idx_max_hz = FindIndex(&m_gpu_freq, max_hz);
|
||||
break;
|
||||
case SysClkModule_MEM:
|
||||
m_mem_freq = max_hz;
|
||||
@@ -237,19 +237,19 @@ void Governor::CheckCpuUtilWorker(void* args) {
|
||||
}
|
||||
|
||||
void Governor::CheckCpuUtilWorkerAppCore(int64_t coreid) {
|
||||
constexpr uint64_t STUCK_TICKS = 2;
|
||||
constexpr uint64_t STUCK_TICKS = 5;
|
||||
s_Queue<uint64_t> q;
|
||||
while (!m_stop_threads) {
|
||||
bool isBusy = m_core3_stuck_cnt > STUCK_TICKS * (CORE_NUMS - 1);
|
||||
if (isBusy) {
|
||||
m_core3_stuck_cnt = 0;
|
||||
SetBoostHz(&m_cpu_freq);
|
||||
svcSleepThread(STUCK_TICKS * TICK_TIME_CPU_NS);
|
||||
svcSleepThread(STUCK_TICKS * TICK_TIME_NS);
|
||||
} else {
|
||||
m_core3_stuck_cnt++;
|
||||
}
|
||||
|
||||
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_CPU_MS).Get();
|
||||
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_NS).Get();
|
||||
q.PopAndPush(load);
|
||||
m_cpu_core_ctx[coreid].util = q.GetAvg();
|
||||
}
|
||||
@@ -259,7 +259,7 @@ void Governor::CheckCpuUtilWorkerSysCore() {
|
||||
s_Queue<uint64_t> q;
|
||||
int64_t coreid = CORE_NUMS - 1;
|
||||
while (!m_stop_threads) {
|
||||
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_CPU_MS).Get();
|
||||
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_NS).Get();
|
||||
q.PopAndPush(load);
|
||||
m_cpu_core_ctx[coreid].util = q.GetAvg() * 7 / 8; // Adjusted, Multipler: 0.875
|
||||
}
|
||||
@@ -284,7 +284,7 @@ void Governor::Main(void* args) {
|
||||
} q;
|
||||
|
||||
auto GetGpuUtil = [nvgpu_field, q]() mutable {
|
||||
uint32_t load = GpuCoreUtil(nvgpu_field, TICK_TIME_GPU_MS).Get();
|
||||
uint32_t load = GpuCoreUtil(nvgpu_field, TICK_TIME_NS).Get();
|
||||
if (load > 20) { // Ignore load <= 2.0%
|
||||
q.queue[q.pos % QUEUE_SIZE] = load;
|
||||
q.pos++;
|
||||
@@ -301,29 +301,30 @@ void Governor::Main(void* args) {
|
||||
return load;
|
||||
};
|
||||
|
||||
uint64_t update_ticks = SAMPLE_RATE_MAIN;
|
||||
constexpr uint64_t UPDATE_CONTEXT_RATE = SAMPLE_RATE / 2;
|
||||
uint64_t update_ticks = UPDATE_CONTEXT_RATE;
|
||||
bool CPUBoosted = false;
|
||||
bool GPUBoosted = false; // Limited to 76.8 MHz, literally
|
||||
bool GPUThrottled = false;
|
||||
|
||||
while (!self->m_stop_threads) {
|
||||
self->m_core3_stuck_cnt = 0;
|
||||
|
||||
bool shouldUpdateContext = update_ticks++ >= SAMPLE_RATE_MAIN;
|
||||
bool shouldUpdateContext = update_ticks++ >= UPDATE_CONTEXT_RATE;
|
||||
if (shouldUpdateContext) {
|
||||
update_ticks = 0;
|
||||
uint32_t hz = Clocks::GetCurrentHz(SysClkModule_GPU);
|
||||
// Sleep mode detected, wait 1 tick
|
||||
while (!hz) {
|
||||
self->m_core3_stuck_cnt = 0;
|
||||
svcSleepThread(TICK_TIME_MAIN_NS);
|
||||
svcSleepThread(TICK_TIME_NS);
|
||||
hz = Clocks::GetCurrentHz(SysClkModule_GPU);
|
||||
}
|
||||
|
||||
GPUBoosted = apmExtIsBoostMode(self->m_perf_conf_id, true);
|
||||
CPUBoosted = apmExtIsBoostMode(self->m_perf_conf_id, false);
|
||||
GPUThrottled = apmExtIsBoostMode(self->m_perf_conf_id);
|
||||
CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id);
|
||||
|
||||
self->m_gpu_freq.idx_target_hz = FindIndex(&self->m_gpu_freq, hz);
|
||||
if (GPUBoosted)
|
||||
if (GPUThrottled)
|
||||
SetBoostHz(&self->m_gpu_freq);
|
||||
|
||||
hz = Clocks::GetCurrentHz(SysClkModule_CPU);
|
||||
@@ -337,7 +338,7 @@ void Governor::Main(void* args) {
|
||||
if (hz != self->m_mem_freq)
|
||||
Clocks::SetHz(SysClkModule_MEM, self->m_mem_freq);
|
||||
} else {
|
||||
if (!GPUBoosted) {
|
||||
if (!GPUThrottled) {
|
||||
uint32_t gpu_util = GetGpuUtil();
|
||||
if (gpu_util > GPU_THR_RAMP_MAX) {
|
||||
if (TargetRamp(&self->m_gpu_freq, RAMP_MAX))
|
||||
@@ -362,7 +363,7 @@ void Governor::Main(void* args) {
|
||||
}
|
||||
}
|
||||
|
||||
svcSleepThread(TICK_TIME_MAIN_NS);
|
||||
svcSleepThread(TICK_TIME_NS);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -13,8 +13,8 @@
|
||||
|
||||
class CpuCoreUtil {
|
||||
public:
|
||||
CpuCoreUtil (int coreid = -2, uint64_t ms = 1):
|
||||
m_core_id(coreid), m_wait_time_ms(ms), m_wait_time_ns(ms * 1000'000ULL) {};
|
||||
CpuCoreUtil (int coreid = -2, uint64_t ns = 1000'000ULL):
|
||||
m_core_id(coreid), m_wait_time_ns(ns) {};
|
||||
|
||||
inline uint64_t Get() { Start(); WaitForStop(); Stop(); return Calculate(); };
|
||||
inline void Start() { m_idletick = GetIdleTickCount(); };
|
||||
@@ -22,11 +22,11 @@ public:
|
||||
inline void Stop() { m_idletick = GetIdleTickCount() - m_idletick; };
|
||||
|
||||
static constexpr uint64_t TICKS_PER_MS = 192;
|
||||
inline uint64_t Calculate() { return 100'0 - m_idletick * 10 / (TICKS_PER_MS * m_wait_time_ms); };
|
||||
inline uint64_t Calculate() { return 100'0 - m_idletick * 10 * 1000'000ULL / (TICKS_PER_MS * m_wait_time_ns); };
|
||||
|
||||
protected:
|
||||
const int m_core_id;
|
||||
const uint64_t m_wait_time_ms, m_wait_time_ns;
|
||||
const uint64_t m_wait_time_ns;
|
||||
uint64_t m_idletick;
|
||||
|
||||
inline uint64_t GetIdleTickCount() {
|
||||
@@ -38,8 +38,8 @@ protected:
|
||||
|
||||
class GpuCoreUtil {
|
||||
public:
|
||||
GpuCoreUtil (uint32_t nvgpu_field, uint64_t ms = 1):
|
||||
m_nvgpu_field(nvgpu_field), m_wait_time_ns(ms * 1000'000ULL) {};
|
||||
GpuCoreUtil (uint32_t nvgpu_field, uint64_t ns = 1000'000ULL):
|
||||
m_nvgpu_field(nvgpu_field), m_wait_time_ns(ns) {};
|
||||
|
||||
inline uint64_t Get() { Wait(); return GetLoad(); };
|
||||
inline void Wait() { svcSleepThread(m_wait_time_ns); };
|
||||
@@ -127,19 +127,14 @@ public:
|
||||
|
||||
protected:
|
||||
// Parameters for sampling
|
||||
static constexpr uint64_t SAMPLE_RATE_MAIN = 60, SAMPLE_RATE_GPU = 60;
|
||||
static constexpr uint64_t SAMPLE_RATE_CPU = SAMPLE_RATE_GPU / 2;
|
||||
static constexpr uint64_t UPDATE_CONTEXT_RATE = 60;
|
||||
static constexpr uint64_t TICK_TIME_CPU_MS = 1000 / SAMPLE_RATE_CPU;
|
||||
static constexpr uint64_t TICK_TIME_CPU_NS = 1E9 / SAMPLE_RATE_CPU;
|
||||
static constexpr uint64_t TICK_TIME_GPU_MS = 1000 / SAMPLE_RATE_GPU;
|
||||
static constexpr uint64_t TICK_TIME_MAIN_MS = 1000 / SAMPLE_RATE_MAIN;
|
||||
static constexpr uint64_t TICK_TIME_MAIN_NS = 1E9 / SAMPLE_RATE_MAIN;
|
||||
static constexpr uint64_t SAMPLE_RATE = 200;
|
||||
static constexpr uint64_t TICK_TIME_MS = 1000 / SAMPLE_RATE;
|
||||
static constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE;
|
||||
|
||||
// Parameters for frequency ramp threshold
|
||||
static constexpr uint64_t CPU_THR_RAMP_DOWN = 70'0;
|
||||
static constexpr uint64_t CPU_THR_RAMP_UP = 90'0;
|
||||
static constexpr uint64_t GPU_THR_RAMP_DOWN = 60'0;
|
||||
static constexpr uint64_t GPU_THR_RAMP_DOWN = 70'0;
|
||||
static constexpr uint64_t GPU_THR_RAMP_UP = 80'0;
|
||||
static constexpr uint64_t GPU_THR_RAMP_MAX = 90'0;
|
||||
|
||||
@@ -196,7 +191,7 @@ private:
|
||||
// Much faster than <queue> from stl
|
||||
T queue[QUEUE_SIZE] = { 0 };
|
||||
T sum = 0;
|
||||
T pos = 0;
|
||||
size_t pos = 0;
|
||||
|
||||
T GetAvg() { return sum / QUEUE_SIZE; };
|
||||
T GetFirst() { return queue[pos % QUEUE_SIZE]; };
|
||||
|
||||
Reference in New Issue
Block a user