Boost mode typo; uplift sample rate to 200/s; GPU boost mode = GPU throttled mode

This commit is contained in:
KazushiM
2022-10-24 12:20:50 +08:00
parent b52bef3c31
commit 0f6fb06e53
7 changed files with 44 additions and 48 deletions

View File

@@ -39,7 +39,7 @@ DEFINES := -DDISABLE_IPC -DTARGET="\"$(TARGET)\"" -DTARGET_VERSION="\"$(TARGET_V
ARCH := -march=armv8-a+crc+crypto -mtune=cortex-a57 -mtp=soft -fPIE
CFLAGS := -g -Wall -O2 -ffunction-sections \
CFLAGS := -g -Wall -O3 -ffunction-sections \
$(ARCH) $(DEFINES)
CFLAGS += $(INCLUDE) -D__SWITCH__

View File

@@ -23,7 +23,13 @@ void apmExtExit(void);
Result apmExtGetPerformanceMode(u32 *out_mode);
Result apmExtSysRequestPerformanceMode(u32 mode);
Result apmExtGetCurrentPerformanceConfiguration(u32 *out_conf);
bool apmExtIsBoostMode(u32 conf_id, bool allow_cpu_limited);
// Tells whether a performance configuration ID is one of the CPU-boosted ones
// (CPU boosted to 1785 MHz).
//
// conf_id: performance configuration ID to classify.
// Returns true only for 0x92220009 and 0x9222000A.
//
// `static inline` (rather than plain `inline`) keeps this header usable from C
// translation units: a plain C99 `inline` definition emits no external symbol,
// so any call the compiler chooses not to inline would fail to link.
static inline bool apmExtIsCPUBoosted(u32 conf_id) {
    return conf_id == 0x92220009 || conf_id == 0x9222000A;
}
// Tells whether a performance configuration ID is a boost mode, i.e. one in
// which the GPU is throttled to 76.8 MHz.
//
// conf_id: performance configuration ID to classify.
// Returns true for every ID in the inclusive range 0x92220009..0x9222000C.
//
// `static inline` (rather than plain `inline`) keeps this header usable from C
// translation units: a plain C99 `inline` definition emits no external symbol,
// so any call the compiler chooses not to inline would fail to link.
static inline bool apmExtIsBoostMode(u32 conf_id) {
    return conf_id >= 0x92220009 && conf_id <= 0x9222000C;
}
#ifdef __cplusplus
}

View File

@@ -64,9 +64,3 @@ Result apmExtGetCurrentPerformanceConfiguration(u32 *out_conf)
{
return serviceDispatchOut(&g_apmSysSrv, 7, *out_conf);
}
// Tells whether a performance configuration ID denotes a boost mode.
//
// conf_id:           performance configuration ID to classify.
// allow_cpu_limited: when true, accept the whole inclusive range
//                    0x92220009..0x9222000C; when false, accept only
//                    0x92220009 and 0x9222000A.
// Returns true if conf_id falls in the selected set.
//
// The upper-bound and second-match constants are full 32-bit IDs of the form
// 0x9222000X. Writing them with a zero missing (0x922200C / 0x922200A) made
// the range test unsatisfiable and the equality test match the wrong value.
bool apmExtIsBoostMode(u32 conf_id, bool allow_cpu_limited) {
    if (allow_cpu_limited)
        return (conf_id >= 0x92220009 && conf_id <= 0x9222000C);
    return (conf_id == 0x92220009 || conf_id == 0x9222000A);
}

View File

@@ -77,15 +77,15 @@ ClockManager::~ClockManager()
delete this->config;
}
// Reports whether the current performance configuration (context->perfConfId)
// is classified as a boost mode by apmExtIsBoostMode.
// Side effect: when the configuration is CPU-boosted and oc->boostCPUFreq is
// still zero, boostCPUFreq is initialised and handed to the governor.
bool ClockManager::IsCpuBoostMode()
bool ClockManager::IsBoostMode()
{
std::uint32_t confId = this->context->perfConfId;
bool isCpuBoostMode = apmExtIsBoostMode(confId, false);
if (isCpuBoostMode && !this->oc->boostCPUFreq) {
bool isBoostMode = apmExtIsBoostMode(confId);
if (apmExtIsCPUBoosted(confId) && !this->oc->boostCPUFreq) {
// Use the larger of the currently recorded CPU frequency and 1785 MHz,
// so the stored boost frequency is never below 1785 MHz.
this->oc->boostCPUFreq = std::max(this->context->freqs[SysClkModule_CPU], 1785'000'000U);
this->governor->SetCPUBoostHz(this->oc->boostCPUFreq);
}
return isCpuBoostMode;
return isBoostMode;
}
void ClockManager::SetRunning(bool running)
@@ -186,7 +186,7 @@ void ClockManager::Tick()
if (hz && hz != this->context->freqs[module])
{
// In boost mode, skip setting the CPU clock when the requested hz <= boostCPUFreq, and always skip setting the GPU clock
bool skipBoost = IsCpuBoostMode() && ((module == SysClkModule_CPU && hz <= this->oc->boostCPUFreq) || module == SysClkModule_GPU);
bool skipBoost = IsBoostMode() && ((module == SysClkModule_CPU && hz <= this->oc->boostCPUFreq) || module == SysClkModule_GPU);
if (!skipBoost) {
FileUtils::LogLine("[mgr] %s clock set : %u.%u MHz", Clocks::GetModuleName((SysClkModule)module, true), hz/1000000, hz/100000 - hz/1000000*10);
Clocks::SetHz((SysClkModule)module, hz);
@@ -312,7 +312,7 @@ bool ClockManager::RefreshContext()
}
// let ptm module handle boost clocks rather than resetting
if (hasChanged && !IsCpuBoostMode()) {
if (hasChanged && !IsBoostMode()) {
Clocks::ResetToStock();
}

View File

@@ -52,7 +52,7 @@ class ClockManager
ReverseNXSync *rnxSync;
Governor *governor;
bool IsCpuBoostMode();
bool IsBoostMode();
uint32_t GetHz(SysClkModule);
};

View File

@@ -92,7 +92,7 @@ void PsmExt::ChargingHandler(bool fastChargingEnabled, uint32_t chargingLimit) {
void Governor::Start() {
m_stop_threads = false;
svcSleepThread(8 * TICK_TIME_MAIN_NS);
svcSleepThread(8 * TICK_TIME_NS);
Result rc = 0;
for (int core = 0; core < CORE_NUMS; core++) {
@@ -124,7 +124,7 @@ void Governor::Start() {
void Governor::Stop() {
m_stop_threads = true;
svcSleepThread(8 * TICK_TIME_MAIN_NS);
svcSleepThread(8 * TICK_TIME_NS);
threadWaitForExit(&m_t_main);
threadClose(&m_t_main);
@@ -144,7 +144,7 @@ void Governor::SetMaxHz(uint32_t max_hz, SysClkModule module) {
m_cpu_freq.idx_max_hz = FindIndex(&m_cpu_freq, max_hz);
break;
case SysClkModule_GPU:
m_gpu_freq.idx_boost_hz = m_gpu_freq.idx_max_hz = FindIndex(&m_gpu_freq, max_hz);
m_gpu_freq.idx_max_hz = FindIndex(&m_gpu_freq, max_hz);
break;
case SysClkModule_MEM:
m_mem_freq = max_hz;
@@ -237,19 +237,19 @@ void Governor::CheckCpuUtilWorker(void* args) {
}
void Governor::CheckCpuUtilWorkerAppCore(int64_t coreid) {
constexpr uint64_t STUCK_TICKS = 2;
constexpr uint64_t STUCK_TICKS = 5;
s_Queue<uint64_t> q;
while (!m_stop_threads) {
bool isBusy = m_core3_stuck_cnt > STUCK_TICKS * (CORE_NUMS - 1);
if (isBusy) {
m_core3_stuck_cnt = 0;
SetBoostHz(&m_cpu_freq);
svcSleepThread(STUCK_TICKS * TICK_TIME_CPU_NS);
svcSleepThread(STUCK_TICKS * TICK_TIME_NS);
} else {
m_core3_stuck_cnt++;
}
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_CPU_MS).Get();
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_NS).Get();
q.PopAndPush(load);
m_cpu_core_ctx[coreid].util = q.GetAvg();
}
@@ -259,7 +259,7 @@ void Governor::CheckCpuUtilWorkerSysCore() {
s_Queue<uint64_t> q;
int64_t coreid = CORE_NUMS - 1;
while (!m_stop_threads) {
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_CPU_MS).Get();
uint64_t load = CpuCoreUtil(coreid, TICK_TIME_NS).Get();
q.PopAndPush(load);
m_cpu_core_ctx[coreid].util = q.GetAvg() * 7 / 8; // Adjusted, Multipler: 0.875
}
@@ -284,7 +284,7 @@ void Governor::Main(void* args) {
} q;
auto GetGpuUtil = [nvgpu_field, q]() mutable {
uint32_t load = GpuCoreUtil(nvgpu_field, TICK_TIME_GPU_MS).Get();
uint32_t load = GpuCoreUtil(nvgpu_field, TICK_TIME_NS).Get();
if (load > 20) { // Ignore load <= 2.0%
q.queue[q.pos % QUEUE_SIZE] = load;
q.pos++;
@@ -301,29 +301,30 @@ void Governor::Main(void* args) {
return load;
};
uint64_t update_ticks = SAMPLE_RATE_MAIN;
constexpr uint64_t UPDATE_CONTEXT_RATE = SAMPLE_RATE / 2;
uint64_t update_ticks = UPDATE_CONTEXT_RATE;
bool CPUBoosted = false;
bool GPUBoosted = false; // Limited to 76.8 MHz, literally
bool GPUThrottled = false;
while (!self->m_stop_threads) {
self->m_core3_stuck_cnt = 0;
bool shouldUpdateContext = update_ticks++ >= SAMPLE_RATE_MAIN;
bool shouldUpdateContext = update_ticks++ >= UPDATE_CONTEXT_RATE;
if (shouldUpdateContext) {
update_ticks = 0;
uint32_t hz = Clocks::GetCurrentHz(SysClkModule_GPU);
// Sleep mode detected, wait 1 tick
while (!hz) {
self->m_core3_stuck_cnt = 0;
svcSleepThread(TICK_TIME_MAIN_NS);
svcSleepThread(TICK_TIME_NS);
hz = Clocks::GetCurrentHz(SysClkModule_GPU);
}
GPUBoosted = apmExtIsBoostMode(self->m_perf_conf_id, true);
CPUBoosted = apmExtIsBoostMode(self->m_perf_conf_id, false);
GPUThrottled = apmExtIsBoostMode(self->m_perf_conf_id);
CPUBoosted = apmExtIsCPUBoosted(self->m_perf_conf_id);
self->m_gpu_freq.idx_target_hz = FindIndex(&self->m_gpu_freq, hz);
if (GPUBoosted)
if (GPUThrottled)
SetBoostHz(&self->m_gpu_freq);
hz = Clocks::GetCurrentHz(SysClkModule_CPU);
@@ -337,7 +338,7 @@ void Governor::Main(void* args) {
if (hz != self->m_mem_freq)
Clocks::SetHz(SysClkModule_MEM, self->m_mem_freq);
} else {
if (!GPUBoosted) {
if (!GPUThrottled) {
uint32_t gpu_util = GetGpuUtil();
if (gpu_util > GPU_THR_RAMP_MAX) {
if (TargetRamp(&self->m_gpu_freq, RAMP_MAX))
@@ -362,7 +363,7 @@ void Governor::Main(void* args) {
}
}
svcSleepThread(TICK_TIME_MAIN_NS);
svcSleepThread(TICK_TIME_NS);
}
}

View File

@@ -13,8 +13,8 @@
class CpuCoreUtil {
public:
CpuCoreUtil (int coreid = -2, uint64_t ms = 1):
m_core_id(coreid), m_wait_time_ms(ms), m_wait_time_ns(ms * 1000'000ULL) {};
CpuCoreUtil (int coreid = -2, uint64_t ns = 1000'000ULL):
m_core_id(coreid), m_wait_time_ns(ns) {};
inline uint64_t Get() { Start(); WaitForStop(); Stop(); return Calculate(); };
inline void Start() { m_idletick = GetIdleTickCount(); };
@@ -22,11 +22,11 @@ public:
inline void Stop() { m_idletick = GetIdleTickCount() - m_idletick; };
static constexpr uint64_t TICKS_PER_MS = 192;
inline uint64_t Calculate() { return 100'0 - m_idletick * 10 / (TICKS_PER_MS * m_wait_time_ms); };
inline uint64_t Calculate() { return 100'0 - m_idletick * 10 * 1000'000ULL / (TICKS_PER_MS * m_wait_time_ns); };
protected:
const int m_core_id;
const uint64_t m_wait_time_ms, m_wait_time_ns;
const uint64_t m_wait_time_ns;
uint64_t m_idletick;
inline uint64_t GetIdleTickCount() {
@@ -38,8 +38,8 @@ protected:
class GpuCoreUtil {
public:
GpuCoreUtil (uint32_t nvgpu_field, uint64_t ms = 1):
m_nvgpu_field(nvgpu_field), m_wait_time_ns(ms * 1000'000ULL) {};
GpuCoreUtil (uint32_t nvgpu_field, uint64_t ns = 1000'000ULL):
m_nvgpu_field(nvgpu_field), m_wait_time_ns(ns) {};
inline uint64_t Get() { Wait(); return GetLoad(); };
inline void Wait() { svcSleepThread(m_wait_time_ns); };
@@ -127,19 +127,14 @@ public:
protected:
// Parameters for sampling
static constexpr uint64_t SAMPLE_RATE_MAIN = 60, SAMPLE_RATE_GPU = 60;
static constexpr uint64_t SAMPLE_RATE_CPU = SAMPLE_RATE_GPU / 2;
static constexpr uint64_t UPDATE_CONTEXT_RATE = 60;
static constexpr uint64_t TICK_TIME_CPU_MS = 1000 / SAMPLE_RATE_CPU;
static constexpr uint64_t TICK_TIME_CPU_NS = 1E9 / SAMPLE_RATE_CPU;
static constexpr uint64_t TICK_TIME_GPU_MS = 1000 / SAMPLE_RATE_GPU;
static constexpr uint64_t TICK_TIME_MAIN_MS = 1000 / SAMPLE_RATE_MAIN;
static constexpr uint64_t TICK_TIME_MAIN_NS = 1E9 / SAMPLE_RATE_MAIN;
static constexpr uint64_t SAMPLE_RATE = 200;
static constexpr uint64_t TICK_TIME_MS = 1000 / SAMPLE_RATE;
static constexpr uint64_t TICK_TIME_NS = 1000'000'000 / SAMPLE_RATE;
// Parameters for frequency ramp threshold
static constexpr uint64_t CPU_THR_RAMP_DOWN = 70'0;
static constexpr uint64_t CPU_THR_RAMP_UP = 90'0;
static constexpr uint64_t GPU_THR_RAMP_DOWN = 60'0;
static constexpr uint64_t GPU_THR_RAMP_DOWN = 70'0;
static constexpr uint64_t GPU_THR_RAMP_UP = 80'0;
static constexpr uint64_t GPU_THR_RAMP_MAX = 90'0;
@@ -196,7 +191,7 @@ private:
// Much faster than <queue> from stl
T queue[QUEUE_SIZE] = { 0 };
T sum = 0;
T pos = 0;
size_t pos = 0;
T GetAvg() { return sum / QUEUE_SIZE; };
T GetFirst() { return queue[pos % QUEUE_SIZE]; };