From 6585abfcb86c10e61f1ea5c8b0f30c968b0138d5 Mon Sep 17 00:00:00 2001
From: KazushiM <85604869+KazushiMe@users.noreply.github.com>
Date: Thu, 17 Mar 2022 12:15:46 +0800
Subject: [PATCH] Configurable CPU Boost Freq
---
README.md | 50 ++++++++++-------
.../loader/source/oc/ldr_oc_customize.inc | 8 ++-
.../loader/source/oc/ldr_oc_suite.cpp | 53 ++++++++++++++-----
.../loader/source/oc/ldr_oc_suite.hpp | 3 +-
.../sys-clk-OC/common/include/sysclk/clocks.h | 2 +
.../sysmodule/src/clock_manager.cpp | 22 +++++---
.../sys-clk-OC/sysmodule/src/clock_manager.h | 1 -
ldr_config.py | 37 +++++++------
8 files changed, 118 insertions(+), 58 deletions(-)
diff --git a/README.md b/README.md
index 4f64bf1c..09f9d58d 100644
--- a/README.md
+++ b/README.md
@@ -2,13 +2,10 @@
[](https://www.gnu.org/licenses/old-licenses/gpl-2.0.en.html) [](https://gitter.im/Switch-OC-Suite/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
-Overclocking suite for Nintendo Switch™ running on Atmosphere CFW. Should support Horizon OS (HOS) >= 1.0.
+Overclocking suite for Nintendo Switch™ Horizon OS (HOS) running on Atmosphere CFW.
This project will not be actively maintained or regularly updated along with Atmosphere CFW.
-I'd appreciate if someone is willing to contribute or upload latest binaries. But if you are releasing somewhere else (with or without your own modifications), be sure you are complying with GPL v2 license and _include necessary warnings for users_.
-
-
## DISCLAIMER: USE AT YOUR OWN RISK!
@@ -16,6 +13,20 @@ I'd appreciate if someone is willing to contribute or upload latest binaries. Bu
- Higher RAM clocks without proper timings could be UNSTABLE and cause graphical glitches / instabilities / filesystem corruption. **Always make backup before usage.**
+- Why no CPU/GPU OC for Erista?
+
+
+ - Tegra X1 on Erista is on TSMC 20nm HPM node, consumes much more power (~2x) and generates much more heat, compared to Tegra X1+ on Mariko (TSMC 16nm FinFET).
+ - Erista Switch uses lower speedo (=== lower quality === higher voltage required) SoC from NVIDIA. You will NOT get comparable performance to NVIDIA Shield TV no matter what.
+ - Snapdragon 810 (4 x A57 @ 2.0GHz + 4 x A53) also uses 20nm HPM, see how it plagued Android phones in 2014.
+
+ - The board power supply is quite limited, even if you've done a cooling mod.
+ - You may see the battery draining at higher clocks under a stress test, even with the official 39W PD charger.
+ - CPU / GPU performance at max clocks will be worse if the power supply is insufficient.
+
+ - CPU OC (up to ~ 2.1 GHz, depending on your CPU bin) is available mainly for emulation, but it does NOT work out of the box.
+
+
## Features
@@ -28,13 +39,18 @@ I'd appreciate if someone is willing to contribute or upload latest binaries. Bu
- Mariko: 1996.8 MHz has been tested stable for all (Samsung / Micron / Hynix), with built-in timing auto-adjustment.
- Erista: 1862.4 MHz.
- - Unsafe: > 1996.8 MHz or overvolting
+ - Unsafe: higher than 1996.8 MHz or overvolting
+
+
- Timing:
- Timing parameters could be auto-adjusted (default) or overwritten with user-provided mtc table.
- Customization: No GUI tool, requires [rebuilding](#Build).
+
- DRAM bus overvolting (Erista Only).
- Mariko: [use this to set DRAM bus voltage](https://gist.github.com/KazushiMe/6bb0fcbefe0e03b1274079522516d56d).
+
+
- **[System Settings (Optional)](https://github.com/KazushiMe/Switch-OC-Suite/blob/master/system_settings.md)**
- **TinyMemBenchNX**: DRAM throughput and latency test based on [tinymembench](https://github.com/ssvb/tinymembench)
@@ -51,6 +67,8 @@ I'd appreciate if someone is willing to contribute or upload latest binaries. Bu
- It has been proved safe without charger (not reaching battery power draw threshold)
- Unsafe: CPU/GPU @ 2397/1305 MHz
+
+
- Without chargers, CPU/GPU would be capped @ 1963/921 MHz.
- Without official chargers, GPU would be capped @ 1267 MHz.
@@ -70,6 +88,8 @@ I'd appreciate if someone is willing to contribute or upload latest binaries. Bu
- NVIDIA Official Maximum: 1267.2 MHz
- ~~Tested with deko3d compute shaders converted from Maxwell SASS assembly. Single-precision floating point (FP32 FFMA) performance maxes out at 1305 MHz.~~
+
+
- **Modded sys-clk and ReverseNX**(-RT)
- Permanent global clock override
@@ -77,7 +97,7 @@ I'd appreciate if someone is willing to contribute or upload latest binaries. Bu
- Miscellaneous
- Auto CPU Boost: For faster game loading
- - Enable CPU Boost (1785 MHz) when CPU Core#3 (System Core) is stressed, especially when the game is loading assets from eMMC/SD card (I/O ops).
+ - Enable CPU Boost (1785 MHz by default, configurable to a higher value) when CPU Core#3 (System Core) is stressed, especially when the game is loading assets from eMMC/SD card (I/O ops).
- Auto-Boost will be enabled only when charger is connected.
- Sync ReverseNX Mode: No need to change clocks manually after toggling modes in ReverseNX
@@ -98,15 +118,20 @@ I'd appreciate if someone is willing to contribute or upload latest binaries. Bu
3. Grab `x.x.x_loader.kip` for your Atmosphere version, rename it to `loader.kip` and place it in `/atmosphere/kips/`.
4. Customization
+
+
| Defaults | Mariko | Erista |
| ---------- | ------------- | ------------ |
| CPU OC | 2397 MHz Max | Disabled |
+ | CPU Boost | 1785 MHz | N/A |
| CPU Volt | 1220 mV Max | Disabled |
| GPU OC | 1305 MHz Max | N/A |
| RAM OC | 1996 MHz Max | 1862 MHz Max |
| RAM Volt | N/A | Disabled |
| RAM Timing | Auto-Adjusted | Disabled |
+
+
- Loader configurator
- Grab [ldr_config.py](https://github.com/KazushiMe/Switch-OC-Suite/raw/master/ldr_config.py) and modify values in `cust_conf` dict.
- `python ldr_config.py loader.kip -s` will save your configuration in-place.
@@ -130,19 +155,6 @@ Uncompress the kip to make it work with config editor: `hactool -t kip1 Atmosphe
-## Why no CPU/GPU OC for Erista?
-
-- Tegra X1 on Erista is on TSMC 20nm HPM node, consumes much more power (~2x) and generates much more heat, compared to Tegra X1+ on Mariko (TSMC 16nm FinFET).
- - Erista Switch uses lower speedo (=== lower quality === higher voltage required) SoC from NVIDIA. You will NOT get comparable performance to NVIDIA Shield TV no matter what.
- - Snapdragon 810 (4 x A57 @ 2.0GHz + 4 x A53) also uses 20nm HPM, see how it plagued Android phones in 2014.
-
-- The board power supply is quite limited, even if you've done cooling mod.
- - You could spot battery draining at higher clocks under stress test, even with official 39W PD charger.
- - CPU / GPU performance at max clocks will be worse if power supply is not enough.
-
-- CPU OC (up to ~ 2.1 GHz, depending on your CPU bin) is available mainly for emulation, but it does NOT work out of the box.
-
-
## Acknowledgement
- CTCaer for [Hekate-ipl](https://github.com/CTCaer/hekate) bootloader, RE and hardware research
diff --git a/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_customize.inc b/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_customize.inc
index 0c138569..06d42efa 100644
--- a/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_customize.inc
+++ b/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_customize.inc
@@ -13,12 +13,16 @@ static const volatile CustomizeTable C = {
* - Max Clock in kHz:
* Default: 1785000
* >= 2193000 will enable overvolting (> 1120 mV)
+ * - Boost Clock in kHz:
+ * Default: 1785000
+ * The boost clock is applied when applications request a higher CPU frequency for quicker loading.
* - Max Voltage in mV:
* Default voltage: 1120
* Haven't tested anything higher than 1220.
*/
-.marikoCpuMaxClock = 2397000,
-.marikoCpuMaxVolt = 1220,
+.marikoCpuMaxClock = 2397000,
+.marikoCpuBoostClock = 1785000,
+.marikoCpuMaxVolt = 1220,
/* Mariko GPU:
* - Max Clock in kHz:
diff --git a/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_suite.cpp b/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_suite.cpp
index 91f7945a..723dd8b2 100644
--- a/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_suite.cpp
+++ b/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_suite.cpp
@@ -1150,22 +1150,31 @@ namespace ams::ldr::oc {
return;
#endif
- perf_conf_entry* confTable = 0;
- constexpr u32 entryCnt = 16;
- constexpr u32 memPtmLimit = 1600'000'000;
- constexpr u32 memPtmAlt = 1331'200'000;
- constexpr u32 memPtmClamp = 1065'600'000;
- const u32 memPtmMax = C.marikoEmcMaxClock * 1000;
+ perf_conf_entry* confTable = 0;
+ constexpr u32 entryCnt = 16;
+ constexpr u32 cpuPtmDefault = 1020'000'000;
+ constexpr u32 cpuPtmDevOC = 1224'000'000;
+ constexpr u32 cpuPtmBoost = 1785'000'000;
+ const u32 cpuPtmBoostNew = C.marikoCpuBoostClock * 1000;
+ constexpr u32 memPtmLimit = 1600'000'000;
+ constexpr u32 memPtmAlt = 1331'200'000;
+ constexpr u32 memPtmClamp = 1065'600'000;
+ const u32 memPtmMax = C.marikoEmcMaxClock * 1000;
for (uintptr_t ptr = mapped_nso;
ptr <= mapped_nso + nso_size - sizeof(perf_conf_entry) * entryCnt;
ptr += sizeof(u32))
{
- u32 value = *(reinterpret_cast(ptr));
+ u32* ptr32 = reinterpret_cast<u32*>(ptr);
+ u32 value = *(ptr32);
- if (value == memPtmLimit)
+ if (value == cpuPtmDefault)
{
- confTable = reinterpret_cast(ptr - offsetof(perf_conf_entry, emc_freq_1));
+ u32 value_next = *(ptr32 + 1);
+ if (value_next != cpuPtmDefault)
+ continue;
+
+ confTable = reinterpret_cast<perf_conf_entry*>(ptr - offsetof(perf_conf_entry, cpu_freq_1));
break;
}
}
@@ -1179,24 +1188,44 @@ namespace ams::ldr::oc {
{
perf_conf_entry* entry_current = confTable + i;
- if (entry_current->emc_freq_1 != entry_current->emc_freq_2) {
- LOGGING("@%p: emc_freq_1(%u) != emc_freq_2(%u)", &(entry_current->emc_freq_1), entry_current->emc_freq_1, entry_current->emc_freq_2);
+ if (entry_current->cpu_freq_1 != entry_current->cpu_freq_2 ||
+ entry_current->gpu_freq_1 != entry_current->gpu_freq_2 ||
+ entry_current->emc_freq_1 != entry_current->emc_freq_2)
+ {
+ LOGGING("@%p: Invalid confTable entry", &entry_current);
CRASH();
}
+ switch (entry_current->cpu_freq_1)
+ {
+ case cpuPtmBoost:
+ PatchOffset(&(entry_current->cpu_freq_1), cpuPtmBoostNew);
+ PatchOffset(&(entry_current->cpu_freq_2), cpuPtmBoostNew);
+ LOGGING("0x%x: CPU: Boost Freq", entry_current->conf_id);
+ break;
+ case cpuPtmDefault:
+ case cpuPtmDevOC:
+ break;
+ default:
+ LOGGING("Unknown CPU Freq: %u @%p!", entry_current->cpu_freq_1, &(entry_current->cpu_freq_1));
+ CRASH();
+ }
+
switch (entry_current->emc_freq_1)
{
case memPtmLimit:
PatchOffset(&(entry_current->emc_freq_1), memPtmMax);
PatchOffset(&(entry_current->emc_freq_2), memPtmMax);
+ LOGGING("0x%x: MEM: Max Freq", entry_current->conf_id);
break;
case memPtmAlt:
case memPtmClamp:
PatchOffset(&(entry_current->emc_freq_1), memPtmLimit);
PatchOffset(&(entry_current->emc_freq_2), memPtmLimit);
+ LOGGING("0x%x: MEM: Alt Freq", entry_current->conf_id);
break;
default:
- LOGGING("Wrong mem freq: %u @%p!", entry_current->emc_freq_1, &(entry_current->emc_freq_1));
+ LOGGING("Unknown MEM Freq: %u @%p!", entry_current->emc_freq_1, &(entry_current->emc_freq_1));
CRASH();
}
}
diff --git a/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_suite.hpp b/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_suite.hpp
index a253a722..9fab6bc5 100644
--- a/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_suite.hpp
+++ b/Source/Atmosphere/stratosphere/loader/source/oc/ldr_oc_suite.hpp
@@ -15,7 +15,7 @@
*/
#pragma once
-#define CUST_REV 1
+#define CUST_REV 2
#include "mtc_timing_table.hpp"
namespace ams::ldr::oc {
@@ -31,6 +31,7 @@ namespace ams::ldr::oc {
u16 custRev = CUST_REV;
u16 mtcConf = AUTO_ADJ_MARIKO_SAFE;
u32 marikoCpuMaxClock;
+ u32 marikoCpuBoostClock;
u32 marikoCpuMaxVolt;
u32 marikoGpuMaxClock;
u32 marikoEmcMaxClock;
diff --git a/Source/sys-clk-OC/common/include/sysclk/clocks.h b/Source/sys-clk-OC/common/include/sysclk/clocks.h
index e0f6efd9..ee5ed0dc 100644
--- a/Source/sys-clk-OC/common/include/sysclk/clocks.h
+++ b/Source/sys-clk-OC/common/include/sysclk/clocks.h
@@ -66,8 +66,10 @@ typedef enum
typedef struct
{
bool systemCoreBoostCPU;
+ bool gotBoostCPUFreq;
ReverseNXMode reverseNXMode;
uint32_t maxMEMFreq;
+ uint32_t boostCPUFreq;
} SysClkOcExtra;
typedef struct
diff --git a/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp b/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp
index 6756de69..fbe80a3c 100644
--- a/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp
+++ b/Source/sys-clk-OC/sysmodule/src/clock_manager.cpp
@@ -59,8 +59,10 @@ ClockManager::ClockManager()
this->oc = new SysClkOcExtra;
this->oc->systemCoreBoostCPU = false;
+ this->oc->gotBoostCPUFreq = false;
this->oc->reverseNXMode = ReverseNX_NotFound;
this->oc->maxMEMFreq = 0;
+ this->oc->boostCPUFreq = 1785'000'000;
}
ClockManager::~ClockManager()
@@ -73,7 +75,13 @@ ClockManager::~ClockManager()
bool ClockManager::IsCpuBoostMode()
{
std::uint32_t confId = this->context->perfConfId;
- return (confId == 0x92220009 || confId == 0x9222000A);
+ bool isCpuBoostMode = (confId == 0x92220009 || confId == 0x9222000A);
+ if (isCpuBoostMode && !this->oc->gotBoostCPUFreq)
+ {
+ this->oc->gotBoostCPUFreq = true;
+ this->oc->boostCPUFreq = std::max(this->context->freqs[SysClkModule_CPU], this->oc->boostCPUFreq);
+ }
+ return isCpuBoostMode;
}
bool ClockManager::IsReverseNXModeValid()
@@ -158,8 +166,8 @@ uint32_t ClockManager::GetHz(SysClkModule module)
if (module == SysClkModule_CPU)
{
- if (this->oc->systemCoreBoostCPU && hz < CPU_BOOST_FREQ)
- return CPU_BOOST_FREQ;
+ if (this->oc->systemCoreBoostCPU && hz < this->oc->boostCPUFreq)
+ return this->oc->boostCPUFreq;
else if (!hz)
/* Prevent crash when hz = 0 in SetHz(0), trigger RefreshContext() and Tick() */
return 1020'000'000;
@@ -180,8 +188,8 @@ void ClockManager::Tick()
if (hz && hz != this->context->freqs[module])
{
- // Skip setting CPU or GPU clocks in CpuBoostMode if CPU <= 1963.5MHz or GPU >= 76.8MHz
- if (IsCpuBoostMode() && ((module == SysClkModule_CPU && hz <= CPU_BOOST_FREQ) || module == SysClkModule_GPU))
+ // Skip setting CPU or GPU clocks in CpuBoostMode if CPU <= boostCPUFreq or GPU >= 76.8MHz
+ if (IsCpuBoostMode() && ((module == SysClkModule_CPU && hz <= this->oc->boostCPUFreq) || module == SysClkModule_GPU))
{
continue;
}
@@ -204,7 +212,7 @@ void ClockManager::WaitForNextTick()
if ( isAutoBoostEnabled
&& this->context->realProfile != SysClkProfile_Handheld
&& this->context->enabled
- && this->context->freqs[SysClkModule_CPU] <= CPU_BOOST_FREQ)
+ && this->context->freqs[SysClkModule_CPU] <= this->oc->boostCPUFreq)
{
uint64_t systemCoreIdleTickPrev = 0, systemCoreIdleTickNext = 0;
svcGetInfo(&systemCoreIdleTickPrev, InfoType_IdleTickCount, INVALID_HANDLE, 3);
@@ -237,7 +245,7 @@ void ClockManager::WaitForNextTick()
}
else if (!systemCoreBoostCPUPrevState && this->oc->systemCoreBoostCPU)
{
- Clocks::SetHz(SysClkModule_CPU, CPU_BOOST_FREQ);
+ Clocks::SetHz(SysClkModule_CPU, this->oc->boostCPUFreq);
}
}
else
diff --git a/Source/sys-clk-OC/sysmodule/src/clock_manager.h b/Source/sys-clk-OC/sysmodule/src/clock_manager.h
index e8cf9ae1..68556265 100644
--- a/Source/sys-clk-OC/sysmodule/src/clock_manager.h
+++ b/Source/sys-clk-OC/sysmodule/src/clock_manager.h
@@ -47,7 +47,6 @@ class ClockManager
std::uint64_t lastCsvWriteNs;
SysClkOcExtra *oc;
- const uint32_t CPU_BOOST_FREQ = 1785'000'000;
bool IsCpuBoostMode();
bool IsReverseNXModeValid();
diff --git a/ldr_config.py b/ldr_config.py
index 99484e08..6943c47f 100755
--- a/ldr_config.py
+++ b/ldr_config.py
@@ -10,11 +10,15 @@ cust_conf = {
# - Max Clock in kHz:
# Default: 1785000
# >= 2193000 will enable overvolting (> 1120 mV)
+# - Boost Clock in kHz:
+# Default: 1785000
+# The boost clock is applied when applications request a higher CPU frequency for quicker loading.
# - Max Voltage in mV:
# Default voltage: 1120
# Haven't tested anything higher than 1220.
- "marikoCpuMaxClock": 2397000,
- "marikoCpuMaxVolt": 1220,
+ "marikoCpuMaxClock": 2397000,
+ "marikoCpuBoostClock": 1785000,
+ "marikoCpuMaxVolt": 1220,
# Mariko GPU:
# - Max Clock in kHz:
# Default: 921600
@@ -54,23 +58,24 @@ cust_conf = {
}
cust_range = {
- "mtcConf": (0, 3),
- "marikoCpuMaxClock": (1785000, 3000000),
- "marikoCpuMaxVolt": (1100, 1300),
- "marikoGpuMaxClock": (768000, 1536000),
- "marikoEmcMaxClock": (1612800, 2400000),
- "eristaCpuMaxVolt": (1100, 1400),
- "eristaEmcMaxClock": (1600000, 2400000),
- "eristaEmcVolt": (1100000, 1250000)
+ "mtcConf": (0, 3),
+ "marikoCpuMaxClock": (1785000, 3000000),
+ "marikoCpuBoostClock": (1785000, 3000000),
+ "marikoCpuMaxVolt": (1100, 1300),
+ "marikoGpuMaxClock": (768000, 1536000),
+ "marikoEmcMaxClock": (1612800, 2400000),
+ "eristaCpuMaxVolt": (1100, 1400),
+ "eristaEmcMaxClock": (1600000, 2400000),
+ "eristaEmcVolt": (1100000, 1250000)
}
import struct
import argparse
-cust_rev = 1
+cust_rev = 2
cust_head = ["cust", "custRev"]
cust_body = ["mtcConf",
- "marikoCpuMaxClock", "marikoCpuMaxVolt", "marikoGpuMaxClock", "marikoEmcMaxClock",
+ "marikoCpuMaxClock", "marikoCpuBoostClock", "marikoCpuMaxVolt", "marikoGpuMaxClock", "marikoEmcMaxClock",
"eristaCpuOCEnable", "eristaCpuMaxVolt", "eristaEmcMaxClock", "eristaEmcVolt"]
cust_key = [*cust_head, *cust_body]
@@ -92,7 +97,7 @@ def KIPCustParse(file_loc, conf_print=True) -> (int, dict):
raise Exception("\n Invalid kip file!")
file.seek(cust_pos)
- cust_fmt = '<4s2H8I'
+ cust_fmt = '<4s2H9I'
cust_size = struct.calcsize(cust_fmt)
cust_buf = file.read(cust_size)
cust_val = struct.unpack(cust_fmt, cust_buf)
@@ -105,7 +110,7 @@ def KIPCustParse(file_loc, conf_print=True) -> (int, dict):
if conf_print:
print("Configuration from file")
- [print(f"- {i:18s} : {cust_dict[i]:8d}") for i in cust_dict]
+ [print(f"- {i:20s} : {cust_dict[i]:8d}") for i in cust_dict]
return (cust_pos, cust_dict)
@@ -115,7 +120,7 @@ def CustRangeCheck(cust):
for i in cust_range:
val = int(cust[i])
if val and (val < cust_range[i][0] or val > cust_range[i][1]) :
- range_error_str += f"\n- {i:18s} = {val:8d}, Expected range: {[*cust_range[i]]}"
+ range_error_str += f"\n- {i:20s} = {val:8d}, Expected range: {[*cust_range[i]]}"
if range_error_str:
raise ValueError(range_error_str)
@@ -138,7 +143,7 @@ def KIPCustSave(file_loc, cust_pos, cust_dict, range_check=True, cust_to_save={}
if cust_dict[i] != cust_conf[i]:
diff_str = f"-> {cust_conf[i]:8d}"
diff_count += 1
- print(f"- {i:18s} : {cust_dict[i]:8d} {diff_str}")
+ print(f"- {i:20s} : {cust_dict[i]:8d} {diff_str}")
if not diff_count:
print("Cust is identical, abort saving!")