Merge pull request #7 from Lightos1/master

(hopefully) proper Auto_ADJ implementation, timing fixes, slight configuration tweaks, typos, formating.
This commit is contained in:
Souldbminer
2025-10-01 18:18:42 -04:00
committed by GitHub
6 changed files with 774 additions and 1016 deletions

View File

@@ -53,5 +53,5 @@ Run build.bat or cd into folder and run "python -m PyInstaller --onefile --add-d
## Credits
meha for Switch-Oc-Suite<br>
sys-clk team for sys-clk<br>
b0rd2auth for Ultrahand sys-clk fork<br>
Lightos and Sammybigio2010 for early testing<br>
b0rd2death for Ultrahand sys-clk fork<br>
Lightos and Sammybigio2011 for early testing<br>

View File

@@ -105,6 +105,8 @@ volatile CustomizeTable C = {
.commonGpuVoltOffset = 0,
.marikoEmcDvbShift = 0,
.t1_tRCD = 0,
.t2_tRP = 0,
.t3_tRAS = 0,
@@ -114,7 +116,7 @@ volatile CustomizeTable C = {
.t7_tWTR = 0,
.t8_tREFI = 0,
.mem_burst_latency = 2,
.mem_burst_latency = 0,
// NOTE: These tables should NOT BE USED and are only here as placeholders. Always try and find your own optimal tables.
// Ensure the voltages actually increase or stay the sameot

View File

@@ -30,12 +30,7 @@
#include "mtc_timing_table.hpp"
enum MtcConfig: u32 {
AUTO_ADJ_ALL = 0,
CUSTOM_ADJ_ALL = 1,
NO_ADJ_ALL = 2,
CUSTOMIZED_ALL = 4,
AUTO_ADJ = 5,
AUTO_ADJ = 0,
};
using CustomizeCpuDvfsTable = pcv::cvb_entry_t[pcv::DvfsTableEntryLimit];
@@ -49,8 +44,7 @@
typedef struct CustomizeTable {
u8 cust[4] = {'C', 'U', 'S', 'T'};
u32 custRev = CUST_REV;
u32 mtcConfErista = AUTO_ADJ;
u32 mtcConfMariko = AUTO_ADJ_ALL; // TODO: Fix mariko and merge into mtcConf
u32 mtcConf = AUTO_ADJ;
u32 commonCpuBoostClock;
u32 commonEmcMemVolt;
u32 eristaCpuMaxVolt;

View File

@@ -61,178 +61,104 @@
// Burst Length
const u32 BL = 16;
// tRFCpb (refresh cycle time per bank) in ns for 8Gb density
const u32 tRFCpb = !C.t5_tRFC ? 140 : tRFC_values[C.t5_tRFC-1];
// Write Latency
const u32 WL = 14 + C.mem_burst_latency;
// Read Latency
const u32 RL = 32 - C.mem_burst_latency;
// tRFCab (refresh cycle time all banks) in ns for 8Gb density
const u32 tRFCab = !C.t5_tRFC ? 280 : 2*tRFCpb;
// tRFCpb (refresh cycle time per bank) in ns for 8Gb density
const u32 tRFCpb = !C.t5_tRFC ? 140 : tRFC_values[C.t5_tRFC-1];
// tRAS (row active time) in ns
const u32 tRAS = !C.t3_tRAS ? 42 : tRAS_values[C.t3_tRAS-1];
// tRFCab (refresh cycle time all banks) in ns for 8Gb density
const u32 tRFCab = !C.t5_tRFC ? 280 : 2*tRFCpb;
// tRPpb (row precharge time per bank) in ns
const u32 tRPpb = !C.t2_tRP ? 18 : tRP_values[C.t2_tRP-1];
// tRAS (row active time) in ns
const u32 tRAS = !C.t3_tRAS ? 42 : tRAS_values[C.t3_tRAS-1];
// tRPab (row precharge time all banks) in ns
const u32 tRPab = !C.t2_tRP ? 21 : tRPpb + 3;
// tRPpb (row precharge time per bank) in ns
const u32 tRPpb = !C.t2_tRP ? 18 : tRP_values[C.t2_tRP-1];
// tRC (ACTIVATE-ACTIVATE command period same bank) in ns
const u32 tRC = tRPpb + tRAS;
// tRPab (row precharge time all banks) in ns
const u32 tRPab = !C.t2_tRP ? 21 : tRPpb + 3;
const u32 tRTW = !C.t6_tRTW ? 10 : tWTR_values[C.t6_tRTW-1];
// DQS output access time from CK_t/CK_c
const double tDQSCK_min = 1.5; // TODO: Fix/remove for mariko if needed
// DQS output access time from CK_t/CK_c
const double tDQSCK_max = 3.5; // TODO: Fix/remove for mariko if needed
// Write preamble (tCK)
const double tWPRE = 1.8; // TODO: Fix/remove for mariko if needed
// Read postamble (tCK)
const double tRPST = 0.4; // TODO: Fix/remove for mariko if needed
// WRITE command to first DQS transition(max) (tCK)
const double tDQSS_max = 1.25; // TODO: Fix/remove for mariko if needed
// DQ-to-DQS offset(max) (ns)
const double tDQS2DQ_max = 0.8; // TODO: Fix/remove for mariko if needed
// DQS_t, DQS_c to DQ skew total, per group, per access (DBI Disabled)
const double tDQSQ = 0.18; // TODO: Fix/remove for mariko if needed
// tRC (ACTIVATE-ACTIVATE command period same bank) in ns
const u32 tRC = tRPab + tRAS;
// Write-to-Read delay
const u32 tWTR = !C.t7_tWTR ? 10 : tWTR_values[C.t7_tWTR-1];
const u32 tPPD = 4;
// Internal READ-to-PRE-CHARGE command delay in ns
const double tRTP = !TIMING_PRESET_THREE ? 7.5 : tRTP_values[TIMING_PRESET_THREE-1];
const u32 tRTW = !C.t6_tRTW ? 10 : tWTR_values[C.t6_tRTW-1];
// write recovery time
const u32 tWR = !TIMING_PRESET_THREE ? 18 : tWR_values[TIMING_PRESET_THREE-1];
// Write-to-Read delay
const u32 tWTR = !C.t7_tWTR ? 10 : tWTR_values[C.t7_tWTR-1];
// Read to refresh delay
const u32 tR2REF = tRTP + tRPpb;
// Internal READ-to-PRE-CHARGE command delay in ns
const double tRTP = !TIMING_PRESET_THREE ? 7.5 : tRTP_values[TIMING_PRESET_THREE-1];
// tRCD (RAS-CAS delay) in ns
const u32 tRCD = !C.t1_tRCD ? 18 : tRCD_values[C.t1_tRCD-1];
// write recovery time
const u32 tWR = !TIMING_PRESET_THREE ? 18 : tWR_values[TIMING_PRESET_THREE-1];
// tRRD (Active bank-A to Active bank-B) in ns
const double tRRD = !C.t4_tRRD ? 10. : tRRD_values[C.t4_tRRD-1];
// tRCD (RAS-CAS delay) in ns
const u32 tRCD = !C.t1_tRCD ? 18 : tRCD_values[C.t1_tRCD-1];
// tREFpb (average refresh interval per bank) in ns for 8Gb density
const u32 tREFpb = !C.t8_tREFI ? 488 : tREFpb_values[C.t8_tREFI-1];
// tREFab (average refresh interval all 8 banks) in ns for 8Gb density
// const u32 tREFab = tREFpb * 8;
// tRRD (Active bank-A to Active bank-B) in ns
const double tRRD = !C.t4_tRRD ? 10. : tRRD_values[C.t4_tRRD-1];
// tPDEX2WR, tPDEX2RD (timing delay from exiting powerdown mode to a write/read command) in ns
// const u32 tPDEX2 = 10;
// Exit power-down to next valid command delay
const double tXP = 10;
// tREFpb (average refresh interval per bank) in ns for 8Gb density
const u32 tREFpb = !C.t8_tREFI ? 488 : tREFpb_values[C.t8_tREFI-1];
// Delay from valid command to CKE input LOW in ns
const double tCMDCKE = 1.75; // TODO: Fix/remove for mariko if needed
// Exit power-down to next valid command delay
const double tXP = 7.5;
// tACT2PDEN (timing delay from an activate, MRS or EMRS command to power-down entry) in ns
// Valid clock and CS requirement after CKE input LOW after MRW command
const u32 tMRWCKEL = 14; // TODO: Fix/remove for mariko if needed
// tXSR (SELF REFRESH exit to next valid command delay) in ns
const double tXSR = tRFCab + 7.5;
// Valid CS requirement after CKE input LOW
const double tCKELCS = 5; // TODO: Fix/remove for mariko if needed
// Minimum self refresh time (entry to exit)
const u32 tSR = 15;
// Valid CS requirement before CKE input HIGH
const double tCSCKEH = 1.75; // TODO: Fix/remove for mariko if needed
// tFAW (Four-bank Activate Window) in ns
const u32 tFAW = 40;// !TIMING_PRESET_TWO ? 40 : tFAW_values[TIMING_PRESET_TWO-1]; TOGO
// tXSR (SELF REFRESH exit to next valid command delay) in ns
const double tXSR = tRFCab + 7.5;
// #_of_rows per die for 8Gb density
const u32 numOfRows = 131072;
// tCKE (minimum pulse width(HIGH and LOW pulse width)) in ns
const double tCKE = 7.5; // TODO: Fix/remove for mariko if needed
// {REFRESH, REFRESH_LO} = max[(tREF/#_of_rows) / (emc_clk_period) - 64, (tREF/#_of_rows) / (emc_clk_period) * 97%]
// emc_clk_period = dram_clk / 2;
// 1600 MHz: 5894, but N' set to 6176 (~4.8% margin)
const u32 REFRESH = MIN((u32)65472, u32(std::ceil((double(tREFpb) * C.marikoEmcMaxClock / numOfRows * 1.048 / 2 - 64))) / 4 * 4);
const u32 REFBW = MIN((u32)65536, REFRESH+64);
// Minimum self refresh time (entry to exit)
const u32 tSR = 15;
// DQS output access time from CK_t/CK_c
const double tDQSCK_min = 1.5;
const double tDQSCK_max = 3.5;
// Write preamble (tCK)
const double tWPRE = 1.8;
// Read postamble (tCK)
const double tRPST = 0.4;
// tFAW (Four-bank Activate Window) in ns
const u32 tFAW = 40;// !TIMING_PRESET_TWO ? 40 : tFAW_values[TIMING_PRESET_TWO-1]; TOGO
namespace pcv::erista {
// tCK_avg (average clock period) in ns
const double tCK_avg = 1000'000. / C.eristaEmcMaxClock;
// Valid Clock requirement before CKE Input HIGH in ns
const double tCKCKEH = 1.75; // TODO: Fix/remove for mariko if needed
// minimum number of cycles from any read command to any write command, irrespective of bank
const u32 R2W = CEIL (RL + CEIL(tDQSCK_max/tCK_avg) + BL/2 - WL + tWPRE + FLOOR(tRPST)) + 6;
// p78 The first valid data is available RL × t CK + t DQSCK + t DQSQ
//const u32 QUSE = RL + CEIL(tDQSCK_min/tCK_avg + tDQSQ);
// Delay Time From WRITE-to-READ
const u32 W2R = WL + BL/2 + 1 + CEIL(tWTR/tCK_avg) - 6;
namespace pcv::erista {
// tCK_avg (average clock period) in ns
const double tCK_avg = 1000'000. / C.eristaEmcMaxClock;
// write-to-precharge time for commands to the same bank in cycles
const u32 WTP = WL + BL/2 + 1 + CEIL(tWR/tCK_avg) - 8;
}
// Write Latency
const u32 WL = 14 + C.mem_burst_latency;
// Read Latency
const u32 RL = 32 - C.mem_burst_latency;
namespace pcv::mariko {
// tCK_avg (average clock period) in ns
const double tCK_avg = 1000'000. / C.marikoEmcMaxClock;
// minimum number of cycles from any read command to any write command, irrespective of bank
// const u32 R2W = CEIL (RL + CEIL(tDQSCK_max/tCK_avg) + BL/2 - WL + tWPRE + FLOOR(tRPST)) + 6;
const u32 R2W = CEIL (RL + CEIL(tDQSCK_max/tCK_avg) + BL/2 - WL + tWPRE + FLOOR(tRPST)) + 6;
// Delay Time From WRITE-to-READ
// const u32 W2R = WL + BL/2 + 1 + CEIL(tWTR/tCK_avg) - 6;
// Delay Time From WRITE-to-READ
const u32 W2R = WL + BL/2 + 1 + CEIL(tWTR/tCK_avg) - 6;
// write-to-precharge time for commands to the same bank in cycles
// const u32 WTP = WL + BL/2 + 1 + CEIL(tWR/tCK_avg) - 8;
// #_of_rows per die for 16Gb density
const u32 numOfRows = 131072;
// {REFRESH, REFRESH_LO} = max[(tREF/#_of_rows) / (emc_clk_period) - 64, (tREF/#_of_rows) / (emc_clk_period) * 97%]
// emc_clk_period = dram_clk / 2;
// 1600 MHz: 5894, but N' set to 6176 (~4.8% margin)
const u32 REFRESH = MIN((u32)65472, u32(std::ceil((double(tREFpb) * C.eristaEmcMaxClock / numOfRows * 1.048 / 2 - 64))) / 4 * 4);
const u32 REFBW = MIN((u32)65536, REFRESH+64);
// Write With Auto Precharge to to Power-Down Entry
// const u32 WTPDEN = WTP + 1 + CEIL(tDQSS_max/tCK_avg) + CEIL(tDQS2DQ_max/tCK_avg) + 6;
// Additional time after t XP hasexpired until the MRR commandmay be issued
// const double tMRRI = tRCD + 3 * tCK_avg;
// tPDEX2MRR (timing delay from exiting powerdown mode to MRR command) in ns
// const double tPDEX2MRR = tXP + tMRRI;
}
namespace pcv::mariko {
// tCK_avg (average clock period) in ns
const double tCK_avg = 1000'000. / C.marikoEmcMaxClock;
// Write Latency
const u32 WL = 14 + C.mem_burst_latency;
// Read Latency
const u32 RL = 32 - C.mem_burst_latency;
// minimum number of cycles from any read command to any write command, irrespective of bank
const u32 R2W = WL + BL/2 + 1 + CEIL(tRTW/tCK_avg);
// Delay Time From WRITE-to-READ
const u32 W2R = WL + BL/2 + 1 + CEIL(tWTR/tCK_avg);
// write-to-precharge time for commands to the same bank in cycles
const u32 WTP = WL + BL/2 + 1 + CEIL(tWR/tCK_avg);
// Read-To-MRW delay
const u32 RTM = RL + BL/2 + CEIL(tDQSCK_max/tCK_avg) + FLOOR(tRPST) + CEIL(7.5/tCK_avg);
// Write-To-MRW/MRR delay
const u32 WTM = WL + 1 + BL/2 + CEIL(7.5/tCK_avg);
// Read With AP-To-MRW/MRR delay
const u32 RATM = RTM + CEIL(tRTP/tCK_avg) - 8;
// Write With AP-To-MRW/MRR delay
const u32 WATM = WTM + CEIL(tWR/tCK_avg);
// #_of_rows per die for 8Gb density
const u32 numOfRows = 65536;
// {REFRESH, REFRESH_LO} = max[(tREF/#_of_rows) / (emc_clk_period) - 64, (tREF/#_of_rows) / (emc_clk_period) * 97%]
// emc_clk_period = dram_clk / 2;
// 1600 MHz: 5894, but N' set to 6176 (~4.8% margin)
const u32 REFRESH = MIN((u32)65472, u32(std::ceil((double(tREFpb) * C.marikoEmcMaxClock / numOfRows * 1.048 / 2 - 64))) / 4 * 4);
const u32 REFBW = MIN((u32)65536, REFRESH+64);
// Write With Auto Precharge to to Power-Down Entry
const u32 WTPDEN = WTP + 1 + CEIL(tDQSS_max/tCK_avg) + CEIL(tDQS2DQ_max/tCK_avg) + 6;
// Additional time after t XP hasexpired until the MRR commandmay be issued
const double tMRRI = tRCD + 3 * tCK_avg;
// tPDEX2MRR (timing delay from exiting powerdown mode to MRR command) in ns
const double tPDEX2MRR = tXP + tMRRI;
}
}
// write-to-precharge time for commands to the same bank in cycles
const u32 WTP = WL + BL/2 + 1 + CEIL(tWR/tCK_avg) - 8;
}
}

View File

@@ -21,271 +21,274 @@
#include "pcv.hpp"
#include "../mtc_timing_value.hpp"
namespace ams::ldr::oc::pcv::erista {
Result CpuFreqVdd(u32* ptr) {
dvfs_rail* entry = reinterpret_cast<dvfs_rail *>(reinterpret_cast<u8 *>(ptr) - offsetof(dvfs_rail, freq));
namespace ams::ldr::oc::pcv::erista {
Result CpuFreqVdd(u32* ptr) {
dvfs_rail* entry = reinterpret_cast<dvfs_rail *>(reinterpret_cast<u8 *>(ptr) - offsetof(dvfs_rail, freq));
R_UNLESS(entry->id == 1, ldr::ResultInvalidCpuFreqVddEntry());
R_UNLESS(entry->min_mv == 250'000, ldr::ResultInvalidCpuFreqVddEntry());
R_UNLESS(entry->step_mv == 5000, ldr::ResultInvalidCpuFreqVddEntry());
R_UNLESS(entry->max_mv == 1525'000, ldr::ResultInvalidCpuFreqVddEntry());
R_UNLESS(entry->id == 1, ldr::ResultInvalidCpuFreqVddEntry());
R_UNLESS(entry->min_mv == 250'000, ldr::ResultInvalidCpuFreqVddEntry());
R_UNLESS(entry->step_mv == 5000, ldr::ResultInvalidCpuFreqVddEntry());
R_UNLESS(entry->max_mv == 1525'000, ldr::ResultInvalidCpuFreqVddEntry());
if (C.eristaCpuUV) {
if(!C.enableEristaCpuUnsafeFreqs) {
PATCH_OFFSET(ptr, GetDvfsTableLastEntry(C.eristaCpuDvfsTable)->freq);
} else {
PATCH_OFFSET(ptr, GetDvfsTableLastEntry(C.eristaCpuDvfsTableUnsafeFreqs)->freq);
}
} else {
if (C.eristaCpuUV) {
if(!C.enableEristaCpuUnsafeFreqs) {
PATCH_OFFSET(ptr, GetDvfsTableLastEntry(C.eristaCpuDvfsTable)->freq);
} else {
PATCH_OFFSET(ptr, GetDvfsTableLastEntry(C.eristaCpuDvfsTableUnsafeFreqs)->freq);
}
} else {
PATCH_OFFSET(ptr, GetDvfsTableLastEntry(C.eristaCpuDvfsTable)->freq);
}
R_SUCCEED();
}
Result GpuVmin(u32 *ptr) {
if (!C.eristaGpuVmin)
R_SKIP();
PATCH_OFFSET(ptr, (int)C.eristaGpuVmin);
R_SUCCEED();
}
Result GpuVmin(u32 *ptr) {
if (!C.eristaGpuVmin)
R_SKIP();
PATCH_OFFSET(ptr, (int)C.eristaGpuVmin);
}
Result CpuVoltRange(u32 *ptr) {
u32 min_volt_got = *(ptr - 1);
for (const auto &mv : CpuMinVolts) {
if (min_volt_got != mv)
continue;
if (!C.eristaCpuMaxVolt)
R_SKIP();
PATCH_OFFSET(ptr, C.eristaCpuMaxVolt);
R_SUCCEED();
}
R_THROW(ldr::ResultInvalidCpuMinVolt());
}
Result CpuVoltRange(u32 *ptr) {
u32 min_volt_got = *(ptr - 1);
for (const auto &mv : CpuMinVolts) {
if (min_volt_got != mv)
continue;
Result CpuVoltDfll(u32* ptr) {
cvb_cpu_dfll_data *entry = reinterpret_cast<cvb_cpu_dfll_data *>(ptr);
if (!C.eristaCpuMaxVolt)
R_SKIP();
PATCH_OFFSET(ptr, C.eristaCpuMaxVolt);
R_SUCCEED();
}
R_THROW(ldr::ResultInvalidCpuMinVolt());
}
Result CpuVoltDfll(u32* ptr) {
cvb_cpu_dfll_data *entry = reinterpret_cast<cvb_cpu_dfll_data *>(ptr);
// R_UNLESS(entry->tune0_low == 0x0000FFCF, ldr::ResultInvalidCpuVoltDfllEntry());
// R_UNLESS(entry->tune0_low == 0x0000FFCF, ldr::ResultInvalidCpuVoltDfllEntry());
// R_UNLESS(entry->tune0_high == 0x00000000, ldr::ResultInvalidCpuVoltDfllEntry());
// R_UNLESS(entry->tune1_low == 0x012207FF, ldr::ResultInvalidCpuVoltDfllEntry());
// R_UNLESS(entry->tune1_high == 0x03FFF7FF, ldr::ResultInvalidCpuVoltDfllEntry());
if(!C.eristaCpuUV) {
R_SKIP();
}
PATCH_OFFSET(&(entry->dvco_calibration_max), 0x1C);
PATCH_OFFSET(&(entry->tune1_high), 0x10);
PATCH_OFFSET(&(entry->tune_high_margin_millivolts), 0xc);
if(!C.eristaCpuUV) {
R_SKIP();
}
PATCH_OFFSET(&(entry->dvco_calibration_max), 0x1C);
PATCH_OFFSET(&(entry->tune1_high), 0x10);
PATCH_OFFSET(&(entry->tune_high_margin_millivolts), 0xc);
switch(C.eristaCpuUV) {
case 1:
PATCH_OFFSET(&(entry->tune0_low), 0x0000FFFF); //process_id 0 // EOS UV1
PATCH_OFFSET(&(entry->tune1_low), 0x027007FF);
break;
case 2:
PATCH_OFFSET(&(entry->tune0_low), 0x0000EFFF); //process_id 1 // EOS Uv2
PATCH_OFFSET(&(entry->tune1_low), 0x027407FF);
break;
case 3:
PATCH_OFFSET(&(entry->tune0_low), 0x0000DFFF); //process_id 0 // EOS UV3
PATCH_OFFSET(&(entry->tune1_low), 0x027807FF);
break;
case 4:
PATCH_OFFSET(&(entry->tune0_low), 0x0000DFDF); //process_id 1 // EOS Uv4
PATCH_OFFSET(&(entry->tune1_low), 0x027A07FF);
break;
case 5:
PATCH_OFFSET(&(entry->tune0_low), 0x0000CFDF); // EOS UV5
PATCH_OFFSET(&(entry->tune1_low), 0x037007FF);
break;
default:
break;
switch(C.eristaCpuUV) {
case 1:
PATCH_OFFSET(&(entry->tune0_low), 0x0000FFFF); //process_id 0 // EOS UV1
PATCH_OFFSET(&(entry->tune1_low), 0x027007FF);
break;
case 2:
PATCH_OFFSET(&(entry->tune0_low), 0x0000EFFF); //process_id 1 // EOS Uv2
PATCH_OFFSET(&(entry->tune1_low), 0x027407FF);
break;
case 3:
PATCH_OFFSET(&(entry->tune0_low), 0x0000DFFF); //process_id 0 // EOS UV3
PATCH_OFFSET(&(entry->tune1_low), 0x027807FF);
break;
case 4:
PATCH_OFFSET(&(entry->tune0_low), 0x0000DFDF); //process_id 1 // EOS Uv4
PATCH_OFFSET(&(entry->tune1_low), 0x027A07FF);
break;
case 5:
PATCH_OFFSET(&(entry->tune0_low), 0x0000CFDF); // EOS UV5
PATCH_OFFSET(&(entry->tune1_low), 0x037007FF);
break;
default:
break;
}
R_SUCCEED();
}
Result GpuFreqMaxAsm(u32 *ptr32) {
// Check if both two instructions match the pattern
u32 ins1 = *ptr32, ins2 = *(ptr32 + 1);
if (!(asm_compare_no_rd(ins1, asm_pattern[0]) && asm_compare_no_rd(ins2, asm_pattern[1])))
R_THROW(ldr::ResultInvalidGpuFreqMaxPattern());
// Both instructions should operate on the same register
u8 rd = asm_get_rd(ins1);
if (rd != asm_get_rd(ins2))
R_THROW(ldr::ResultInvalidGpuFreqMaxPattern());
u32 max_clock;
switch (C.eristaGpuUV) {
case 0:
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTable)->freq;
break;
case 1:
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTableSLT)->freq;
break;
case 2:
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTableHigh)->freq;
break;
case 3:
if(C.enableEristaGpuUnsafeFreqs) {
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTableUv3UnsafeFreqs)->freq;
} else {
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTable)->freq;
}
break;
default:
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTable)->freq;
break;
}
u32 asm_patch[2] = {
asm_set_rd(asm_set_imm16(asm_pattern[0], max_clock), rd),
asm_set_rd(asm_set_imm16(asm_pattern[1], max_clock >> 16), rd)};
PATCH_OFFSET(ptr32, asm_patch[0]);
PATCH_OFFSET(ptr32 + 1, asm_patch[1]);
R_SUCCEED();
}
Result GpuFreqMaxAsm(u32 *ptr32) {
// Check if both two instructions match the pattern
u32 ins1 = *ptr32, ins2 = *(ptr32 + 1);
if (!(asm_compare_no_rd(ins1, asm_pattern[0]) && asm_compare_no_rd(ins2, asm_pattern[1])))
R_THROW(ldr::ResultInvalidGpuFreqMaxPattern());
Result GpuFreqPllLimit(u32 *ptr) {
clk_pll_param *entry = reinterpret_cast<clk_pll_param *>(ptr);
// Both instructions should operate on the same register
u8 rd = asm_get_rd(ins1);
if (rd != asm_get_rd(ins2))
R_THROW(ldr::ResultInvalidGpuFreqMaxPattern());
// All zero except for freq
for (size_t i = 1; i < sizeof(clk_pll_param) / sizeof(u32); i++) {
R_UNLESS(*(ptr + i) == 0, ldr::ResultInvalidGpuPllEntry());
}
u32 max_clock;
switch (C.eristaGpuUV) {
case 0:
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTable)->freq;
break;
case 1:
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTableSLT)->freq;
break;
case 2:
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTableHigh)->freq;
break;
case 3:
if(C.enableEristaGpuUnsafeFreqs) {
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTableUv3UnsafeFreqs)->freq;
} else {
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTable)->freq;
}
break;
default:
max_clock = GetDvfsTableLastEntry(C.eristaGpuDvfsTable)->freq;
break;
}
u32 asm_patch[2] = {
asm_set_rd(asm_set_imm16(asm_pattern[0], max_clock), rd),
asm_set_rd(asm_set_imm16(asm_pattern[1], max_clock >> 16), rd)};
PATCH_OFFSET(ptr32, asm_patch[0]);
PATCH_OFFSET(ptr32 + 1, asm_patch[1]);
// Double the max clk simply
u32 max_clk = entry->freq * 2;
entry->freq = max_clk;
R_SUCCEED();
}
R_SUCCEED();
}
void MemMtcTableAutoAdjust(EristaMtcTable *table) {
if (C.mtcConf != AUTO_ADJ)
return;
Result GpuFreqPllLimit(u32 *ptr) {
clk_pll_param *entry = reinterpret_cast<clk_pll_param *>(ptr);
#define WRITE_PARAM_ALL_REG(TABLE, PARAM, VALUE) \
TABLE->burst_regs.PARAM = VALUE; \
TABLE->shadow_regs_ca_train.PARAM = VALUE; \
TABLE->shadow_regs_quse_train.PARAM = VALUE; \
TABLE->shadow_regs_rdwr_train.PARAM = VALUE;
// All zero except for freq
for (size_t i = 1; i < sizeof(clk_pll_param) / sizeof(u32); i++) {
R_UNLESS(*(ptr + i) == 0, ldr::ResultInvalidGpuPllEntry());
}
#define GET_CYCLE_CEIL(PARAM) u32(CEIL(double(PARAM) / tCK_avg))
// Double the max clk simply
u32 max_clk = entry->freq * 2;
entry->freq = max_clk;
R_SUCCEED();
}
WRITE_PARAM_ALL_REG(table, emc_rc, GET_CYCLE_CEIL(tRC));
WRITE_PARAM_ALL_REG(table, emc_rfc, GET_CYCLE_CEIL(tRFCab));
WRITE_PARAM_ALL_REG(table, emc_rfcpb, GET_CYCLE_CEIL(tRFCpb));
WRITE_PARAM_ALL_REG(table, emc_ras, GET_CYCLE_CEIL(tRAS));
WRITE_PARAM_ALL_REG(table, emc_rp, GET_CYCLE_CEIL(tRPpb));
WRITE_PARAM_ALL_REG(table, emc_r2p, GET_CYCLE_CEIL(tRTP));
WRITE_PARAM_ALL_REG(table, emc_r2w, R2W);
WRITE_PARAM_ALL_REG(table, emc_w2r, W2R);
WRITE_PARAM_ALL_REG(table, emc_w2p, WTP);
WRITE_PARAM_ALL_REG(table, emc_rd_rcd, GET_CYCLE_CEIL(tRCD));
WRITE_PARAM_ALL_REG(table, emc_wr_rcd, GET_CYCLE_CEIL(tRCD));
WRITE_PARAM_ALL_REG(table, emc_rrd, GET_CYCLE_CEIL(tRRD));
WRITE_PARAM_ALL_REG(table, emc_refresh, REFRESH);
WRITE_PARAM_ALL_REG(table, emc_pre_refresh_req_cnt, REFRESH / 4);
WRITE_PARAM_ALL_REG(table, emc_pdex2wr, GET_CYCLE_CEIL(tXP));
WRITE_PARAM_ALL_REG(table, emc_pdex2rd, GET_CYCLE_CEIL(tXP));
WRITE_PARAM_ALL_REG(table, emc_txsr, MIN(GET_CYCLE_CEIL(tXSR), (u32)0x3fe));
WRITE_PARAM_ALL_REG(table, emc_txsrdll, MIN(GET_CYCLE_CEIL(tXSR), (u32)0x3fe));
WRITE_PARAM_ALL_REG(table, emc_tckesr, GET_CYCLE_CEIL(tSR));
WRITE_PARAM_ALL_REG(table, emc_tfaw, GET_CYCLE_CEIL(tFAW));
WRITE_PARAM_ALL_REG(table, emc_trpab, GET_CYCLE_CEIL(tRPab));
WRITE_PARAM_ALL_REG(table, emc_trefbw, REFBW);
void MemMtcTableAutoAdjust(EristaMtcTable *table) {
if (C.mtcConfErista != AUTO_ADJ)
return;
#define WRITE_PARAM_BURST_MC_REG(TABLE, PARAM, VALUE) TABLE->burst_mc_regs.PARAM = VALUE;
#define WRITE_PARAM_ALL_REG(TABLE, PARAM, VALUE) \
TABLE->burst_regs.PARAM = VALUE; \
TABLE->shadow_regs_ca_train.PARAM = VALUE; \
TABLE->shadow_regs_quse_train.PARAM = VALUE; \
TABLE->shadow_regs_rdwr_train.PARAM = VALUE;
constexpr u32 MC_ARB_DIV = 4;
constexpr u32 MC_ARB_SFA = 2;
table->burst_mc_regs.mc_emem_arb_timing_rcd = CEIL(GET_CYCLE_CEIL(tRCD) / MC_ARB_DIV) - 2;
table->burst_mc_regs.mc_emem_arb_timing_rp = CEIL(GET_CYCLE_CEIL(tRPpb) / MC_ARB_DIV) - 1 + MC_ARB_SFA;
table->burst_mc_regs.mc_emem_arb_timing_rc = CEIL(GET_CYCLE_CEIL(tRC) / MC_ARB_DIV) - 1;
table->burst_mc_regs.mc_emem_arb_timing_ras = CEIL(GET_CYCLE_CEIL(tRAS) / MC_ARB_DIV) - 2;
table->burst_mc_regs.mc_emem_arb_timing_faw = CEIL(GET_CYCLE_CEIL(tFAW) / MC_ARB_DIV) - 1;
table->burst_mc_regs.mc_emem_arb_timing_rrd = CEIL(GET_CYCLE_CEIL(tRRD) / MC_ARB_DIV) - 1;
//table->burst_mc_regs.mc_emem_arb_timing_rap2pre = CEIL(GET_CYCLE_CEIL(tRTP) / MC_ARB_DIV);
//table->burst_mc_regs.mc_emem_arb_timing_wap2pre = CEIL(WTP / MC_ARB_DIV);
// table->burst_mc_regs.mc_emem_arb_timing_r2r = CEIL(table->burst_regs.emc_rext / MC_ARB_DIV) - 1 + MC_ARB_SFA;
// table->burst_mc_regs.mc_emem_arb_timing_w2w = CEIL(table->burst_regs.emc_wext / MC_ARB_DIV) - 1 + MC_ARB_SFA;
// table->burst_mc_regs.mc_emem_arb_timing_r2w = CEIL(R2W / MC_ARB_DIV) - 1 + MC_ARB_SFA;
// table->burst_mc_regs.mc_emem_arb_timing_w2r = CEIL(W2R / MC_ARB_DIV) - 1 + MC_ARB_SFA;
table->burst_mc_regs.mc_emem_arb_timing_rfcpb = CEIL(GET_CYCLE_CEIL(tRFCpb) / MC_ARB_DIV);
// table->burst_mc_regs.mc_emem_arb_timing_ccdmw = CEIL(tCCDMW / MC_ARB_DIV) -1 + MC_ARB_SFA;
}
#define GET_CYCLE_CEIL(PARAM) u32(CEIL(double(PARAM) / tCK_avg))
Result MemFreqMtcTable(u32 *ptr) {
u32 khz_list[] = {1600000, 1331200, 1065600, 800000, 665600, 408000, 204000, 102000, 68000, 40800};
u32 khz_list_size = sizeof(khz_list) / sizeof(u32);
WRITE_PARAM_ALL_REG(table, emc_rc, GET_CYCLE_CEIL(tRC));
WRITE_PARAM_ALL_REG(table, emc_rfc, GET_CYCLE_CEIL(tRFCab));
WRITE_PARAM_ALL_REG(table, emc_rfcpb, GET_CYCLE_CEIL(tRFCpb));
WRITE_PARAM_ALL_REG(table, emc_ras, GET_CYCLE_CEIL(tRAS));
WRITE_PARAM_ALL_REG(table, emc_rp, GET_CYCLE_CEIL(tRPpb));
WRITE_PARAM_ALL_REG(table, emc_r2p, GET_CYCLE_CEIL(tRTP));
WRITE_PARAM_ALL_REG(table, emc_rd_rcd, GET_CYCLE_CEIL(tRCD));
WRITE_PARAM_ALL_REG(table, emc_wr_rcd, GET_CYCLE_CEIL(tRCD));
WRITE_PARAM_ALL_REG(table, emc_rrd, GET_CYCLE_CEIL(tRRD));
WRITE_PARAM_ALL_REG(table, emc_refresh, REFRESH);
WRITE_PARAM_ALL_REG(table, emc_pre_refresh_req_cnt, REFRESH / 4);
WRITE_PARAM_ALL_REG(table, emc_pdex2wr, GET_CYCLE_CEIL(tXP));
WRITE_PARAM_ALL_REG(table, emc_pdex2rd, GET_CYCLE_CEIL(tXP));
WRITE_PARAM_ALL_REG(table, emc_txsr, MIN(GET_CYCLE_CEIL(tXSR), (u32)0x3fe));
WRITE_PARAM_ALL_REG(table, emc_txsrdll, MIN(GET_CYCLE_CEIL(tXSR), (u32)0x3fe));
WRITE_PARAM_ALL_REG(table, emc_tckesr, GET_CYCLE_CEIL(tSR));
WRITE_PARAM_ALL_REG(table, emc_tfaw, GET_CYCLE_CEIL(tFAW));
WRITE_PARAM_ALL_REG(table, emc_trpab, GET_CYCLE_CEIL(tRPab));
WRITE_PARAM_ALL_REG(table, emc_trefbw, REFBW);
// Generate list for mtc table pointers
EristaMtcTable *table_list[khz_list_size];
for (u32 i = 0; i < khz_list_size; i++) {
u8 *table = reinterpret_cast<u8 *>(ptr) - offsetof(EristaMtcTable, rate_khz) - i * sizeof(EristaMtcTable);
table_list[i] = reinterpret_cast<EristaMtcTable *>(table);
R_UNLESS(table_list[i]->rate_khz == khz_list[i], ldr::ResultInvalidMtcTable());
R_UNLESS(table_list[i]->rev == MTC_TABLE_REV, ldr::ResultInvalidMtcTable());
}
#define WRITE_PARAM_BURST_MC_REG(TABLE, PARAM, VALUE) TABLE->burst_mc_regs.PARAM = VALUE;
if (C.eristaEmcMaxClock <= EmcClkOSLimit)
R_SKIP();
constexpr u32 MC_ARB_DIV = 4;
constexpr u32 MC_ARB_SFA = 2;
table->burst_mc_regs.mc_emem_arb_timing_rcd = CEIL(GET_CYCLE_CEIL(tRCD) / MC_ARB_DIV) - 2;
table->burst_mc_regs.mc_emem_arb_timing_rp = CEIL(GET_CYCLE_CEIL(tRPpb) / MC_ARB_DIV) - 1 + MC_ARB_SFA;
table->burst_mc_regs.mc_emem_arb_timing_rc = CEIL(GET_CYCLE_CEIL(tRC) / MC_ARB_DIV) - 1;
table->burst_mc_regs.mc_emem_arb_timing_ras = CEIL(GET_CYCLE_CEIL(tRAS) / MC_ARB_DIV) - 2;
table->burst_mc_regs.mc_emem_arb_timing_faw = CEIL(GET_CYCLE_CEIL(tFAW) / MC_ARB_DIV) - 1;
table->burst_mc_regs.mc_emem_arb_timing_rrd = CEIL(GET_CYCLE_CEIL(tRRD) / MC_ARB_DIV) - 1;
//table->burst_mc_regs.mc_emem_arb_timing_rap2pre = CEIL(GET_CYCLE_CEIL(tRTP) / MC_ARB_DIV);
//table->burst_mc_regs.mc_emem_arb_timing_wap2pre = CEIL(WTP / MC_ARB_DIV);
// table->burst_mc_regs.mc_emem_arb_timing_r2r = CEIL(table->burst_regs.emc_rext / MC_ARB_DIV) - 1 + MC_ARB_SFA;
// table->burst_mc_regs.mc_emem_arb_timing_w2w = CEIL(table->burst_regs.emc_wext / MC_ARB_DIV) - 1 + MC_ARB_SFA;
// table->burst_mc_regs.mc_emem_arb_timing_r2w = CEIL(R2W / MC_ARB_DIV) - 1 + MC_ARB_SFA;
// table->burst_mc_regs.mc_emem_arb_timing_w2r = CEIL(W2R / MC_ARB_DIV) - 1 + MC_ARB_SFA;
table->burst_mc_regs.mc_emem_arb_timing_rfcpb = CEIL(GET_CYCLE_CEIL(tRFCpb) / MC_ARB_DIV);
// table->burst_mc_regs.mc_emem_arb_timing_ccdmw = CEIL(tCCDMW / MC_ARB_DIV) -1 + MC_ARB_SFA;
}
// Make room for new mtc table, discarding useless 40.8 MHz table
// 40800 overwritten by 68000, ..., 1331200 overwritten by 1600000, leaving table_list[0] not overwritten
for (u32 i = khz_list_size - 1; i > 0; i--)
std::memcpy(static_cast<void *>(table_list[i]), static_cast<void *>(table_list[i - 1]), sizeof(EristaMtcTable));
Result MemFreqMtcTable(u32 *ptr) {
u32 khz_list[] = {1600000, 1331200, 1065600, 800000, 665600, 408000, 204000, 102000, 68000, 40800};
u32 khz_list_size = sizeof(khz_list) / sizeof(u32);
MemMtcTableAutoAdjust(table_list[0]);
PATCH_OFFSET(ptr, C.eristaEmcMaxClock);
// Generate list for mtc table pointers
EristaMtcTable *table_list[khz_list_size];
for (u32 i = 0; i < khz_list_size; i++) {
u8 *table = reinterpret_cast<u8 *>(ptr) - offsetof(EristaMtcTable, rate_khz) - i * sizeof(EristaMtcTable);
table_list[i] = reinterpret_cast<EristaMtcTable *>(table);
R_UNLESS(table_list[i]->rate_khz == khz_list[i], ldr::ResultInvalidMtcTable());
R_UNLESS(table_list[i]->rev == MTC_TABLE_REV, ldr::ResultInvalidMtcTable());
}
// Handle customize table replacement
// if (C.mtcConf == CUSTOMIZED_ALL) {
// MemMtcCustomizeTable(table_list[0], const_cast<EristaMtcTable *>(C.eristaMtcTable));
//}
if (C.eristaEmcMaxClock <= EmcClkOSLimit)
R_SKIP();
R_SUCCEED();
}
// Make room for new mtc table, discarding useless 40.8 MHz table
// 40800 overwritten by 68000, ..., 1331200 overwritten by 1600000, leaving table_list[0] not overwritten
for (u32 i = khz_list_size - 1; i > 0; i--)
std::memcpy(static_cast<void *>(table_list[i]), static_cast<void *>(table_list[i - 1]), sizeof(EristaMtcTable));
Result MemFreqMax(u32 *ptr) {
if (C.eristaEmcMaxClock <= EmcClkOSLimit)
R_SKIP();
MemMtcTableAutoAdjust(table_list[0]);
PATCH_OFFSET(ptr, C.eristaEmcMaxClock);
PATCH_OFFSET(ptr, C.eristaEmcMaxClock);
// Handle customize table replacement
// if (C.mtcConf == CUSTOMIZED_ALL) {
// MemMtcCustomizeTable(table_list[0], const_cast<EristaMtcTable *>(C.eristaMtcTable));
//}
R_SUCCEED();
}
R_SUCCEED();
}
void Patch(uintptr_t mapped_nso, size_t nso_size) {
u32 CpuCvbDefaultMaxFreq = static_cast<u32>(GetDvfsTableLastEntry(CpuCvbTableDefault)->freq);
u32 GpuCvbDefaultMaxFreq = static_cast<u32>(GetDvfsTableLastEntry(GpuCvbTableDefault)->freq);
Result MemFreqMax(u32 *ptr) {
if (C.eristaEmcMaxClock <= EmcClkOSLimit)
R_SKIP();
PatcherEntry<u32> patches[] = {
{"CPU Freq Vdd", &CpuFreqVdd, 1, nullptr, CpuClkOSLimit },
{"CPU Freq Table", CpuFreqCvbTable<false>, 1, nullptr, CpuCvbDefaultMaxFreq},
{"CPU Volt Limit", &CpuVoltRange, 13, nullptr, CpuVoltOfficial },
{"CPU Volt Dfll", &CpuVoltDfll, 1, nullptr, 0xFFEAD0FF },
{"GPU Freq Table", GpuFreqCvbTable<false>, 1, nullptr, GpuCvbDefaultMaxFreq},
{"GPU Freq Asm", &GpuFreqMaxAsm, 2, &GpuMaxClockPatternFn},
{"GPU Freq PLL", &GpuFreqPllLimit, 1, nullptr, GpuClkPllLimit},
{"MEM Freq Mtc", &MemFreqMtcTable, 0, nullptr, EmcClkOSLimit},
{"MEM Freq Max", &MemFreqMax, 0, nullptr, EmcClkOSLimit},
{"MEM Freq PLLM", &MemFreqPllmLimit, 2, nullptr, EmcClkPllmLimit},
{"MEM Volt", &MemVoltHandler, 2, nullptr, MemVoltHOS},
{"GPU Vmin", &GpuVmin, 0, nullptr, gpuVmin},
};
PATCH_OFFSET(ptr, C.eristaEmcMaxClock);
for (uintptr_t ptr = mapped_nso;
ptr <= mapped_nso + nso_size - sizeof(EristaMtcTable);
ptr += sizeof(u32)) {
u32 *ptr32 = reinterpret_cast<u32 *>(ptr);
for (auto &entry : patches) {
if (R_SUCCEEDED(entry.SearchAndApply(ptr32)))
break;
}
}
R_SUCCEED();
}
for (auto &entry : patches) {
LOGGING("%s Count: %zu", entry.description, entry.patched_count);
if (R_FAILED(entry.CheckResult()))
CRASH(entry.description);
}
}
void Patch(uintptr_t mapped_nso, size_t nso_size) {
u32 CpuCvbDefaultMaxFreq = static_cast<u32>(GetDvfsTableLastEntry(CpuCvbTableDefault)->freq);
u32 GpuCvbDefaultMaxFreq = static_cast<u32>(GetDvfsTableLastEntry(GpuCvbTableDefault)->freq);
PatcherEntry<u32> patches[] = {
{"CPU Freq Vdd", &CpuFreqVdd, 1, nullptr, CpuClkOSLimit },
{"CPU Freq Table", CpuFreqCvbTable<false>, 1, nullptr, CpuCvbDefaultMaxFreq},
{"CPU Volt Limit", &CpuVoltRange, 13, nullptr, CpuVoltOfficial },
{"CPU Volt Dfll", &CpuVoltDfll, 1, nullptr, 0xFFEAD0FF },
{"GPU Freq Table", GpuFreqCvbTable<false>, 1, nullptr, GpuCvbDefaultMaxFreq},
{"GPU Freq Asm", &GpuFreqMaxAsm, 2, &GpuMaxClockPatternFn},
{"GPU Freq PLL", &GpuFreqPllLimit, 1, nullptr, GpuClkPllLimit},
{"MEM Freq Mtc", &MemFreqMtcTable, 0, nullptr, EmcClkOSLimit},
{"MEM Freq Max", &MemFreqMax, 0, nullptr, EmcClkOSLimit},
{"MEM Freq PLLM", &MemFreqPllmLimit, 2, nullptr, EmcClkPllmLimit},
{"MEM Volt", &MemVoltHandler, 2, nullptr, MemVoltHOS},
{"GPU Vmin", &GpuVmin, 0, nullptr, gpuVmin},
};
for (uintptr_t ptr = mapped_nso;
ptr <= mapped_nso + nso_size - sizeof(EristaMtcTable);
ptr += sizeof(u32)) {
u32 *ptr32 = reinterpret_cast<u32 *>(ptr);
for (auto &entry : patches) {
if (R_SUCCEEDED(entry.SearchAndApply(ptr32)))
break;
}
}
for (auto &entry : patches) {
LOGGING("%s Count: %zu", entry.description, entry.patched_count);
if (R_FAILED(entry.CheckResult()))
CRASH(entry.description);
}
}
}
}