From 2778d29c430b2af484678f4f208bee5b40d6a89d Mon Sep 17 00:00:00 2001
From: KazushiM <85604869+KazushiMe@users.noreply.github.com>
Date: Fri, 21 Jan 2022 01:43:47 +0800
Subject: [PATCH] [WIP] Auto patching without finding offsets manually,
including EMC OC offsets for Erista
---
.../loader/source/ldr_oc_patch.hpp | 1651 +++++++++--------
.../loader/source/ldr_patcher.cpp | 232 ---
.../loader/source/ldr_process_creation.cpp | 783 ++++++++
3 files changed, 1683 insertions(+), 983 deletions(-)
delete mode 100644 Source/Atmosphere/stratosphere/loader/source/ldr_patcher.cpp
create mode 100644 Source/Atmosphere/stratosphere/loader/source/ldr_process_creation.cpp
diff --git a/Source/Atmosphere/stratosphere/loader/source/ldr_oc_patch.hpp b/Source/Atmosphere/stratosphere/loader/source/ldr_oc_patch.hpp
index 7c097e93..de8f7947 100644
--- a/Source/Atmosphere/stratosphere/loader/source/ldr_oc_patch.hpp
+++ b/Source/Atmosphere/stratosphere/loader/source/ldr_oc_patch.hpp
@@ -1,800 +1,949 @@
+/*
+ * Copyright (C) Switch-OC-Suite
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
//#define EXPERIMENTAL
-constexpr ro::ModuleId PcvModuleId[] = {
- // ParseModuleId("91D61D59D7002378E35584FC0B38C7693A3ABAB5"), //11.0.0
- // ParseModuleId("C503E96550F302E121873136B814A529863D949B"), //12.x
- ParseModuleId("2058C97C551571506656AA04EC85E2B1B01B155C"), //13.0.0-13.2.0
-};
+#pragma once
+#include
-constexpr ro::ModuleId PtmModuleId[] = {
- // ParseModuleId("A79706954C6C45568B0FFE610627E2E89D8FB0D4"), //12.x
- ParseModuleId("2CA78D4066C1C11317CC2705EBADA9A51D3AC981"), //13.0.0-13.2.0
-};
+namespace ams::ldr {
+ // RAM (EMC) clock rates, in kHz (AdjustMtcTable derives tCK in ns as 1000'000. / EmcClock):
+ // 1862400, 1894400, 1932800, 1996800, 2064000, 2099200, 2131200
+ // Other values might work as well
+ // RAM overclocking can be UNSTABLE and cause graphical glitches / instabilities / NAND corruption
+ // 1862400 and 1996800 have been tested stable for all DRAM chips
+ constexpr u32 EmcClock = 1996800;
-namespace pcv {
- typedef struct {
- s32 c0 = 0;
- s32 c1 = 0;
- s32 c2 = 0;
- s32 c3 = 0;
- s32 c4 = 0;
- s32 c5 = 0;
- } cvb_coefficients;
+ // CPU max clock rate:
+ // >= 2193000 will enable overvolting
+ constexpr u32 CpuMaxClock = 2397000; // same value as the highest freq entry in pcv::NewCpuTables
- typedef struct {
- u64 freq = 0;
- cvb_coefficients cvb_dfll_param;
- cvb_coefficients cvb_pll_param; // only c0 is reserved
- } cpu_freq_cvb_table_t;
+ // CPU max voltage, in mV
+ constexpr u32 CpuVoltageLimit = 1220; // default max 1120mV
+ static_assert(CpuVoltageLimit <= 1250); // compile-time guard: the build fails if the limit is raised above 1250
- typedef struct {
- u64 freq = 0;
- cvb_coefficients cvb_dfll_param; // empty, dfll clock source not selected
- cvb_coefficients cvb_pll_param;
- } gpu_cvb_pll_table_t;
+ namespace pcv {
+ typedef struct { // six s32 coefficients; any not listed in an initializer default to 0
+ s32 c0 = 0;
+ s32 c1 = 0;
+ s32 c2 = 0;
+ s32 c3 = 0;
+ s32 c4 = 0;
+ s32 c5 = 0;
+ } cvb_coefficients;
- typedef struct {
- u64 freq;
- s32 volt[4] = {0};
- } emc_dvb_dvfs_table_t;
+ typedef struct { // one row of the CPU table: a frequency plus its DFLL and PLL coefficient sets
+ u64 freq; // no default initializer; every NewCpuTables row sets it explicitly
+ cvb_coefficients cvb_dfll_param;
+ cvb_coefficients cvb_pll_param; // only c0 is reserved (NewCpuTables rows fill just c0)
+ } cpu_freq_cvb_table_t;
- /* CPU */
- constexpr u32 CpuVoltageLimitOffsets[][11] = {
- // { 0xE1A8C, 0xE1A98, 0xE1AA4, 0xE1AB0, 0xE1AF8, 0xE1B04, 0xE1B10, 0xE1B1C, 0xE1B28, 0xE1B34, 0xE1F4C },
- // { 0xF08DC, 0xF08E8, 0xF08F4, 0xF0900, 0xF0948, 0xF0954, 0xF0960, 0xF096C, 0xF0978, 0xF0984, 0xF0D9C },
- { 0xF092C, 0xF0938, 0xF0944, 0xF0950, 0xF0998, 0xF09A4, 0xF09B0, 0xF09BC, 0xF09C8, 0xF09D4, 0xF0DEC },
- };
- constexpr u32 NewCpuVoltageLimit = 1220;
- static_assert(NewCpuVoltageLimit <= 1300); //1300mV hangs for me
+ typedef struct { // one row of the GPU table: a frequency plus its DFLL and PLL coefficient sets
+ u64 freq; // no default initializer; every NewGpuTables row sets it explicitly
+ cvb_coefficients cvb_dfll_param; // empty, dfll clock source not selected
+ cvb_coefficients cvb_pll_param; // NewGpuTables rows fill c0..c5 here
+ } gpu_cvb_pll_table_t;
- constexpr u32 CpuVoltageOldTableCoeff[][10] = {
- // { 0xE2140, 0xE2178, 0xE21B0, 0xE21E8, 0xE2220, 0xE2258, 0xE2290, 0xE22C8, 0xE2300, 0xE2338 },
- // { 0xF0F90, 0xF0FC8, 0xF1000, 0xF1038, 0xF1070, 0xF10A8, 0xF10E0, 0xF1118, 0xF1150, 0xF1188 },
- { 0xF0FE0, 0xF1018, 0xF1050, 0xF1088, 0xF10C0, 0xF10F8, 0xF1130, 0xF1168, 0xF11A0, 0xF11D8 },
- };
- constexpr u32 CpuVoltageScale = 1000;
- constexpr u32 NewCpuVoltageScaled = NewCpuVoltageLimit * CpuVoltageScale;
+ typedef struct { // one EMC DVB row; in the visible part of this file only the commented-out EmcDvbTable uses it
+ u64 freq;
+ s32 volt[4] = {0}; // zero-initialised; the commented-out EmcDvbTable rows list three values each
+ } emc_dvb_dvfs_table_t;
- constexpr u32 CpuTablesFreeSpace[] = {
- // 0xE2350,
- // 0xF11A0,
- 0xF11F0,
- };
- constexpr cpu_freq_cvb_table_t NewCpuTables[] = {
- // OldCpuTables
- // { 204000, { 721589, -12695, 27 }, { 1120000 } },
- // { 306000, { 747134, -14195, 27 }, { 1120000 } },
- // { 408000, { 776324, -15705, 27 }, { 1120000 } },
- // { 510000, { 809160, -17205, 27 }, { 1120000 } },
- // { 612000, { 845641, -18715, 27 }, { 1120000 } },
- // { 714000, { 885768, -20215, 27 }, { 1120000 } },
- // { 816000, { 929540, -21725, 27 }, { 1120000 } },
- // { 918000, { 976958, -23225, 27 }, { 1120000 } },
- // { 1020000, { 1028021, -24725, 27 }, { 1120000 } },
- // { 1122000, { 1082730, -26235, 27 }, { 1120000 } },
- // { 1224000, { 1141084, -27735, 27 }, { 1120000 } },
- // { 1326000, { 1203084, -29245, 27 }, { 1120000 } },
- // { 1428000, { 1268729, -30745, 27 }, { 1120000 } },
- // { 1581000, { 1374032, -33005, 27 }, { 1120000 } },
- // { 1683000, { 1448791, -34505, 27 }, { 1120000 } },
- // { 1785000, { 1527196, -36015, 27 }, { 1120000 } },
- // { 1887000, { 1609246, -37515, 27 }, { 1120000 } },
- // { 1963500, { 1675751, -38635, 27 }, { 1120000 } },
- { 2091000, { 1785520, -40523, 27 }, { NewCpuVoltageScaled } },
- { 2193000, { 1878755, -42027, 27 }, { NewCpuVoltageScaled } },
- { 2295000, { 1975655, -43531, 27 }, { NewCpuVoltageScaled } },
- { 2397000, { 2076220, -45036, 27 }, { NewCpuVoltageScaled } },
- };
- static_assert(sizeof(NewCpuTables) <= sizeof(cpu_freq_cvb_table_t)*14);
+ /* CPU */
+ constexpr u32 NewCpuVoltageScaled = CpuVoltageLimit * 1000; // limit x1000 (presumably mV -> uV: the old rows use 1120000 for the 1120mV default); used as cvb_pll_param.c0 in NewCpuTables
- constexpr u32 MaxCpuClockOffset[] = {
- // 0xE2740,
- // 0xF1590,
- 0xF15E0,
- };
- constexpr u32 NewMaxCpuClock = 2397000;
+ // TODO: correctly derive c0-c1 dfll coefficients
+ constexpr cpu_freq_cvb_table_t NewCpuTables[] = { // rows: { freq, cvb_dfll_param {c0, c1, c2}, cvb_pll_param {c0} }
+ // OldCpuTables (commented out, kept for reference)
+ // { 204000, { 721589, -12695, 27 }, { 1120000 } },
+ // { 306000, { 747134, -14195, 27 }, { 1120000 } },
+ // { 408000, { 776324, -15705, 27 }, { 1120000 } },
+ // { 510000, { 809160, -17205, 27 }, { 1120000 } },
+ // { 612000, { 845641, -18715, 27 }, { 1120000 } },
+ // { 714000, { 885768, -20215, 27 }, { 1120000 } },
+ // { 816000, { 929540, -21725, 27 }, { 1120000 } },
+ // { 918000, { 976958, -23225, 27 }, { 1120000 } },
+ // { 1020000, { 1028021, -24725, 27 }, { 1120000 } },
+ // { 1122000, { 1082730, -26235, 27 }, { 1120000 } },
+ // { 1224000, { 1141084, -27735, 27 }, { 1120000 } },
+ // { 1326000, { 1203084, -29245, 27 }, { 1120000 } },
+ // { 1428000, { 1268729, -30745, 27 }, { 1120000 } },
+ // { 1581000, { 1374032, -33005, 27 }, { 1120000 } },
+ // { 1683000, { 1448791, -34505, 27 }, { 1120000 } },
+ // { 1785000, { 1527196, -36015, 27 }, { 1120000 } },
+ // { 1887000, { 1609246, -37515, 27 }, { 1120000 } },
+ // { 1963500, { 1675751, -38635, 27 }, { 1120000 } },
+ { 2091000, { 1785520, -40523, 27 }, { NewCpuVoltageScaled } },
+ { 2193000, { 1878755, -42027, 27 }, { NewCpuVoltageScaled } },
+ { 2295000, { 1975655, -43531, 27 }, { NewCpuVoltageScaled } },
+ { 2397000, { 2076220, -45036, 27 }, { NewCpuVoltageScaled } },
+ };
+ static_assert(sizeof(NewCpuTables) <= sizeof(cpu_freq_cvb_table_t)*14); // compile-time cap: at most 14 rows
- /* GPU */
- // constexpr u32 GpuVoltageLimitOffsets[] = {
- // // 0xE3044,
- // // 0xF1E94,
- // 0xF1EE4,
- // };
- // constexpr u32 NewGpuVoltageLimit = 1170; // default max 1050mV
+ /* GPU */
+ constexpr gpu_cvb_pll_table_t NewGpuTables[] = { // rows: { freq, cvb_dfll_param (left empty), cvb_pll_param {c0..c5} }
+ // OldGpuTables (commented out, kept for reference)
+ // { 76800, {}, { 610000, } },
+ // { 153600, {}, { 610000, } },
+ // { 230400, {}, { 610000, } },
+ // { 307200, {}, { 610000, } },
+ // { 460800, {}, { 610000, } },
+ // { 537600, {}, { 801688, -10900, -163, 298, -10599, 162 } },
+ // { 614400, {}, { 824214, -5743, -452, 238, -6325, 81 } },
+ // { 691200, {}, { 848830, -3903, -552, 119, -4030, -2 } },
+ // { 768000, {}, { 891575, -4409, -584, 0, -2849, 39 } },
+ // { 844800, {}, { 940071, -5367, -602, -60, -63, -93 } },
+ // { 921600, {}, { 986765, -6637, -614, -179, 1905, -13 } },
+ // { 998400, {}, { 1098475, -13529, -497, -179, 3626, 9 } },
+ // { 1075200, {}, { 1163644, -12688, -648, 0, 1077, 40 } },
+ // { 1152000, {}, { 1204812, -9908, -830, 0, 1469, 110 } },
+ // { 1228800, {}, { 1277303, -11675, -859, 0, 3722, 313 } },
+ // { 1267200, {}, { 1335531, -12567, -867, 0, 3681, 559 } },
+ { 1305600, {}, { 1374130, -13725, -859, 0, 4442, 576 } },
+ };
+ static_assert(sizeof(NewGpuTables) <= sizeof(gpu_cvb_pll_table_t)*15); // compile-time cap: at most 15 rows
- constexpr u32 GpuTablesFreeSpace[] = {
- // 0xE3410,
- // 0xF2260,
- 0xF22B0,
- };
- // No way to correctly derive c0-c5 coefficients, as coefficients >= 1152000 only make sense
- constexpr gpu_cvb_pll_table_t NewGpuTables[] = {
- // OldGpuTables
- // { 537600, {}, { 801688, -10900, -163, 298, -10599, 162 } },
- // { 614400, {}, { 824214, -5743, -452, 238, -6325, 81 } },
- // { 691200, {}, { 848830, -3903, -552, 119, -4030, -2 } },
- // { 768000, {}, { 891575, -4409, -584, 0, -2849, 39 } },
- // { 844800, {}, { 940071, -5367, -602, -60, -63, -93 } },
- // { 921600, {}, { 986765, -6637, -614, -179, 1905, -13 } },
- // { 998400, {}, { 1098475, -13529, -497, -179, 3626, 9 } },
- // { 1075200, {}, { 1163644, -12688, -648, 0, 1077, 40 } },
- // { 1152000, {}, { 1204812, -9908, -830, 0, 1469, 110 } },
- // { 1228800, {}, { 1277303, -11675, -859, 0, 3722, 313 } },
- // { 1267200, {}, { 1335531, -12567, -867, 0, 3681, 559 } },
- { 1305600, {}, { 1374130, -13725, -859, 0, 4442, 576 } },
- };
- static_assert(sizeof(NewGpuTables) <= sizeof(gpu_cvb_pll_table_t)*15);
+ /* EMC */
- constexpr u32 Reg1MaxGpuOffset[] = {
- // 0x2E0AC,
- // 0x3F6CC,
- 0x3F12C,
- };
- constexpr u8 Reg1NewMaxGpuClock[][0xC] = {
- /* Original: 1228.8MHz
- *
- * MOV W13,#0x1000
- * MOVK W13,#0xE,LSL #16
- * ADD X13, X13, #0x4B,LSL#12
- *
- * Bump to 1536MHz
- *
- * MOV W13,#0x7000
- * MOVK W13,#0x17,LSL #16
- * NOP
- */
- // { 0x0D, 0x00, 0x8E, 0x52, 0xED, 0x02, 0xA0, 0x72, 0x1F, 0x20, 0x03, 0xD5 },
- // { 0x0B, 0x00, 0x8E, 0x52, 0xEB, 0x02, 0xA0, 0x72, 0x1F, 0x20, 0x03, 0xD5 },
- { 0x0B, 0x00, 0x8E, 0x52, 0xEB, 0x02, 0xA0, 0x72, 0x1F, 0x20, 0x03, 0xD5 },
- };
+ // DvbTable is all about frequency scaling along with CPU core voltage, no need to care about this for now.
- constexpr u32 Reg2MaxGpuOffset[] = {
- // 0x2E110,
- // 0x3F730,
- 0x3F190,
- };
- constexpr u8 Reg2NewMaxGpuClock[][0x8] = {
- /* Original: 921.6MHz
- *
- * MOV W13,#0x1000
- * MOVK W13,#0xE,LSL #16
- *
- * Bump to 1536MHz
- *
- * MOV W13,#0x7000
- * MOVK W13,#0x17,LSL #16
- */
- // { 0x0D, 0x00, 0x8E, 0x52, 0xED, 0x02, 0xA0, 0x72, },
- // { 0x0B, 0x00, 0x8E, 0x52, 0xEB, 0x02, 0xA0, 0x72, },
- { 0x0B, 0x00, 0x8E, 0x52, 0xEB, 0x02, 0xA0, 0x72, },
- };
+ // constexpr emc_dvb_dvfs_table_t EmcDvbTable[6] =
+ // {
+ // { 204000, { 637, 637, 637, } },
+ // { 408000, { 637, 637, 637, } },
+ // { 800000, { 637, 637, 637, } },
+ // { 1065600, { 637, 637, 637, } },
+ // { 1331200, { 650, 637, 637, } },
+ // { 1600000, { 675, 650, 637, } },
+ // };
- /* EMC */
+ // Mariko has 3 mtc tables (204/1331/1600 MHz); only these 3 frequencies can be set.
- // DvbTable is all about frequency scaling along with CPU core voltage, no need to care about this for now.
+ // Mariko mtc tables starting from rev, see mtc_timing_table.hpp for parameters.
+ // All mariko mtc tables will be patched to simplify the procedure.
+ #include "mtc_timing_table.hpp"
- // constexpr u32 EmcDvbTableOffsets[] =
- // {
- // 0xFFFFFFFF,
- // 0xFFFFFFFF,
- // 0xF0628,
- // };
-
- // constexpr emc_dvb_dvfs_table_t EmcDvbTable[6] =
- // {
- // { 204000, { 637, 637, 637, } },
- // { 408000, { 637, 637, 637, } },
- // { 800000, { 637, 637, 637, } },
- // { 1065600, { 637, 637, 637, } },
- // { 1331200, { 650, 637, 637, } },
- // { 1600000, { 675, 650, 637, } },
- // };
-
- constexpr u32 EmcDvb1331[] = {
- 0xF0688,
- };
-
- // Sourced from 13.x pcv module
- // 1st regulator table, 0x142778 - 0x143BB4, if mask = 0b0110101
- // 2nd regulator table, 0x143BB8 - 0x144FF4, if mask = 0b1010011
-
- // Access pattern:
- // BL 0x6C390 // read mask from 0x195588 (.bss section) and return X0 (address of regulator table)
- // MOV W8, #0x120 // offset per entry
- // (S)MADD(L) X8, X22, X8, X0 // X8 = X22 * X8 + X0, X22 is regulator entry ID (0x11 for max77812_dram)
- // LDR W8, [X8, #0x10] // read maxim regulator identifier
- // CMP W8, #3
- // B.EQ ...
-
- // 1st regulator table:
- // 0x143A98 2 #0x0
- // 0x143A9C 0 #0x4
- // 0x143AA0 "max77812_dram" #0x8
- // 0x143AA8 3 #0x10 // maxim regulator identifier ( 1 = max77620, 2 = max77621, 3 = max77812)
- // 0x143AAC 0 #0x14
- // 0x143AB0 5000 #0x18 // voltage step
- // 0x143AB4 0 #0x1C
- // 0x143AB8 250000 #0x20 // min voltage
- // 0x143ABC 1525000 #0x24 // max voltage
- // 0x143AC0 0 #0x28 // voltage multiplier ( * step )
- // 0x143AC4 600000 #0x2C
-
- // 0x142898 1 #0x0
- // 0x14289C 0 #0x4
- // 0x1428A0 "max77620_sd1" #0x8
- // 0x1428A8 1 #0x10 // maxim regulator identifier ( 1 = max77620, 2 = max77621, 3 = max77812)
- // 0x1428AC 23 #0x14
- // 0x1428B0 12500 #0x18 // voltage step
- // 0x1428B4 600000 #0x1C
- // 0x1428B8 1125000 #0x20 // min voltage, default Vddq for Erista EMC
- // 0x1428BC 1125000 #0x24 // max voltage, default Vddq for Erista EMC
- // 0x1428C0 0 #0x28
- // 0x1428C4 0 #0x2C
-
- // HOS does not seem to change DRAM voltage on Mariko (validate only)
-
- // void EnableVddMemory() in Atmosphere/libraries/libexosphere/source/pmic/pmic_api.cpp:
- // /* On Erista, set Sd1 voltage. */
- // if (soc_type == fuse::SocType_Erista) {
- // SetVoltage(Max77620RegisterSd1, 1100);
- // }
-
- // in hekate/bdk/power/max77812.h:
- // #define MAX77812_REG_M3_VOUT 0x25 // DRAM on PHASE211.
- // 3 outputs (CPU/GPU/DRAM) from max77812. Does PHASE31 mode exist?
- // If so, read/query max77812 pmic via i2c for voltage info in hekate and get DRAM reg on PHASE31.
- // max77812 document: https://datasheets.maximintegrated.com/en/ds/MAX77812.pdf
-
- // Mariko have 3 mtc tables (204/1331/1600 MHz), only these 3 frequencies could be set.
- constexpr u32 EmcFreqOffsets[][30] = {
- // { 0xD7C60, 0xD7C68, 0xD7C70, 0xD7C78, 0xD7C80, 0xD7C88, 0xD7C90, 0xD7C98, 0xD7CA0, 0xD7CA8, 0xE1800, 0xEEFA0, 0xF2478, 0xFE284, 0x10A304, 0x10D7DC, 0x110A40, 0x113CA4, 0x116F08, 0x11A16C, 0x11D3D0, 0x120634, 0x123898, 0x126AFC, 0x129D60, 0x12CFC4, 0x130228, 0x13BFE0, 0x140D00, 0x140D50, },
- // { 0xE1810, 0xE6530, 0xE6580, 0xE6AB0, 0xE6AB8, 0xE6AC0, 0xE6AC8, 0xE6AD0, 0xE6AD8, 0xE6AE0, 0xE6AE8, 0xE6AF0, 0xE6AF8, 0xF0650, 0xFDDF0, 0x1012C8, 0x10D0D4, 0x119154, 0x11C62C, 0x11F890, 0x122AF4, 0x125D58, 0x128FBC, 0x12C220, 0x12F484, 0x1326E8, 0x13594C, 0x138BB0, 0x13BE14, 0x13F078, },
- { 0xE1860, 0xE6580, 0xE65D0, 0xE6B00, 0xE6B08, 0xE6B10, 0xE6B18, 0xE6B20, 0xE6B28, 0xE6B30, 0xE6B38, 0xE6B40, 0xE6B48, 0xF06A0, 0xFDE40, 0x101318, 0x10D124, 0x1191A4, 0x11C67C, 0x11F8E0, 0x122B44, 0x125DA8, 0x12900C, 0x12C270, 0x12F4D4, 0x132738, 0x13599C, 0x138C00, 0x13BE64, 0x13F0C8, },
- };
-
- // Mariko mtc tables starting from rev, see mtc_timing_table.hpp for parameters.
- // All mariko mtc tables will be patched to simplify the procedure.
- constexpr u32 MtcTable_1600[][13] = {
- { 0x1012D8, 0x11C63C, 0x11F8A0, 0x122B04, 0x125D68, 0x128FCC, 0x12C230, 0x12F494, 0x1326F8, 0x13595C, 0x138BC0, 0x13BE24, 0x13F088 },
- };
-
- constexpr u32 MtcTableOffset = 0x10CC;
-
- #include "mtc_timing_table.hpp"
-
- void AdjustMtcTable(MarikoMtcTable* table, MarikoMtcTable* ref)
- {
- /* Official Tegra X1 TRM, sign up for nvidia developer program (free) to download:
- * https://developer.nvidia.com/embedded/dlc/tegra-x1-technical-reference-manual
- * Section 18.11: MC Registers
- *
- * Retail Mariko: 200FBGA 16Gb DDP LPDDR4X SDRAM x 2
- * x16/Ch, 1Ch/die, Double-die, 2Ch, 1CS(rank), 8Gb density per die
- * 64Mb x 16DQ x 8banks x 2channels = 2048MB (x32DQ) per package
- *
- * Devkit Mariko: 200FBGA 32Gb DDP LPDDR4X SDRAM x 2
- * x16/Ch, 1Ch/die, Quad-die, 2Ch, 2CS(rank), 8Gb density per die
- * X1+ EMC can R/W to both ranks at the same time, resulting in doubled DQ
- * 64Mb x 32DQ x 8banks x 2channels = 4096MB (x64DQ) per package
- *
- * If you have access to LPDDR4(X) specs or datasheets (from manufacturers or Google),
- * you'd better calculate timings yourself rather than relying on following algorithm.
- */
-
- #define ADJUST_PARAM(TARGET, REF) TARGET = std::ceil(REF + ((GetEmcClock()-1331200)*(TARGET-REF))/(1600000-1331200));
-
- #define ADJUST_PARAM_TABLE(TABLE, PARAM, REF) ADJUST_PARAM(TABLE->PARAM, REF->PARAM)
-
- #define ADJUST_PARAM_ALL_REG(TABLE, PARAM, REF) \
- ADJUST_PARAM_TABLE(TABLE, burst_regs.PARAM, REF) \
- ADJUST_PARAM_TABLE(TABLE, shadow_regs_ca_train.PARAM, REF) \
- ADJUST_PARAM_TABLE(TABLE, shadow_regs_rdwr_train.PARAM, REF)
-
- #define WRITE_PARAM_ALL_REG(TABLE, PARAM, VALUE)\
- TABLE->burst_regs.PARAM = VALUE; \
- TABLE->shadow_regs_ca_train.PARAM = VALUE; \
- TABLE->shadow_regs_rdwr_train.PARAM = VALUE;
-
- // tCK_avg (average clock period) in ns (10E-3 ns)
- const double tCK_avg = GetEmcClock() == 2131200 ? 0.468 : 1000'000. / GetEmcClock();
- // tRPpb (row precharge time per bank) in ns
- const u32 tRPpb = 18;
- // tRPab (row precharge time all banks) in ns
- const u32 tRPab = 21;
- // tRAS (row active time) in ns
- const u32 tRAS = 42;
- // tRC (ACTIVATE-ACTIVATE command period same bank) in ns
- const u32 tRC = tRPpb + tRAS;
- // tRFCab (refresh cycle time all banks) in ns for 8Gb density
- const u32 tRFCab = 280;
- // tRFCpb (refresh cycle time per bank) in ns for 8Gb density
- const u32 tRFCpb = 140;
- // tRCD (RAS-CAS delay) in ns
- const u32 tRCD = 18;
- // tRRD (Active bank-A to Active bank-B) in ns
- const double tRRD = GetEmcClock() == 2131200 ? 7.5 : 10.;
- // tREFpb (average refresh interval per bank) in ns for 8Gb density
- const u32 tREFpb = 488;
- // tREFab (average refresh interval all 8 banks) in ns for 8Gb density
- // const u32 tREFab = tREFpb * 8;
- // #_of_rows per die for 8Gb density
- const u32 numOfRows = 65536;
- // {REFRESH, REFRESH_LO} = max[(tREF/#_of_rows) / (emc_clk_period) - 64, (tREF/#_of_rows) / (emc_clk_period) * 97%]
- // emc_clk_period = dram_clk / 2;
- // 1600 MHz: 5894, but N' set to 6176 (~4.8% margin)
- const u32 REFRESH = std::ceil((double(tREFpb) * GetEmcClock() / numOfRows * (1.048) / 2 - 64)) / 4 * 4;
- // tPDEX2WR, tPDEX2RD (timing delay from exiting powerdown mode to a write/read command) in ns
- const u32 tPDEX2 = 10;
- // [Guessed] tACT2PDEN (timing delay from an activate, MRS or EMRS command to power-down entry) in ns
- const u32 tACT2PDEN = 14;
- // [Guessed] tPDEX2MRR (timing delay from exiting powerdown mode to MRR command) in ns
- const double tPDEX2MRR = 28.75;
- // [Guessed] tCKE2PDEN (timing delay from turning off CKE to power-down entry) in ns
- const double tCKE2PDEN = 8.5;
- // tXSR (SELF REFRESH exit to next valid command delay) in ns
- const double tXSR = tRFCab + 7.5;
- // tCKE (minimum CKE high pulse width) in ns
- const u32 tCKE = 8;
- // tCKELPD (minimum CKE low pulse width in SELF REFRESH) in ns
- const u32 tCKELPD = 15;
- // [Guessed] tPD (minimum CKE low pulse width in power-down mode) in ns
- const double tPD = 7.5;
- // tFAW (Four-bank Activate Window) in ns
- const u32 tFAW = GetEmcClock() == 2131200 ? 30 : 40;
-
- #define GET_CYCLE_CEIL(PARAM) std::ceil(double(PARAM) / tCK_avg)
-
- WRITE_PARAM_ALL_REG(table, emc_rc, GET_CYCLE_CEIL(tRC));
- WRITE_PARAM_ALL_REG(table, emc_rfc, GET_CYCLE_CEIL(tRFCab));
- WRITE_PARAM_ALL_REG(table, emc_rfcpb, GET_CYCLE_CEIL(tRFCpb));
- WRITE_PARAM_ALL_REG(table, emc_ras, GET_CYCLE_CEIL(tRAS));
- WRITE_PARAM_ALL_REG(table, emc_rp, GET_CYCLE_CEIL(tRPpb));
- ADJUST_PARAM_ALL_REG(table, emc_r2w, ref);
- ADJUST_PARAM_ALL_REG(table, emc_w2r, ref);
- ADJUST_PARAM_ALL_REG(table, emc_r2p, ref);
- ADJUST_PARAM_ALL_REG(table, emc_w2p, ref);
- ADJUST_PARAM_ALL_REG(table, emc_trtm, ref);
- ADJUST_PARAM_ALL_REG(table, emc_twtm, ref);
- ADJUST_PARAM_ALL_REG(table, emc_tratm, ref);
- ADJUST_PARAM_ALL_REG(table, emc_twatm, ref);
- WRITE_PARAM_ALL_REG(table, emc_rd_rcd, GET_CYCLE_CEIL(tRCD));
- WRITE_PARAM_ALL_REG(table, emc_wr_rcd, GET_CYCLE_CEIL(tRCD));
- WRITE_PARAM_ALL_REG(table, emc_rrd, GET_CYCLE_CEIL(tRRD));
- WRITE_PARAM_ALL_REG(table, emc_refresh, REFRESH);
- WRITE_PARAM_ALL_REG(table, emc_pre_refresh_req_cnt, REFRESH / 4);
- WRITE_PARAM_ALL_REG(table, emc_pdex2wr, GET_CYCLE_CEIL(tPDEX2));
- WRITE_PARAM_ALL_REG(table, emc_pdex2rd, GET_CYCLE_CEIL(tPDEX2));
- WRITE_PARAM_ALL_REG(table, emc_act2pden,GET_CYCLE_CEIL(tACT2PDEN));
- ADJUST_PARAM_ALL_REG(table, emc_rw2pden, ref);
- WRITE_PARAM_ALL_REG(table, emc_cke2pden,GET_CYCLE_CEIL(tCKE2PDEN));
- WRITE_PARAM_ALL_REG(table, emc_pdex2mrr,GET_CYCLE_CEIL(tPDEX2MRR));
- WRITE_PARAM_ALL_REG(table, emc_txsr, GET_CYCLE_CEIL(tXSR));
- WRITE_PARAM_ALL_REG(table, emc_txsrdll, GET_CYCLE_CEIL(tXSR));
- WRITE_PARAM_ALL_REG(table, emc_tcke, GET_CYCLE_CEIL(tCKE));
- WRITE_PARAM_ALL_REG(table, emc_tckesr, GET_CYCLE_CEIL(tCKELPD));
- WRITE_PARAM_ALL_REG(table, emc_tpd, GET_CYCLE_CEIL(tPD));
- WRITE_PARAM_ALL_REG(table, emc_tfaw, GET_CYCLE_CEIL(tFAW));
- WRITE_PARAM_ALL_REG(table, emc_trpab, GET_CYCLE_CEIL(tRPab));
- ADJUST_PARAM_ALL_REG(table, emc_tclkstop, ref);
- WRITE_PARAM_ALL_REG(table, emc_trefbw, REFRESH + 64);
-
- ADJUST_PARAM_ALL_REG(table, emc_pmacro_dll_cfg_2, ref); // EMC_DLL_CFG_2_0: level select for VDDA?
-
- // ADJUST_PARAM_TABLE(table, dram_timings.rl); // not used on Mariko
-
- constexpr u32 DIV = 4; // ?
- table->burst_mc_regs.mc_emem_arb_timing_rcd = std::ceil(GET_CYCLE_CEIL(tRCD) / DIV - 2);
- table->burst_mc_regs.mc_emem_arb_timing_rp = std::ceil(GET_CYCLE_CEIL(tRPpb) / DIV - 1);
- table->burst_mc_regs.mc_emem_arb_timing_rc = std::ceil(std::max(GET_CYCLE_CEIL(tRC), GET_CYCLE_CEIL(tRAS)+GET_CYCLE_CEIL(tRPpb))/ DIV);
- table->burst_mc_regs.mc_emem_arb_timing_ras = std::ceil(GET_CYCLE_CEIL(tRAS) / DIV - 2);
- table->burst_mc_regs.mc_emem_arb_timing_faw = std::ceil(GET_CYCLE_CEIL(tFAW) / DIV - 1);
- table->burst_mc_regs.mc_emem_arb_timing_rrd = std::ceil(GET_CYCLE_CEIL(tRRD) / DIV - 1);
- table->burst_mc_regs.mc_emem_arb_timing_rap2pre = std::ceil(table->burst_regs.emc_r2p / DIV);
- table->burst_mc_regs.mc_emem_arb_timing_wap2pre = std::ceil(table->burst_regs.emc_w2p / DIV);
- table->burst_mc_regs.mc_emem_arb_timing_r2w = std::ceil(table->burst_regs.emc_r2w / DIV + 1);
- table->burst_mc_regs.mc_emem_arb_timing_w2r = std::ceil(table->burst_regs.emc_w2r / DIV + 1);
- table->burst_mc_regs.mc_emem_arb_timing_rfcpb = std::ceil(GET_CYCLE_CEIL(tRFCpb) / DIV + 1); // ?
-
- ADJUST_PARAM_TABLE(table, la_scale_regs.mc_mll_mpcorer_ptsa_rate, ref);
- ADJUST_PARAM_TABLE(table, la_scale_regs.mc_ptsa_grant_decrement, ref);
-
- // ADJUST_PARAM_TABLE(table, min_mrs_wait); // not used on LPDDR4X
- // ADJUST_PARAM_TABLE(table, latency); // not used
-
- // Calculate DIVM and DIVN (clock DIVisors)
- // Common PLL oscillator is 38.4 MHz
- // PLLMB_OUT = 38.4 MHz / PLLLMB_DIVM * PLLMB_DIVN
- u32 divm = 1;
- u32 divn = GetEmcClock() / 38400;
- u32 remainder = GetEmcClock() % 38400;
- if (remainder >= 38400 * (3/4)) {
- divm = 4;
- divn = divn * divm + 3;
- } else
- if (remainder >= 38400 * (2/3)) {
- divm = 3;
- divn = divn * divm + 2;
- } else
- if (remainder >= 38400 * (1/2)) {
- divm = 2;
- divn = divn * divm + 1;
- } else
- if (remainder >= 38400 * (1/3)) {
- divm = 3;
- divn = divn * divm + 1;
- } else
- if (remainder >= 38400 * (1/4)) {
- divm = 4;
- divn = divn * divm + 1;
- }
-
- /* Patch PLLMB divisors */
- table->pllmb_divm = divm;
- table->pllmb_divn = divn;
-
- #ifdef EXPERIMENTAL
+ void AdjustMtcTable(MarikoMtcTable* table, MarikoMtcTable* ref)
{
- #define ADJUST_PARAM_ROUND2_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \
- TARGET_TABLE->burst_regs.PARAM = \
- ((ADJUST_PROP(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM) + 1) >> 1) << 1; \
- TARGET_TABLE->shadow_regs_ca_train.PARAM = \
- ((ADJUST_PROP(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM) + 1) >> 1) << 1; \
- TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
- ((ADJUST_PROP(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM) + 1) >> 1) << 1;
+ /* Official Tegra X1 TRM, sign up for nvidia developer program (free) to download:
+ * https://developer.nvidia.com/embedded/dlc/tegra-x1-technical-reference-manual
+ * Section 18.11: MC Registers
+ *
+ * Retail Mariko: 200FBGA 16Gb DDP LPDDR4X SDRAM x 2
+ * x16/Ch, 1Ch/die, Double-die, 2Ch, 1CS(rank), 8Gb density per die
+ * 64Mb x 16DQ x 8banks x 2channels = 2048MB (x32DQ) per package
+ *
+ * Devkit Mariko: 200FBGA 32Gb DDP LPDDR4X SDRAM x 2
+ * x16/Ch, 1Ch/die, Quad-die, 2Ch, 2CS(rank), 8Gb density per die
+ * X1+ EMC can R/W to both ranks at the same time, resulting in doubled DQ
+ * 64Mb x 32DQ x 8banks x 2channels = 4096MB (x64DQ) per package
+ *
+ * If you have access to LPDDR4(X) specs or datasheets (from manufacturers or Google),
+ * you'd better calculate timings yourself rather than relying on following algorithm.
+ */
- #define ADJUST_PARAM(TARGET_PARAM, REF_PARAM) \
- TARGET_PARAM = ADJUST_PROP(TARGET_PARAM, REF_PARAM);
+ #define ADJUST_PARAM(TARGET, REF) TARGET = std::ceil(REF + ((EmcClock-1331200)*(TARGET-REF))/(1600000-1331200));
- #define ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, PARAM) \
- ADJUST_PARAM(TARGET_TABLE->PARAM, REF_TABLE->PARAM)
+ #define ADJUST_PARAM_TABLE(TABLE, PARAM, REF) ADJUST_PARAM(TABLE->PARAM, REF->PARAM)
- #define ADJUST_PARAM_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \
- ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, burst_regs.PARAM) \
- ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_ca_train.PARAM) \
- ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_rdwr_train.PARAM)
+ #define ADJUST_PARAM_ALL_REG(TABLE, PARAM, REF) \
+ ADJUST_PARAM_TABLE(TABLE, burst_regs.PARAM, REF) \
+ ADJUST_PARAM_TABLE(TABLE, shadow_regs_ca_train.PARAM, REF) \
+ ADJUST_PARAM_TABLE(TABLE, shadow_regs_rdwr_train.PARAM, REF)
- #define TRIM_BIT(IN_BITS, HIGH, LOW) \
- ((IN_BITS >> LOW) & ( (1u << (HIGH - LOW + 1u)) - 1u ))
+ #define WRITE_PARAM_ALL_REG(TABLE, PARAM, VALUE)\
+ TABLE->burst_regs.PARAM = VALUE; \
+ TABLE->shadow_regs_ca_train.PARAM = VALUE; \
+ TABLE->shadow_regs_rdwr_train.PARAM = VALUE;
- #define ADJUST_BIT(TARGET_PARAM, REF_PARAM, HIGH, LOW) \
- ADJUST_PROP(TRIM_BIT(TARGET_PARAM, HIGH, LOW), TRIM_BIT(REF_PARAM, HIGH, LOW))
-
- #define CLEAR_BIT(BITS, HIGH, LOW) \
- BITS = BITS & ~( ((1u << HIGH) << 1u) - (1u << LOW) );
-
- #define ADJUST_BIT_ALL_REG_SINGLE_OP(TARGET_TABLE, REF_TABLE, PARAM, HIGH, LOW, OPERATION) \
- TARGET_TABLE->burst_regs.PARAM = \
- (ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH, LOW) << LOW) OPERATION; \
- TARGET_TABLE->shadow_regs_ca_train.PARAM = \
- (ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH, LOW)) << LOW OPERATION; \
- TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
- (ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH, LOW)) << LOW OPERATION;
-
- #define ADJUST_BIT_ALL_REG_PAIR(TARGET_TABLE, REF_TABLE, PARAM, HIGH1, LOW1, HIGH2, LOW2) \
- TARGET_TABLE->burst_regs.PARAM = \
- ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH1, LOW1) << LOW1 \
- | ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH2, LOW2) << LOW2; \
- TARGET_TABLE->shadow_regs_ca_train.PARAM = \
- ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH1, LOW1) << LOW1 \
- | ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH2, LOW2) << LOW2; \
- TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
- ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH1, LOW1) << LOW1 \
- | ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH2, LOW2) << LOW2;
-
- /* For latency allowance */
- #define ADJUST_INVERSE(TARGET) ((TARGET*1000) / (GetEmcClock()/1600))
-
- /* emc_wdv, emc_wsv, emc_wev, emc_wdv_mask,
- emc_quse, emc_quse_width, emc_ibdly, emc_obdly,
- emc_einput, emc_einput_duration, emc_qrst, emc_qsafe,
- emc_rdv, emc_rdv_mask, emc_rdv_early, emc_rdv_early_mask */
- ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv);
- ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wsv);
- ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wev);
- ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv_mask);
-
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse);
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse_width);
-
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_ibdly, 6,0, | (1 << 28));
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_obdly, 5,0, | (1 << 28));
-
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput);
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput_duration);
-
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_qrst, 6,0, | (6 << 16));
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qsafe);
-
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv);
- target_table->burst_regs.emc_rdv_mask = target_table->burst_regs.emc_rdv + 2;
- target_table->shadow_regs_ca_train.emc_rdv_mask = target_table->shadow_regs_ca_train.emc_rdv + 2;
- target_table->shadow_regs_rdwr_train.emc_rdv_mask = target_table->shadow_regs_rdwr_train.emc_rdv + 2;
-
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv_early);
- target_table->burst_regs.emc_rdv_early_mask = target_table->burst_regs.emc_rdv_early + 2;
- target_table->shadow_regs_ca_train.emc_rdv_early_mask = target_table->shadow_regs_ca_train.emc_rdv_early + 2;
- target_table->shadow_regs_rdwr_train.emc_rdv_early_mask = target_table->shadow_regs_rdwr_train.emc_rdv_early + 2;
-
- /* emc_pmacro_...,
- emc_zcal_wait_cnt, emc_mrs_wait_cnt(2),
- emc_pmacro_autocal_cfg_common, emc_dyn_self_ref_control, emc_qpop, emc_pmacro_cmd_pad_tx_ctrl,
- emc_tr_timing_0, emc_tr_rdv, emc_tr_qpop, emc_tr_rdv_mask, emc_tr_qsafe, emc_tr_qrst,
- emc_training_vref_settle */
- /* DDLL values */
+ /* Timings that are available in or can be derived from LPDDR4X datasheet or TRM */
{
- #define OFFSET_ALL_REG(PARAM) \
- offsetof(MarikoMtcTable, burst_regs.PARAM), \
- offsetof(MarikoMtcTable, shadow_regs_ca_train.PARAM), \
- offsetof(MarikoMtcTable, shadow_regs_rdwr_train.PARAM) \
+ // tCK_avg (average clock period) in ns
+ const double tCK_avg = (EmcClock == 2131200) ? 0.468 : 1000'000. / EmcClock;
+ // tRPpb (row precharge time per bank) in ns
+ const u32 tRPpb = 18;
+ // tRPab (row precharge time all banks) in ns
+ const u32 tRPab = 21;
+ // tRAS (row active time) in ns
+ const u32 tRAS = 42;
+ // tRC (ACTIVATE-ACTIVATE command period same bank) in ns
+ const u32 tRC = tRPpb + tRAS;
+ // tRFCab (refresh cycle time all banks) in ns for 8Gb density
+ const u32 tRFCab = 280;
+ // tRFCpb (refresh cycle time per bank) in ns for 8Gb density
+ const u32 tRFCpb = 140;
+ // tRCD (RAS-CAS delay) in ns
+ const u32 tRCD = 18;
+ // tRRD (Active bank-A to Active bank-B) in ns
+ const double tRRD = (EmcClock == 2131200) ? 7.5 : 10.;
+ // tREFpb (average refresh interval per bank) in ns for 8Gb density
+ const u32 tREFpb = 488;
+ // tREFab (average refresh interval all 8 banks) in ns for 8Gb density
+ // const u32 tREFab = tREFpb * 8;
+ // #_of_rows per die for 8Gb density
+ const u32 numOfRows = 65536;
+ // {REFRESH, REFRESH_LO} = max[(tREF/#_of_rows) / (emc_clk_period) - 64, (tREF/#_of_rows) / (emc_clk_period) * 97%]
+ // emc_clk_period = dram_clk / 2;
+ // 1600 MHz: 5894, but N' set to 6176 (~4.8% margin); the ceil() result is converted to u32 first so that "/ 4 * 4" really rounds down to a multiple of 4 (in double math it is a no-op and gives 6179)
+ const u32 REFRESH = u32(std::ceil(double(tREFpb) * EmcClock / numOfRows * (1.048) / 2 - 64)) / 4 * 4;
+ // tPDEX2WR, tPDEX2RD (timing delay from exiting powerdown mode to a write/read command) in ns
+ const u32 tPDEX2 = 10;
+ // [Guessed] tACT2PDEN (timing delay from an activate, MRS or EMRS command to power-down entry) in ns
+ const u32 tACT2PDEN = 14;
+ // [Guessed] tPDEX2MRR (timing delay from exiting powerdown mode to MRR command) in ns
+ const double tPDEX2MRR = 28.75;
+ // [Guessed] tCKE2PDEN (timing delay from turning off CKE to power-down entry) in ns
+ const double tCKE2PDEN = 8.5;
+ // tXSR (SELF REFRESH exit to next valid command delay) in ns
+ const double tXSR = tRFCab + 7.5;
+ // tCKE (minimum CKE high pulse width) in ns
+ const u32 tCKE = 8;
+ // tCKELPD (minimum CKE low pulse width in SELF REFRESH) in ns
+ const u32 tCKELPD = 15;
+ // [Guessed] tPD (minimum CKE low pulse width in power-down mode) in ns
+ const double tPD = 7.5;
+ // tFAW (Four-bank Activate Window) in ns
+ const u32 tFAW = (EmcClock == 2131200) ? 30 : 40;
- /* Section 1: adjust HI bits: BIT 26:16 */
- const uint32_t ddll_high[] = {
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_5),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_5),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3),
- };
- for (uint32_t i = 0; i < sizeof(ddll_high)/sizeof(uint32_t); i++)
- {
- uint32_t *ddll = reinterpret_cast(reinterpret_cast(target_table) + ddll_high[i]);
- uint32_t *ddll_ref = reinterpret_cast(reinterpret_cast(ref_table) + ddll_high[i]);
- uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 26,16) & ((1 << 10) - 1);
- CLEAR_BIT(*ddll, 26,16)
- *ddll |= adjusted_ddll << 16;
- }
+ #define GET_CYCLE_CEIL(PARAM) std::ceil(double(PARAM) / tCK_avg)
- /* Section 2: adjust LOW bits: BIT 10:0 */
- const uint32_t ddll_low[] = {
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_0),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_1),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_3),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_0),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_1),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_3),
- OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3),
- OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2),
- offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3),
- };
- for (uint32_t i = 0; i < sizeof(ddll_low)/sizeof(uint32_t); i++)
- {
- uint32_t *ddll = reinterpret_cast(reinterpret_cast(target_table) + ddll_low[i]);
- uint32_t *ddll_ref = reinterpret_cast(reinterpret_cast(ref_table) + ddll_low[i]);
- uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 10,0) & ((1 << 10) - 1);
- CLEAR_BIT(*ddll, 10,0)
- *ddll |= adjusted_ddll;
- }
+ WRITE_PARAM_ALL_REG(table, emc_rc, GET_CYCLE_CEIL(tRC));
+ WRITE_PARAM_ALL_REG(table, emc_rfc, GET_CYCLE_CEIL(tRFCab));
+ WRITE_PARAM_ALL_REG(table, emc_rfcpb, GET_CYCLE_CEIL(tRFCpb));
+ WRITE_PARAM_ALL_REG(table, emc_ras, GET_CYCLE_CEIL(tRAS));
+ WRITE_PARAM_ALL_REG(table, emc_rp, GET_CYCLE_CEIL(tRPpb));
+ WRITE_PARAM_ALL_REG(table, emc_rd_rcd, GET_CYCLE_CEIL(tRCD));
+ WRITE_PARAM_ALL_REG(table, emc_wr_rcd, GET_CYCLE_CEIL(tRCD));
+ WRITE_PARAM_ALL_REG(table, emc_rrd, GET_CYCLE_CEIL(tRRD));
+ WRITE_PARAM_ALL_REG(table, emc_refresh, REFRESH);
+ WRITE_PARAM_ALL_REG(table, emc_pre_refresh_req_cnt, REFRESH / 4);
+ WRITE_PARAM_ALL_REG(table, emc_pdex2wr, GET_CYCLE_CEIL(tPDEX2));
+ WRITE_PARAM_ALL_REG(table, emc_pdex2rd, GET_CYCLE_CEIL(tPDEX2));
+ WRITE_PARAM_ALL_REG(table, emc_act2pden,GET_CYCLE_CEIL(tACT2PDEN));
+ WRITE_PARAM_ALL_REG(table, emc_cke2pden,GET_CYCLE_CEIL(tCKE2PDEN));
+ WRITE_PARAM_ALL_REG(table, emc_pdex2mrr,GET_CYCLE_CEIL(tPDEX2MRR));
+ WRITE_PARAM_ALL_REG(table, emc_txsr, GET_CYCLE_CEIL(tXSR));
+ WRITE_PARAM_ALL_REG(table, emc_txsrdll, GET_CYCLE_CEIL(tXSR));
+ WRITE_PARAM_ALL_REG(table, emc_tcke, GET_CYCLE_CEIL(tCKE));
+ WRITE_PARAM_ALL_REG(table, emc_tckesr, GET_CYCLE_CEIL(tCKELPD));
+ WRITE_PARAM_ALL_REG(table, emc_tpd, GET_CYCLE_CEIL(tPD));
+ WRITE_PARAM_ALL_REG(table, emc_tfaw, GET_CYCLE_CEIL(tFAW));
+ WRITE_PARAM_ALL_REG(table, emc_trpab, GET_CYCLE_CEIL(tRPab));
+ WRITE_PARAM_ALL_REG(table, emc_trefbw, REFRESH + 64);
+
+ constexpr u32 MC_ARB_DIV = 4; // divisor applied to the EMC cycle counts below; origin of the value 4 is unconfirmed (author marked it "?")
+ table->burst_mc_regs.mc_emem_arb_timing_rcd = std::ceil(GET_CYCLE_CEIL(tRCD) / MC_ARB_DIV - 2);
+ table->burst_mc_regs.mc_emem_arb_timing_rp = std::ceil(GET_CYCLE_CEIL(tRPpb) / MC_ARB_DIV - 1);
+ table->burst_mc_regs.mc_emem_arb_timing_rc = std::ceil(std::max(GET_CYCLE_CEIL(tRC), GET_CYCLE_CEIL(tRAS)+GET_CYCLE_CEIL(tRPpb))/ MC_ARB_DIV);
+ table->burst_mc_regs.mc_emem_arb_timing_ras = std::ceil(GET_CYCLE_CEIL(tRAS) / MC_ARB_DIV - 2);
+ table->burst_mc_regs.mc_emem_arb_timing_faw = std::ceil(GET_CYCLE_CEIL(tFAW) / MC_ARB_DIV - 1);
+ table->burst_mc_regs.mc_emem_arb_timing_rrd = std::ceil(GET_CYCLE_CEIL(tRRD) / MC_ARB_DIV - 1);
+ table->burst_mc_regs.mc_emem_arb_timing_rap2pre = std::ceil(table->burst_regs.emc_r2p / MC_ARB_DIV); // NOTE(review): unlike the GET_CYCLE_CEIL lines above (double operand), this divides a table field by a u32; if the field is an integer the quotient truncates before std::ceil -- confirm intent (applies to the next three lines too)
+ table->burst_mc_regs.mc_emem_arb_timing_wap2pre = std::ceil(table->burst_regs.emc_w2p / MC_ARB_DIV);
+ table->burst_mc_regs.mc_emem_arb_timing_r2w = std::ceil(table->burst_regs.emc_r2w / MC_ARB_DIV + 1);
+ table->burst_mc_regs.mc_emem_arb_timing_w2r = std::ceil(table->burst_regs.emc_w2r / MC_ARB_DIV + 1); // NOTE(review): emc_r2p/w2p/r2w/w2r are read here, before the ADJUST_PARAM_ALL_REG calls on them further down -- verify the ordering is intended
+ table->burst_mc_regs.mc_emem_arb_timing_rfcpb = std::ceil(GET_CYCLE_CEIL(tRFCpb) / MC_ARB_DIV + 1); // "+ 1" offset is unconfirmed (author marked it "?")
}
- ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_zcal_wait_cnt, 21,16, 10,0)
- ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt, 21,16, 10,0)
- ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt2, 21,16, 10,0)
+ ADJUST_PARAM_ALL_REG(table, emc_r2w, ref);
+ ADJUST_PARAM_ALL_REG(table, emc_w2r, ref);
+ ADJUST_PARAM_ALL_REG(table, emc_r2p, ref);
+ ADJUST_PARAM_ALL_REG(table, emc_w2p, ref);
+ ADJUST_PARAM_ALL_REG(table, emc_trtm, ref);
+ ADJUST_PARAM_ALL_REG(table, emc_twtm, ref);
+ ADJUST_PARAM_ALL_REG(table, emc_tratm, ref);
+ ADJUST_PARAM_ALL_REG(table, emc_twatm, ref);
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_auto_cal_channel, 5,0, | 0xC1E00300)
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_pmacro_autocal_cfg_common, 5,0, | 8 << 8)
+ ADJUST_PARAM_ALL_REG(table, emc_rw2pden, ref);
- ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_dyn_self_ref_control, 31,31, 15,0)
+ ADJUST_PARAM_ALL_REG(table, emc_tclkstop, ref);
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qpop);
+ ADJUST_PARAM_ALL_REG(table, emc_pmacro_dll_cfg_2, ref); // EMC_DLL_CFG_2_0: level select for VDDA?
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_timing_0, 9,0, | 0x1186100)
+ // ADJUST_PARAM_TABLE(table, dram_timings.rl); // not used on Mariko
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_rdv);
- target_table->burst_regs.emc_tr_rdv_mask = target_table->burst_regs.emc_tr_rdv + 2;
- target_table->shadow_regs_ca_train.emc_tr_rdv_mask = target_table->shadow_regs_ca_train.emc_tr_rdv + 2;
- target_table->shadow_regs_rdwr_train.emc_tr_rdv_mask = target_table->shadow_regs_rdwr_train.emc_tr_rdv + 2;
+ ADJUST_PARAM_TABLE(table, la_scale_regs.mc_mll_mpcorer_ptsa_rate, ref);
+ ADJUST_PARAM_TABLE(table, la_scale_regs.mc_ptsa_grant_decrement, ref);
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qpop);
- ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qsafe);
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_qrst, 6,0, | (6 << 16));
+ // ADJUST_PARAM_TABLE(table, min_mrs_wait); // not used on LPDDR4X
+ // ADJUST_PARAM_TABLE(table, latency); // not used
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_training_vref_settle, 15,0, | (4 << 16));
-
- /* External Memory Arbitration Configuration */
- /* BIT 20:16 - EXTRA_TICKS_PER_UPDATE: 0 */
- /* BIT 8:0 - CYCLES_PER_UPDATE: 12(1600MHz), 10(1331.2MHz) */
- ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_cfg);
-
- /* External Memory Arbitration Configuration: Direction Arbiter: Turns */
- /* BIT 31:24 - W2R_TURN: approx. mc_emem_arb_timing_w2r */
- /* BIT 23:16 - R2W_TURN: approx. mc_emem_arb_timing_r2w */
- /* BIT 15:8 - W2W_TURN: 0 */
- /* BIT 7:0 - R2R_TURN: 0 */
+ /* Patch PLLMB divisors */
{
- uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_turns;
- uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_turns;
- uint8_t w2r_turn = ADJUST_BIT(param_1600, param_1331, 31,24);
- uint8_t r2w_turn = ADJUST_BIT(param_1600, param_1331, 23,16);
- target_table->burst_mc_regs.mc_emem_arb_da_turns = w2r_turn << 24 | r2w_turn << 16;
- }
-
- /* External Memory Arbitration Configuration: Direction Arbiter: Covers */
- /* BIT 23:16 - RCD_W_COVER: 13(1600MHz), 11(1331.2MHz) */
- /* BIT 15:8 - RCD_R_COVER: 8(1600MHz), 7(1331.2MHz) */
- /* BIT 7:0 - RC_COVER: approx. mc_emem_arb_timing_rc, 12(1600MHz), 9(1331.2MHz) */
- {
- uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_covers;
- uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_covers;
- uint8_t rcd_w_cover = ADJUST_BIT(param_1600, param_1331, 23,16);
- uint8_t rcd_r_cover = ADJUST_BIT(param_1600, param_1331, 15,8);
- uint8_t rc_cover = ADJUST_BIT(param_1600, param_1331, 7,0);
- target_table->burst_mc_regs.mc_emem_arb_da_covers = rcd_w_cover << 16 | rcd_r_cover << 8 | rc_cover;
- }
-
- /* External Memory Arbitration Configuration: Miscellaneous Thresholds (0) */
- /* BIT 20:16 - PRIORITY_INVERSION_ISO_THRESHOLD: 12(1600MHz), 10(1331.2MHz) */
- /* BIT 14:8 - PRIORITY_INVERSION_THRESHOLD: 36(1600MHz), 30(1331.2MHz) */
- /* BIT 7:0 - BC2AA_HOLDOFF_THRESHOLD: set to mc_emem_arb_timing_rc */
- {
- uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_misc0;
- uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_misc0;
- uint8_t priority_inversion_iso_threshold = ADJUST_BIT(param_1600, param_1331, 20,16);
- uint8_t priority_inversion_threshold = ADJUST_BIT(param_1600, param_1331, 14,8);
- uint8_t bc2aa_holdoff_threshold = target_table->burst_mc_regs.mc_emem_arb_timing_rc;
- CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 20,16)
- CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 14,8)
- CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 7,0)
- target_table->burst_mc_regs.mc_emem_arb_misc0 |=
- (priority_inversion_iso_threshold << 16 | priority_inversion_threshold << 8 | bc2aa_holdoff_threshold);
- }
-
- /* Latency allowance settings */
- {
- /* Section 1: adjust write latency */
- /* BIT 23:16 - ALLOWANCE_WRITE: 128(1600MHz), 153(1331.2MHz) */
- const uint32_t latency_write_offset[] = {
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_1),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_tsec_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmca_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcaa_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmc_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcab_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_ppcs_1),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_mpcore_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_avpc_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvenc_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvdec_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
- };
- for (uint32_t i = 0; i < sizeof(latency_write_offset)/sizeof(uint32_t); i++)
- {
- uint32_t *latency = reinterpret_cast(reinterpret_cast(target_table) + latency_write_offset[i]);
- CLEAR_BIT(*latency, 23,16)
- *latency |= ADJUST_INVERSE(128) << 16;
+ // Calculate DIVM and DIVN (clock divisors); the common PLL oscillator is 38.4 MHz (38400 in the kHz units EmcClock is divided by)
+ // PLLMB_OUT = 38.4 MHz / PLLMB_DIVM * PLLMB_DIVN
+ // Thresholds are written as 38400 * N / D (exact integers); 38400 * (N/D) truncates N/D to 0 and made the first branch always taken
+ u32 divm = 1;
+ u32 divn = EmcClock / 38400;
+ u32 remainder = EmcClock % 38400;
+ if (remainder >= 38400 * 3 / 4) {
+     divm = 4;
+     divn = divn * divm + 3;
+ } else
+ if (remainder >= 38400 * 2 / 3) {
+     divm = 3;
+     divn = divn * divm + 2;
+ } else
+ if (remainder >= 38400 * 1 / 2) {
+     divm = 2;
+     divn = divn * divm + 1;
+ } else
+ if (remainder >= 38400 * 1 / 3) {
+     divm = 3;
+     divn = divn * divm + 1;
+ } else
+ if (remainder >= 38400 * 1 / 4) {
+     divm = 4;
+     divn = divn * divm + 1;
+ }
- /* Section 2: adjust read latency */
- /* BIT 7:0 - ALLOWANCE_READ */
- const uint32_t latency_read_offset[] = {
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_1),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vi2_0),
- offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
- };
- for (uint32_t i = 0; i < sizeof(latency_read_offset)/sizeof(uint32_t); i++)
- {
- uint32_t *latency = reinterpret_cast(reinterpret_cast(target_table) + latency_read_offset[i]);
- uint8_t adjusted_latency = ADJUST_INVERSE(TRIM_BIT(*latency, 7,0));
- CLEAR_BIT(*latency, 7,0)
- *latency |= adjusted_latency;
- }
+ table->pllmb_divm = divm;
+ table->pllmb_divn = divn;
}
- /* PLLM and PLLMB control */
+ #ifdef EXPERIMENTAL
{
- /*
- * CLK_RST_CONTROLLER_PLLM_SS_CTRL1:
- * BIT 31:16 : PLLM_SDM_SSC_MAX
- * BIT 15:0 : PLLM_SDM_SSC_MIN
- *
- * CLK_RST_CONTROLLER_PLLM_SS_CTRL2:
- * BIT 31:16 : PLLM_SDM_SSC_STEP
- * BIT 15:0 : PLLM_SDM_DIN
- *
- * pllm(b)_ss_ctrl1:
- * 1365, 342 (1600MHz)
- * 0xFAAB, 0xF404 (1331MHz)
- *
- * pllm(b)_ss_ctrl2:
- * 2, 1365 (1600MHz)
- * 6, 0xFAAB (1331MHz)
- *
- * No need to care about this if Spread Spectrum (SS) is disabled
- */
- // Disable PLL Spread Spectrum Control
- table->pll_en_ssc = 0;
- table->pllm_ss_cfg = 1 << 30;
+ #define ADJUST_PARAM_ROUND2_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \
+ TARGET_TABLE->burst_regs.PARAM = \
+ ((ADJUST_PROP(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM) + 1) >> 1) << 1; \
+ TARGET_TABLE->shadow_regs_ca_train.PARAM = \
+ ((ADJUST_PROP(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM) + 1) >> 1) << 1; \
+ TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
+ ((ADJUST_PROP(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM) + 1) >> 1) << 1;
+
+ #define ADJUST_PARAM(TARGET_PARAM, REF_PARAM) \
+ TARGET_PARAM = ADJUST_PROP(TARGET_PARAM, REF_PARAM);
+
+ #define ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, PARAM) \
+ ADJUST_PARAM(TARGET_TABLE->PARAM, REF_TABLE->PARAM)
+
+ #define ADJUST_PARAM_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \
+ ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, burst_regs.PARAM) \
+ ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_ca_train.PARAM) \
+ ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_rdwr_train.PARAM)
+
+ #define TRIM_BIT(IN_BITS, HIGH, LOW) \
+ ((IN_BITS >> LOW) & ( (1u << (HIGH - LOW + 1u)) - 1u ))
+
+ #define ADJUST_BIT(TARGET_PARAM, REF_PARAM, HIGH, LOW) \
+ ADJUST_PROP(TRIM_BIT(TARGET_PARAM, HIGH, LOW), TRIM_BIT(REF_PARAM, HIGH, LOW))
+
+ #define CLEAR_BIT(BITS, HIGH, LOW) \
+ BITS = BITS & ~( ((1u << HIGH) << 1u) - (1u << LOW) );
+
+ #define ADJUST_BIT_ALL_REG_SINGLE_OP(TARGET_TABLE, REF_TABLE, PARAM, HIGH, LOW, OPERATION) \
+ TARGET_TABLE->burst_regs.PARAM = \
+ (ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH, LOW) << LOW) OPERATION; \
+ TARGET_TABLE->shadow_regs_ca_train.PARAM = \
+ (ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH, LOW)) << LOW OPERATION; \
+ TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
+ (ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH, LOW)) << LOW OPERATION;
+
+ #define ADJUST_BIT_ALL_REG_PAIR(TARGET_TABLE, REF_TABLE, PARAM, HIGH1, LOW1, HIGH2, LOW2) \
+ TARGET_TABLE->burst_regs.PARAM = \
+ ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH1, LOW1) << LOW1 \
+ | ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH2, LOW2) << LOW2; \
+ TARGET_TABLE->shadow_regs_ca_train.PARAM = \
+ ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH1, LOW1) << LOW1 \
+ | ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH2, LOW2) << LOW2; \
+ TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
+ ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH1, LOW1) << LOW1 \
+ | ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH2, LOW2) << LOW2;
+
+ /* For latency allowance */
+ #define ADJUST_INVERSE(TARGET) ((TARGET*1000) / (EmcClock/1600))
+
+ /* emc_wdv, emc_wsv, emc_wev, emc_wdv_mask,
+ emc_quse, emc_quse_width, emc_ibdly, emc_obdly,
+ emc_einput, emc_einput_duration, emc_qrst, emc_qsafe,
+ emc_rdv, emc_rdv_mask, emc_rdv_early, emc_rdv_early_mask */
+ ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv);
+ ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wsv);
+ ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wev);
+ ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv_mask);
+
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse);
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse_width);
+
+ ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_ibdly, 6,0, | (1 << 28));
+ ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_obdly, 5,0, | (1 << 28));
+
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput);
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput_duration);
+
+ ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_qrst, 6,0, | (6 << 16));
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qsafe);
+
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv);
+ target_table->burst_regs.emc_rdv_mask = target_table->burst_regs.emc_rdv + 2;
+ target_table->shadow_regs_ca_train.emc_rdv_mask = target_table->shadow_regs_ca_train.emc_rdv + 2;
+ target_table->shadow_regs_rdwr_train.emc_rdv_mask = target_table->shadow_regs_rdwr_train.emc_rdv + 2;
+
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv_early);
+ target_table->burst_regs.emc_rdv_early_mask = target_table->burst_regs.emc_rdv_early + 2;
+ target_table->shadow_regs_ca_train.emc_rdv_early_mask = target_table->shadow_regs_ca_train.emc_rdv_early + 2;
+ target_table->shadow_regs_rdwr_train.emc_rdv_early_mask = target_table->shadow_regs_rdwr_train.emc_rdv_early + 2;
+
+ /* emc_pmacro_...,
+ emc_zcal_wait_cnt, emc_mrs_wait_cnt(2),
+ emc_pmacro_autocal_cfg_common, emc_dyn_self_ref_control, emc_qpop, emc_pmacro_cmd_pad_tx_ctrl,
+ emc_tr_timing_0, emc_tr_rdv, emc_tr_qpop, emc_tr_rdv_mask, emc_tr_qsafe, emc_tr_qrst,
+ emc_training_vref_settle */
+ /* DDLL values */
+ {
+ #define OFFSET_ALL_REG(PARAM) \
+ offsetof(MarikoMtcTable, burst_regs.PARAM), \
+ offsetof(MarikoMtcTable, shadow_regs_ca_train.PARAM), \
+ offsetof(MarikoMtcTable, shadow_regs_rdwr_train.PARAM) \
+
+ /* Section 1: adjust HI bits: BIT 26:16 */
+ const uint32_t ddll_high[] = {
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_5),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_5),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3),
+ };
+ for (uint32_t i = 0; i < sizeof(ddll_high)/sizeof(uint32_t); i++)
+ {
+ uint32_t *ddll = reinterpret_cast(reinterpret_cast(target_table) + ddll_high[i]);
+ uint32_t *ddll_ref = reinterpret_cast(reinterpret_cast(ref_table) + ddll_high[i]);
+ uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 26,16) & ((1 << 10) - 1);
+ CLEAR_BIT(*ddll, 26,16)
+ *ddll |= adjusted_ddll << 16;
+ }
+
+ /* Section 2: adjust LOW bits: BIT 10:0 */
+ const uint32_t ddll_low[] = {
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_0),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_1),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_3),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_0),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_1),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_3),
+ OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3),
+ OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2),
+ offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3),
+ };
+ for (uint32_t i = 0; i < sizeof(ddll_low)/sizeof(uint32_t); i++)
+ {
+ uint32_t *ddll = reinterpret_cast(reinterpret_cast(target_table) + ddll_low[i]);
+ uint32_t *ddll_ref = reinterpret_cast(reinterpret_cast(ref_table) + ddll_low[i]);
+ uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 10,0) & ((1 << 10) - 1);
+ CLEAR_BIT(*ddll, 10,0)
+ *ddll |= adjusted_ddll;
+ }
+ }
+
+ ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_zcal_wait_cnt, 21,16, 10,0)
+ ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt, 21,16, 10,0)
+ ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt2, 21,16, 10,0)
+
+ ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_auto_cal_channel, 5,0, | 0xC1E00300)
+ ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_pmacro_autocal_cfg_common, 5,0, | 8 << 8)
+
+ ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_dyn_self_ref_control, 31,31, 15,0)
+
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qpop);
+
+ ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_timing_0, 9,0, | 0x1186100)
+
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_rdv);
+ target_table->burst_regs.emc_tr_rdv_mask = target_table->burst_regs.emc_tr_rdv + 2;
+ target_table->shadow_regs_ca_train.emc_tr_rdv_mask = target_table->shadow_regs_ca_train.emc_tr_rdv + 2;
+ target_table->shadow_regs_rdwr_train.emc_tr_rdv_mask = target_table->shadow_regs_rdwr_train.emc_tr_rdv + 2;
+
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qpop);
+ ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qsafe);
+ ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_qrst, 6,0, | (6 << 16));
+
+ ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_training_vref_settle, 15,0, | (4 << 16));
+
+ /* External Memory Arbitration Configuration */
+ /* BIT 20:16 - EXTRA_TICKS_PER_UPDATE: 0 */
+ /* BIT 8:0 - CYCLES_PER_UPDATE: 12(1600MHz), 10(1331.2MHz) */
+ ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_cfg);
+
+ /* External Memory Arbitration Configuration: Direction Arbiter: Turns */
+ /* BIT 31:24 - W2R_TURN: approx. mc_emem_arb_timing_w2r */
+ /* BIT 23:16 - R2W_TURN: approx. mc_emem_arb_timing_r2w */
+ /* BIT 15:8 - W2W_TURN: 0 */
+ /* BIT 7:0 - R2R_TURN: 0 */
+ {
+ uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_turns;
+ uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_turns;
+ uint8_t w2r_turn = ADJUST_BIT(param_1600, param_1331, 31,24);
+ uint8_t r2w_turn = ADJUST_BIT(param_1600, param_1331, 23,16);
+ target_table->burst_mc_regs.mc_emem_arb_da_turns = w2r_turn << 24 | r2w_turn << 16;
+ }
+
+ /* External Memory Arbitration Configuration: Direction Arbiter: Covers */
+ /* BIT 23:16 - RCD_W_COVER: 13(1600MHz), 11(1331.2MHz) */
+ /* BIT 15:8 - RCD_R_COVER: 8(1600MHz), 7(1331.2MHz) */
+ /* BIT 7:0 - RC_COVER: approx. mc_emem_arb_timing_rc, 12(1600MHz), 9(1331.2MHz) */
+ {
+ uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_covers;
+ uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_covers;
+ uint8_t rcd_w_cover = ADJUST_BIT(param_1600, param_1331, 23,16);
+ uint8_t rcd_r_cover = ADJUST_BIT(param_1600, param_1331, 15,8);
+ uint8_t rc_cover = ADJUST_BIT(param_1600, param_1331, 7,0);
+ target_table->burst_mc_regs.mc_emem_arb_da_covers = rcd_w_cover << 16 | rcd_r_cover << 8 | rc_cover;
+ }
+
+ /* External Memory Arbitration Configuration: Miscellaneous Thresholds (0) */
+ /* BIT 20:16 - PRIORITY_INVERSION_ISO_THRESHOLD: 12(1600MHz), 10(1331.2MHz) */
+ /* BIT 14:8 - PRIORITY_INVERSION_THRESHOLD: 36(1600MHz), 30(1331.2MHz) */
+ /* BIT 7:0 - BC2AA_HOLDOFF_THRESHOLD: set to mc_emem_arb_timing_rc */
+ {
+ uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_misc0;
+ uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_misc0;
+ uint8_t priority_inversion_iso_threshold = ADJUST_BIT(param_1600, param_1331, 20,16);
+ uint8_t priority_inversion_threshold = ADJUST_BIT(param_1600, param_1331, 14,8);
+ uint8_t bc2aa_holdoff_threshold = target_table->burst_mc_regs.mc_emem_arb_timing_rc;
+ CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 20,16)
+ CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 14,8)
+ CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 7,0)
+ target_table->burst_mc_regs.mc_emem_arb_misc0 |=
+ (priority_inversion_iso_threshold << 16 | priority_inversion_threshold << 8 | bc2aa_holdoff_threshold);
+ }
+
+ /* Latency allowance settings */
+ {
+ /* Section 1: adjust write latency */
+ /* BIT 23:16 - ALLOWANCE_WRITE: 128(1600MHz), 153(1331.2MHz) */
+ const uint32_t latency_write_offset[] = {
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_1),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_tsec_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmca_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcaa_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmc_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcab_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_ppcs_1),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_mpcore_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_avpc_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvenc_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvdec_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
+ };
+ for (uint32_t i = 0; i < sizeof(latency_write_offset)/sizeof(uint32_t); i++)
+ {
+ uint32_t *latency = reinterpret_cast(reinterpret_cast(target_table) + latency_write_offset[i]);
+ CLEAR_BIT(*latency, 23,16)
+ *latency |= ADJUST_INVERSE(128) << 16;
+ }
+
+ /* Section 2: adjust read latency */
+ /* BIT 7:0 - ALLOWANCE_READ */
+ const uint32_t latency_read_offset[] = {
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_1),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vi2_0),
+ offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
+ };
+ for (uint32_t i = 0; i < sizeof(latency_read_offset)/sizeof(uint32_t); i++)
+ {
+ uint32_t *latency = reinterpret_cast(reinterpret_cast(target_table) + latency_read_offset[i]);
+ uint8_t adjusted_latency = ADJUST_INVERSE(TRIM_BIT(*latency, 7,0));
+ CLEAR_BIT(*latency, 7,0)
+ *latency |= adjusted_latency;
+ }
+ }
+
+ /* PLLM and PLLMB control */
+ {
+ /*
+ * CLK_RST_CONTROLLER_PLLM_SS_CTRL1:
+ * BIT 31:16 : PLLM_SDM_SSC_MAX
+ * BIT 15:0 : PLLM_SDM_SSC_MIN
+ *
+ * CLK_RST_CONTROLLER_PLLM_SS_CTRL2:
+ * BIT 31:16 : PLLM_SDM_SSC_STEP
+ * BIT 15:0 : PLLM_SDM_DIN
+ *
+ * pllm(b)_ss_ctrl1:
+ * 1365, 342 (1600MHz)
+ * 0xFAAB, 0xF404 (1331MHz)
+ *
+ * pllm(b)_ss_ctrl2:
+ * 2, 1365 (1600MHz)
+ * 6, 0xFAAB (1331MHz)
+ *
+ * No need to care about this if Spread Spectrum (SS) is disabled
+ */
+ // Disable PLL Spread Spectrum Control
+ table->pll_en_ssc = 0;
+ table->pllm_ss_cfg = 1 << 30;
+ }
+
+ /* EMC misc. configuration */
+ {
+ /* ? Command Trigger: MRW, MRW2: MRW_OP - [PMC] data to be written ?
+ *
+ * EMC_MRW: MRW_OP
+ * 1600 MHz: 0x54
+ * 1331 MHz: 0x44
+ * 1065 MHz: 0x34
+ * 800 MHz: 0x34
+ * 665 MHz: 0x14
+ * 408 MHz: 0x04
+ * 204 MHz: 0x04
+ *
+ * EMC_MRW2: MRW2_OP
+ * 1600 MHz: 0x2D 45 5*9
+ * 1331 MHz: 0x24 36 4*9
+ * 1065 MHz: 0x1B 27 3*9
+ * 800 MHz: 0x12 18 2*9
+ * 665 MHz: 0x09 9 1*9
+ * 408 MHz: 0x00
+ * 204 MHz: 0x00
+ */
+ {
+
+ }
+
+ /* EMC_CFG_2 */
+ /* BIT 5:3 - ZQ_EXTRA_DELAY: 6(1600MHz), 5(1331.2MHz), max possible value: 7 */
+ {
+ CLEAR_BIT(target_table->emc_cfg_2, 5,3)
+ target_table->emc_cfg_2 |= 7 << 3;
+ }
+ }
+ }
+ #endif
+ }
+
+ /* Several locals below are only referenced when EXPERIMENTAL is defined: silence
+  * -Wunused-variable for this function only and restore the previous state afterwards. */
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wunused-variable"
+ /*
+  * Scan an NSO image one u32 at a time and patch the known clock patterns in place.
+  *
+  * mapped_nso: start address of the image to scan.
+  * nso_size:   size of the image in bytes.
+  *
+  * Every word equal to 1600'000 is overwritten with EmcClock. On Mariko, such a word is
+  * additionally inspected first:
+  *   - followed by 1100: it is the rate_khz of a MarikoMtcTable; the table is copied over
+  *     the preceding (1331'200) table and then handed to AdjustMtcTable;
+  *   - followed two words later by 675: the entry one emc_dvb_dvfs_table_t earlier is
+  *     changed from 1331'200 to 1600'000.
+  * After the scan the match counters are validated and any mismatch aborts.
+  */
+ void ApplyAutoPcvPatch(uintptr_t mapped_nso, size_t nso_size) {
+     /* Abort immediately once something goes wrong */
+     bool isMariko = (spl::GetSocType() == spl::SocType_Mariko);
+
+     /* The scan window is nso_size - sizeof(MarikoMtcTable); refuse images that would make it wrap */
+     if (nso_size < sizeof(MarikoMtcTable))
+         AMS_ABORT("nso_size");
+
+     constexpr u32 emcMaxClockMaxCnt = 30;
+     constexpr u32 cpuMaxVoltMarikoMaxCnt = 13;
+     constexpr u32 mtcTableMarikoMaxCnt = 13;
+     constexpr u32 gpuMaxClockMarikoReqCnt = 2;
+
+     constexpr u32 cpuClockVddCpuPatternNext = 0;
+     constexpr u32 cpuTableMarikoPatternNext = 1527196;
+     // constexpr u32 cpuTableEristaPatternNext = 1227500;
+
+     constexpr u32 cpuMinVolt[] = { 800, 637, 620, 610 };
+
+     /* Number of times each pattern was found and patched */
+     u8 emcMaxClock {};
+     u8 cpuClockVddCpu {};
+     u8 cpuTableMariko {};
+     // u8 cpuTableErista {};
+     u8 gpuTableMariko {};
+     u8 cpuMaxVoltMariko {};
+     u8 mtcTableMariko {};
+     u8 dvbTableMariko {};
+     u8 gpuMaxClockMariko {};
+     u8 gpuMaxClockMarikoRd {};
+     u32 gpuMaxClockPattern[2] = { 0x528E0000, 0x72A002E0 }; // 1536 MHz
+
+     /* Stopping one MarikoMtcTable short of the end keeps the forward look-ahead reads inside the image */
+     uintptr_t i = mapped_nso;
+     while (i <= mapped_nso + nso_size - sizeof(MarikoMtcTable))
+     {
+         u32 value = *(reinterpret_cast<u32 *>(i));
+
+         #ifdef EXPERIMENTAL
+         if (isMariko)
+         {
+             // CPU Table
+             if (value == 1785'000)
+             {
+                 u32 value_next2 = *(reinterpret_cast<u32 *>(i + sizeof(u32) * 2));
+                 if (value_next2 == cpuClockVddCpuPatternNext)
+                 {
+                     u32 value_next = *(reinterpret_cast<u32 *>(i + sizeof(u32)));
+                     if (value_next == cpuClockVddCpuPatternNext)
+                     {
+                         std::memcpy(reinterpret_cast<void *>(i), &CpuMaxClock, sizeof(CpuMaxClock));
+                         cpuClockVddCpu++;
+                     }
+                 }
+
+                 if (value_next2 == cpuTableMarikoPatternNext)
+                 {
+                     uintptr_t entry_1963 = i + 2 * sizeof(cpu_freq_cvb_table_t);
+                     uintptr_t free_space = entry_1963 + sizeof(cpu_freq_cvb_table_t);
+
+                     // entry_204 lies 18 entries before free_space; it must not fall before the image
+                     if ( free_space - mapped_nso >= 18 * sizeof(cpu_freq_cvb_table_t)
+                       && *(reinterpret_cast<u32 *>(entry_1963)) == 1963'500
+                       && *(reinterpret_cast<u32 *>(free_space)) == 0
+                       && *(reinterpret_cast<u32 *>(free_space - 18 * sizeof(cpu_freq_cvb_table_t))) == 204'000 )
+                     {
+                         std::memcpy(reinterpret_cast<void *>(free_space), NewCpuTables, sizeof(NewCpuTables));
+                         cpuTableMariko++;
+
+                         // Patch CPU max volt (1120'000) in CPU dvfs table
+                         // (the counter has its own name so it no longer shadows the scan cursor i)
+                         for (u32 entry = 0; entry < 18; entry++)
+                         {
+                             void* max_volt_dvfs = reinterpret_cast<void *>(free_space - entry * sizeof(cpu_freq_cvb_table_t) - sizeof(cvb_coefficients));
+                             std::memcpy(max_volt_dvfs, &NewCpuVoltageScaled, sizeof(NewCpuVoltageScaled));
+                         }
+                     }
+                 }
+             }
+
+             // GPU Table
+             if (value == 1267'200)
+             {
+                 // uintptr_t, not u32: the address must keep its full width to be cast back to a pointer
+                 uintptr_t free_space = i + sizeof(gpu_cvb_pll_table_t);
+                 if (*(reinterpret_cast<u32 *>(free_space)) == 0)
+                 {
+                     std::memcpy(reinterpret_cast<void *>(free_space), NewGpuTables, sizeof(NewGpuTables));
+                     gpuTableMariko++;
+                 }
+             }
+
+             // CPU voltage range (the min volt word precedes it, so skip the very first word of the image)
+             if (value == 1120 && i > mapped_nso)
+             {
+                 u32 value_cpu_min_volt = *(reinterpret_cast<u32 *>(i - sizeof(u32)));
+
+                 for (u32 j = 0; j < sizeof(cpuMinVolt)/sizeof(u32); j++)
+                 {
+                     if (cpuMinVolt[j] == value_cpu_min_volt)
+                     {
+                         // acceptable cpu min volt, patch max volt
+                         std::memcpy(reinterpret_cast<void *>(i), &CpuVoltageLimit, sizeof(CpuVoltageLimit));
+                         cpuMaxVoltMariko++;
+                         break;
+                     }
+                 }
+             }
+
+             // GPU Max Clock asm
+             {
+                 // Pattern:
+                 // *
+                 // * MOV W11, #0x1000 MOV (wide immediate) 0x1000 0xB (11)
+                 // * sf | opc | | hw | imm16 | Rd
+                 // * #31 |30 29|28 27 26 25 24 23|22 21|20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 |4 3 2 1 0
+                 // * 0 | 1 0 | 1 0 0 1 0 1| 0 0| 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 |0 1 0 1 1
+                 // *
+                 // * MOVK W11, #0xE, LSL#16 16 0xE 0xB (11)
+                 // * sf | opc | | hw | imm16 | Rd
+                 // * #31 |30 29|28 27 26 25 24 23|22 21|20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 |4 3 2 1 0
+                 // * 0 | 1 1 | 1 0 0 1 0 1| 0 1| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 |0 1 0 1 1
+
+                 constexpr u32 mov_w_0x1000_hi = 0x52820000 >> 5;
+                 constexpr u32 movk_w_0xE_shift16_hi = 0x72A001C0 >> 5;
+
+                 // Upper 27 bits select the instruction and immediate, lower 5 bits are the destination register
+                 u32 value_hi = value >> 5;
+                 u32 value_lo = value & ((1 << 5) - 1);
+                 if (value_hi == mov_w_0x1000_hi)
+                 {
+                     u32 value_next = *(reinterpret_cast<u32 *>(i + sizeof(u32)));
+                     u32 value_next_hi = value_next >> 5;
+                     u32 value_next_lo = value_next & ((1 << 5) - 1);
+                     if (value_next_hi == movk_w_0xE_shift16_hi && value_next_lo == value_lo)
+                     {
+                         if (!gpuMaxClockMarikoRd)
+                             gpuMaxClockMarikoRd = value_lo;
+                         if (gpuMaxClockMarikoRd != value_lo)
+                             AMS_ABORT("gpuMaxClockMarikoRd not consistent!");
+
+                         gpuMaxClockPattern[0] |= gpuMaxClockMarikoRd;
+                         gpuMaxClockPattern[1] |= gpuMaxClockMarikoRd;
+
+                         std::memcpy(reinterpret_cast<void *>(i), gpuMaxClockPattern, sizeof(gpuMaxClockPattern));
+                         gpuMaxClockMariko++;
+                     }
+                 }
+             }
+         }
+         #endif
+
+         {
+             // MEM
+             if (value == 1600'000)
+             {
+                 if (isMariko)
+                 {
+                     u32 value_next = *(reinterpret_cast<u32 *>(i + sizeof(u32)));
+                     u32 value_next2 = *(reinterpret_cast<u32 *>(i + sizeof(u32) * 2));
+
+                     if (value_next == 1100) // min_volt in MtcTable
+                     {
+                         // This table and the one preceding it must both start inside the image
+                         if (i - mapped_nso < offsetof(MarikoMtcTable, rate_khz) + sizeof(MarikoMtcTable))
+                             AMS_ABORT("mtc_table");
+
+                         uintptr_t offset = i - offsetof(MarikoMtcTable, rate_khz);
+                         uintptr_t offset_prev = offset - sizeof(MarikoMtcTable);
+
+                         MarikoMtcTable* const mtc_table_new = reinterpret_cast<MarikoMtcTable *>(offset);
+                         MarikoMtcTable* const mtc_table_old = reinterpret_cast<MarikoMtcTable *>(offset_prev);
+                         if (mtc_table_new->rev != 3 || mtc_table_old->rev != 3 || mtc_table_old->rate_khz != 1331'200)
+                             AMS_ABORT("mtc_table");
+
+                         // Replace 1331 MHz with 1600 MHz
+                         std::memcpy(reinterpret_cast<void *>(mtc_table_old), reinterpret_cast<void *>(mtc_table_new), sizeof(MarikoMtcTable));
+                         mtcTableMariko++;
+
+                         // Generate new table for Max MHz
+                         // [!TODO] ref table is identical to new table, leaving some params unchanged
+                         AdjustMtcTable(mtc_table_new, mtc_table_old);
+                     }
+                     else if (value_next2 == 675) // Mariko Dvb Table
+                     {
+                         // The 1331 entry sits one table entry earlier; never look before the image
+                         if (i - mapped_nso >= sizeof(emc_dvb_dvfs_table_t))
+                         {
+                             // uintptr_t, not u32: the address must keep its full width to be cast back to a pointer
+                             uintptr_t dvb_1331_offset = i - sizeof(emc_dvb_dvfs_table_t);
+
+                             u32 value_1331_entry = *(reinterpret_cast<u32 *>(dvb_1331_offset));
+                             if (value_1331_entry == 1331'200)
+                             {
+                                 const u32 dvb_1600_clk = 1600'000;
+                                 std::memcpy(reinterpret_cast<void *>(dvb_1331_offset), &dvb_1600_clk, sizeof(dvb_1600_clk));
+                                 dvbTableMariko++;
+                             }
+                         }
+                     }
+                 }
+
+                 // Patch Max Emc Clock for both Erista and Mariko
+                 std::memcpy(reinterpret_cast<void *>(i), &EmcClock, sizeof(EmcClock));
+                 emcMaxClock++;
+             }
+         }
+
+         i += sizeof(u32);
 }
- /* EMC misc. configuration */
+     /* Post-scan validation: abort unless every mandatory pattern was patched a plausible number of times */
+ if (isMariko)
 {
- /* ? Command Trigger: MRW, MRW2: MRW_OP - [PMC] data to be written ?
- *
- * EMC_MRW: MRW_OP
- * 1600 MHz: 0x54
- * 1331 MHz: 0x44
- * 1065 MHz: 0x34
- * 800 MHz: 0x34
- * 665 MHz: 0x14
- * 408 MHz: 0x04
- * 204 MHz: 0x04
- *
- * EMC_MRW2: MRW2_OP
- * 1600 MHz: 0x2D 45 5*9
- * 1331 MHz: 0x24 36 4*9
- * 1065 MHz: 0x1B 27 3*9
- * 800 MHz: 0x12 18 2*9
- * 665 MHz: 0x09 9 1*9
- * 408 MHz: 0x00
- * 204 MHz: 0x00
- */
- {
-
- }
-
- /* EMC_CFG_2 */
- /* BIT 5:3 - ZQ_EXTRA_DELAY: 6(1600MHz), 5(1331.2MHz), max possible value: 7 */
- {
- CLEAR_BIT(target_table->emc_cfg_2, 5,3)
- target_table->emc_cfg_2 |= 7 << 3;
- }
+         // if (cpuClockVddCpu != 1)
+         //     AMS_ABORT("cpuClockVddCpu");
+         // if (cpuTableMariko != 1)
+         //     AMS_ABORT("cpuTableMariko");
+         // if (gpuTableMariko != 1)
+         //     AMS_ABORT("gpuTableMariko");
+         if (dvbTableMariko != 1)
+             AMS_ABORT("dvbTableMariko");
+         // if (cpuMaxVoltMariko > cpuMaxVoltMarikoMaxCnt || !cpuMaxVoltMariko)
+         //     AMS_ABORT("cpuMaxVoltMariko");
+         if (mtcTableMariko > mtcTableMarikoMaxCnt || !mtcTableMariko)
+             AMS_ABORT("mtcTableMariko");
+         // if (gpuMaxClockMariko != gpuMaxClockMarikoReqCnt)
+         //     AMS_ABORT("gpuMaxClockMariko");
+     }
+     {
+         if (emcMaxClock > emcMaxClockMaxCnt || !emcMaxClock)
+             AMS_ABORT("emcMaxClock");
+     }
+ }
+ #pragma GCC diagnostic pop
+ }
+
+ namespace ptm {
+ /*
+  * Find the EMC clock table in an NSO image and raise its entries in place.
+  *
+  * mapped_nso: start address of the image to scan.
+  * nso_size:   size of the image in bytes.
+  *
+  * Does nothing on Erista. Otherwise the first u32 equal to 1'600'000'000 marks the table
+  * start; up to emcOffsetCnt entries spaced OffsetInterval bytes apart are then visited.
+  * Each entry must hold the same clock in two consecutive words: 1600 MHz becomes
+  * EmcClock * 1000, 1331.2 MHz and 1065.6 MHz become 1600 MHz. The walk stops silently at
+  * the first entry that does not fit this shape or does not fit inside the image.
+  */
+ void ApplyAutoPtmPatch(uintptr_t mapped_nso, size_t nso_size) {
+     /* No abort here as ptm is not that critical */
+     if (spl::GetSocType() == spl::SocType_Erista)
+         return;
+
+     constexpr u32 OffsetInterval = 0x20;
+     constexpr u32 emcOffsetCnt = 16;
+     constexpr u32 EmcMaxClk = EmcClock * 1000;
+     constexpr u32 Emc1600Clk = 1600'000'000;
+
+     const uintptr_t nso_end = mapped_nso + nso_size;
+
+     /* Only read words that lie completely inside the image: the last valid word starts at nso_end - 4 */
+     uintptr_t emcOffsetStart = 0;
+     for (uintptr_t i = mapped_nso; i + sizeof(u32) <= nso_end; i += sizeof(u32))
+     {
+         if (*(reinterpret_cast<u32 *>(i)) == Emc1600Clk)
+         {
+             emcOffsetStart = i;
+             break;
+         }
+     }
+
+     if (!emcOffsetStart)
+         return;
+
+     for (u32 j = 0; j < emcOffsetCnt; j++)
+     {
+         uintptr_t offset = emcOffsetStart + OffsetInterval * j;
+         uintptr_t offset_next = offset + sizeof(u32);
+
+         /* Both words of the entry must be readable and writable within the image */
+         if (offset_next + sizeof(u32) > nso_end)
+             return;
+
+         u32 value = *(reinterpret_cast<u32 *>(offset));
+         u32 value_next = *(reinterpret_cast<u32 *>(offset_next));
+
+         if (value != value_next)
+             return;
+
+         u32 value_patched = value;
+
+         switch (value)
+         {
+             case 1600'000'000:
+                 value_patched = EmcMaxClk;
+                 break;
+             case 1331'200'000:
+             case 1065'600'000:
+                 value_patched = Emc1600Clk;
+                 break;
+             default:
+                 return;
+         }
+
+         std::memcpy(reinterpret_cast<void *>(offset), &value_patched, sizeof(value_patched));
+         std::memcpy(reinterpret_cast<void *>(offset_next), &value_patched, sizeof(value_patched));
 }
 }
- #endif
}
- /* Unlock the second sub-partition for retail Mariko, and double the bandwidth (~60GB/s)
- * https://github.com/CTCaer/hekate/blob/01b6e645b3cb69ddf28cc9eff40c4b35bf03dbd4/bdk/mem/sdram.h#L30
- *
- * Sub-partitions are defined as ranks, so there is no other way than replacing DRAM chips.
- */
}
-
-namespace ptm {
- constexpr u32 EmcOffsetStart[] = {
- // 0xC5E24,
- 0xA032C,
- };
-
- constexpr u32 OffsetInterval = 0x20;
-
- constexpr u32 CpuBoostOffset = 0x170;
-}
\ No newline at end of file
diff --git a/Source/Atmosphere/stratosphere/loader/source/ldr_patcher.cpp b/Source/Atmosphere/stratosphere/loader/source/ldr_patcher.cpp
deleted file mode 100644
index cb06823f..00000000
--- a/Source/Atmosphere/stratosphere/loader/source/ldr_patcher.cpp
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- * Copyright (c) Atmosphère-NX
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see .
- */
-#include
-#include "ldr_patcher.hpp"
-
-namespace ams::ldr {
-
- namespace {
-
- constexpr const char *NsoPatchesDirectory = "exefs_patches";
-
- /* Exefs patches want to prevent modification of header, */
- /* and also want to adjust offset relative to mapped location. */
- constexpr size_t NsoPatchesProtectedSize = sizeof(NsoHeader);
- constexpr size_t NsoPatchesProtectedOffset = sizeof(NsoHeader);
-
- constexpr const char * const LoaderSdMountName = "#amsldr-sdpatch";
- static_assert(sizeof(LoaderSdMountName) <= fs::MountNameLengthMax);
-
- constinit os::SdkMutex g_ldr_sd_lock;
- constinit bool g_mounted_sd;
-
- constinit os::SdkMutex g_embedded_patch_lock;
- constinit bool g_got_embedded_patch_settings;
- constinit bool g_force_enable_usb30;
-
- bool EnsureSdCardMounted() {
- std::scoped_lock lk(g_ldr_sd_lock);
-
- if (g_mounted_sd) {
- return true;
- }
-
- if (!cfg::IsSdCardInitialized()) {
- return false;
- }
-
- if (R_FAILED(fs::MountSdCard(LoaderSdMountName))) {
- return false;
- }
-
- return (g_mounted_sd = true);
- }
-
- bool IsUsb30ForceEnabled() {
- std::scoped_lock lk(g_embedded_patch_lock);
-
- if (!g_got_embedded_patch_settings) {
- g_force_enable_usb30 = spl::IsUsb30ForceEnabled();
- g_got_embedded_patch_settings = true;
- }
-
- return g_force_enable_usb30;
- }
-
- u32 GetEmcClock() {
- // RAM freqs from Hekate:
- // 1862400, 1894400, 1932800, 1996800, 2064000, 2099200, 2131200
- // Other values might work as well
- // RAM overclock could be UNSTABLE and generate graphical glitches / instabilities / NAND corruption
- return 1862400;
- }
-
- // u32 GetCpuBoostClock() {
- // return 1963500;
- // }
-
- consteval u8 ParseNybble(char c) {
- AMS_ASSUME(('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f'));
- if ('0' <= c && c <= '9') {
- return c - '0' + 0x0;
- } else if ('A' <= c && c <= 'F') {
- return c - 'A' + 0xA;
- } else /* if ('a' <= c && c <= 'f') */ {
- return c - 'a' + 0xa;
- }
- }
-
- consteval ro::ModuleId ParseModuleId(const char *str) {
- /* Parse a static module id. */
- ro::ModuleId module_id = {};
-
- size_t ofs = 0;
- while (str[0] != 0) {
- AMS_ASSUME(ofs < sizeof(module_id));
- AMS_ASSUME(str[1] != 0);
-
- module_id.data[ofs] = (ParseNybble(str[0]) << 4) | (ParseNybble(str[1]) << 0);
-
- str += 2;
- ofs++;
- }
-
- return module_id;
- }
-
- struct EmbeddedPatchEntry {
- uintptr_t offset;
- const void * const data;
- size_t size;
- };
-
- struct EmbeddedPatch {
- ro::ModuleId module_id;
- size_t num_entries;
- const EmbeddedPatchEntry *entries;
- };
-
- #include "ldr_embedded_usb_patches.inc"
-
- }
-
- #include "ldr_oc_patch.hpp"
-
- /* Apply IPS patches. */
- void LocateAndApplyIpsPatchesToModule(const u8 *module_id_data, uintptr_t mapped_nso, size_t mapped_size) {
- if (!EnsureSdCardMounted()) {
- return;
- }
-
- ro::ModuleId module_id;
- std::memcpy(std::addressof(module_id.data), module_id_data, sizeof(module_id.data));
- ams::patcher::LocateAndApplyIpsPatchesToModule(LoaderSdMountName, NsoPatchesDirectory, NsoPatchesProtectedSize, NsoPatchesProtectedOffset, std::addressof(module_id), reinterpret_cast(mapped_nso), mapped_size);
- }
-
- /* Apply embedded patches. */
- void ApplyEmbeddedPatchesToModule(const u8 *module_id_data, uintptr_t mapped_nso, size_t mapped_size) {
- /* Make module id. */
- ro::ModuleId module_id;
- std::memcpy(std::addressof(module_id.data), module_id_data, sizeof(module_id.data));
-
- if (IsUsb30ForceEnabled()) {
- for (const auto &patch : Usb30ForceEnablePatches) {
- if (std::memcmp(std::addressof(patch.module_id), std::addressof(module_id), sizeof(module_id)) == 0) {
- for (size_t i = 0; i < patch.num_entries; ++i) {
- const auto &entry = patch.entries[i];
- if (entry.offset + entry.size <= mapped_size) {
- std::memcpy(reinterpret_cast(mapped_nso + entry.offset), entry.data, entry.size);
- }
- }
- }
- }
- }
-
- u32 EmcClock = GetEmcClock();
- if (spl::GetSocType() == spl::SocType_Mariko && EmcClock) {
- for (u32 i = 0; i < sizeof(PcvModuleId)/sizeof(ro::ModuleId); i++) {
- if (std::memcmp(std::addressof(PcvModuleId[i]), std::addressof(module_id), sizeof(module_id)) == 0) {
- /* Add new CPU and GPU clock tables for Mariko */
- std::memcpy(reinterpret_cast(mapped_nso + pcv::CpuTablesFreeSpace[i]), pcv::NewCpuTables, sizeof(pcv::NewCpuTables));
- std::memcpy(reinterpret_cast(mapped_nso + pcv::GpuTablesFreeSpace[i]), pcv::NewGpuTables, sizeof(pcv::NewGpuTables));
-
- /* Patch Mariko max CPU and GPU clockrates */
- std::memcpy(reinterpret_cast(mapped_nso + pcv::MaxCpuClockOffset[i]), &pcv::NewMaxCpuClock, sizeof(pcv::NewMaxCpuClock));
- std::memcpy(reinterpret_cast(mapped_nso + pcv::Reg1MaxGpuOffset[i]), pcv::Reg1NewMaxGpuClock, sizeof(pcv::Reg1NewMaxGpuClock[i]));
- std::memcpy(reinterpret_cast(mapped_nso + pcv::Reg2MaxGpuOffset[i]), pcv::Reg2NewMaxGpuClock, sizeof(pcv::Reg2NewMaxGpuClock[i]));
-
- /* Patch max cpu voltage on Mariko */
- for (u32 j = 0; j < sizeof(pcv::CpuVoltageLimitOffsets[i])/sizeof(u32); j++) {
- std::memcpy(reinterpret_cast(mapped_nso + pcv::CpuVoltageLimitOffsets[i][j]), &pcv::NewCpuVoltageLimit, sizeof(pcv::NewCpuVoltageLimit));
- }
- for (u32 j = 0; j < sizeof(pcv::CpuVoltageOldTableCoeff[i])/sizeof(u32); j++) {
- std::memcpy(reinterpret_cast(mapped_nso + pcv::CpuVoltageOldTableCoeff[i][j]), &pcv::NewCpuVoltageScaled, sizeof(pcv::NewCpuVoltageScaled));
- }
-
- for (u32 j = 0; j < sizeof(pcv::MtcTable_1600[i])/sizeof(u32); j++) {
- pcv::MarikoMtcTable* mtc_table_new = reinterpret_cast(mapped_nso + pcv::MtcTable_1600[i][j]);
- pcv::MarikoMtcTable* mtc_table_old = reinterpret_cast(mapped_nso + pcv::MtcTable_1600[i][j] - pcv::MtcTableOffset);
-
- /* Replace 1331 MHz with 1600 MHz, not possible without proper timings for oc clock */
- std::memcpy(reinterpret_cast(mtc_table_old), reinterpret_cast(mtc_table_new), sizeof(pcv::MarikoMtcTable));
-
- /* Generate new table for OC MHz */
- pcv::AdjustMtcTable(mtc_table_new, mtc_table_old);
- }
-
- /* Patch RAM Clock */
- for (u32 j = 0; j < sizeof(pcv::EmcFreqOffsets[i])/sizeof(u32); j++) {
- std::memcpy(reinterpret_cast(mapped_nso + pcv::EmcFreqOffsets[i][j]), &EmcClock, sizeof(EmcClock));
- }
-
- /* Replace 1331 MHz with 1600 MHz in EmcDvbTable */
- const u32 mem1331 = 1600'000;
- std::memcpy(reinterpret_cast(mapped_nso + pcv::EmcDvb1331[i]), &mem1331, sizeof(mem1331));
- }
- }
-
- u32 PtmEmcClk1600 = GetEmcClock() * 1000;
- const u32 PtmEmcClk1331 = 1600'000'000;
-
- // u32 CpuBoostClock = GetCpuBoostClock() * 1000;
-
- /* Patch Ptm for coexistent of 1600 MHz and OC clock */
- for (u32 i = 0; i < sizeof(PtmModuleId)/sizeof(ro::ModuleId); i++) {
- if (std::memcmp(std::addressof(PtmModuleId[i]), std::addressof(module_id), sizeof(module_id)) == 0) {
- for (u32 j = 0; j < 6; j++) {
- std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j), &PtmEmcClk1600, sizeof(PtmEmcClk1600));
- std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j + 0x4), &PtmEmcClk1600, sizeof(PtmEmcClk1600));
- }
- for (u32 j = 6; j < 10; j++) {
- std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j), &PtmEmcClk1331, sizeof(PtmEmcClk1331));
- std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j + 0x4), &PtmEmcClk1331, sizeof(PtmEmcClk1331));
- }
- for (u32 j = 10; j < 16; j+=2) {
- std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j), &PtmEmcClk1600, sizeof(PtmEmcClk1600));
- std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j + 0x4), &PtmEmcClk1600, sizeof(PtmEmcClk1600));
- std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * (j+1)), &PtmEmcClk1331, sizeof(PtmEmcClk1331));
- std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * (j+1) + 0x4), &PtmEmcClk1331, sizeof(PtmEmcClk1331));
- }
- // for (u32 j = 0; j < 2; j++) {
- // std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::CpuBoostOffset + ptm::OffsetInterval * j), &CpuBoostClock, sizeof(CpuBoostClock));
- // std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::CpuBoostOffset + ptm::OffsetInterval * j + 0x4), &CpuBoostClock, sizeof(CpuBoostClock));
- // }
- }
- }
- }
- }
-
-}
\ No newline at end of file
diff --git a/Source/Atmosphere/stratosphere/loader/source/ldr_process_creation.cpp b/Source/Atmosphere/stratosphere/loader/source/ldr_process_creation.cpp
new file mode 100644
index 00000000..1c60f8c9
--- /dev/null
+++ b/Source/Atmosphere/stratosphere/loader/source/ldr_process_creation.cpp
@@ -0,0 +1,783 @@
+/*
+ * Copyright (c) Atmosphère-NX
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include
+#include "ldr_auto_close.hpp"
+#include "ldr_capabilities.hpp"
+#include "ldr_content_management.hpp"
+#include "ldr_development_manager.hpp"
+#include "ldr_launch_record.hpp"
+#include "ldr_meta.hpp"
+#include "ldr_patcher.hpp"
+#include "ldr_process_creation.hpp"
+#include "ldr_ro_manager.hpp"
+#include "ldr_oc_patch.hpp"
+
+namespace ams::ldr {
+
+ namespace {
+
+ /* Convenience defines. */
+ constexpr size_t SystemResourceSizeMax = 0x1FE00000; /* Largest npdm system_resource_size accepted by GetCreateProcessParameter. */
+
+ /* Types. */
+ enum NsoIndex { /* One slot per NSO a program can provide; NsoPaths lists the code path of each slot in the same order. */
+ Nso_Rtld = 0,
+ Nso_Main = 1,
+ Nso_SubSdk0 = 2,
+ Nso_SubSdk1 = 3,
+ Nso_SubSdk2 = 4,
+ Nso_SubSdk3 = 5,
+ Nso_SubSdk4 = 6,
+ Nso_SubSdk5 = 7,
+ Nso_SubSdk6 = 8,
+ Nso_SubSdk7 = 9,
+ Nso_SubSdk8 = 10,
+ Nso_SubSdk9 = 11,
+ Nso_Sdk = 12,
+ Nso_Count, /* Number of slots; sizes every per-NSO array in this file. */
+ };
+
+ constexpr inline const char *NsoPaths[Nso_Count] = { /* Code path of each NSO, in NsoIndex order; read through GetNsoPath. */
+ ENCODE_ATMOSPHERE_CODE_PATH("/rtld"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/main"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk0"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk1"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk2"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk3"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk4"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk5"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk6"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk7"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk8"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/subsdk9"),
+ ENCODE_ATMOSPHERE_CODE_PATH("/sdk"),
+ };
+
+ /* Look up the code path for NSO slot idx; aborts when idx is not a valid slot. */
+ constexpr const char *GetNsoPath(size_t idx) {
+     /* An out-of-range slot would index past the end of NsoPaths. */
+     AMS_ABORT_UNLESS(Nso_Count > idx);
+
+     const char * const path = NsoPaths[idx];
+     return path;
+ }
+
+ struct ProcessInfo { /* Bookkeeping for one process: its handle, an argument region and one address/size pair per NSO slot. */
+ os::NativeHandle process_handle;
+ uintptr_t args_address;
+ size_t args_size;
+ uintptr_t nso_address[Nso_Count]; /* presumably indexed by NsoIndex, like NsoPaths -- confirm against the code that fills it */
+ size_t nso_size[Nso_Count];
+ };
+
+ /* Global NSO header cache. */
+ bool g_has_nso[Nso_Count];
+ NsoHeader g_nso_headers[Nso_Count];
+
+ /* Pcv/Ptm check cache. */
+ bool g_is_pcv; /* Written by ValidateMeta: true when the ACI program id equals ncm::SystemProgramId::Pcv. */
+ bool g_is_ptm; /* Written by ValidateMeta: true when the ACI program id equals ncm::SystemProgramId::Ptm. */
+
+ /* Anti-downgrade. */
+ #include "ldr_anti_downgrade_tables.inc"
+
+ /* Anti-downgrade check: reject a program whose version is lower than the minimum listed
+  * for it in the table that matches the running firmware. Always succeeds before 8.1.0,
+  * in development, or when LDR_VALIDATE_PROCESS_VERSION is not defined. */
+ Result ValidateProgramVersion(ncm::ProgramId program_id, u32 version) {
+     /* Firmware older than 8.1.0 does no version verification. */
+     R_SUCCEED_IF(hos::GetVersion() < hos::Version_8_1_0);
+
+     /* Neither does development. */
+     R_SUCCEED_IF(IsDevelopmentForAntiDowngradeCheck());
+
+     /* The table lookup is only compiled in on request. */
+#ifdef LDR_VALIDATE_PROCESS_VERSION
+     const MinimumProgramVersion *table = nullptr;
+     size_t table_count = 0;
+
+     /* Take the newest table that is not newer than the running firmware. */
+     const auto firmware = hos::GetVersion();
+     if (firmware >= hos::Version_11_0_0) {
+         table       = g_MinimumProgramVersions1100;
+         table_count = g_MinimumProgramVersionsCount1100;
+     } else if (firmware >= hos::Version_10_1_0) {
+         table       = g_MinimumProgramVersions1010;
+         table_count = g_MinimumProgramVersionsCount1010;
+     } else if (firmware >= hos::Version_10_0_0) {
+         table       = g_MinimumProgramVersions1000;
+         table_count = g_MinimumProgramVersionsCount1000;
+     } else if (firmware >= hos::Version_9_1_0) {
+         table       = g_MinimumProgramVersions910;
+         table_count = g_MinimumProgramVersionsCount910;
+     } else if (firmware >= hos::Version_9_0_0) {
+         table       = g_MinimumProgramVersions900;
+         table_count = g_MinimumProgramVersionsCount900;
+     } else if (firmware >= hos::Version_8_1_0) {
+         table       = g_MinimumProgramVersions810;
+         table_count = g_MinimumProgramVersionsCount810;
+     }
+
+     /* Every entry recorded for this program has to be satisfied. */
+     for (size_t idx = 0; idx != table_count; ++idx) {
+         const auto &entry = table[idx];
+         if (!(entry.program_id == program_id)) {
+             continue;
+         }
+         R_UNLESS(entry.version <= version, ldr::ResultInvalidVersion());
+     }
+#else
+     AMS_UNUSED(program_id, version);
+#endif
+     return ResultSuccess();
+ }
+
+ /* Helpers. */
+ /* Fill *out from the parsed meta: thread basics from the npdm, the program id from the
+  * ACI, the four access-control blobs packed back to back into out->ac_buffer (with their
+  * sizes recorded), and the flags derived from the ACI kernel capabilities.
+  * Fails with ldr::ResultInternalError when the blobs do not fit into the buffer. */
+ Result GetProgramInfoFromMeta(ProgramInfo *out, const Meta *meta) {
+     /* Copy basic info. */
+     out->main_thread_priority = meta->npdm->main_thread_priority;
+     out->default_cpu_id = meta->npdm->default_cpu_id;
+     out->main_thread_stack_size = meta->npdm->main_thread_stack_size;
+     out->program_id = meta->aci->program_id;
+
+     /* Copy access controls. */
+     /* offset is the write cursor into out->ac_buffer; each copy is bounds-checked before it is made. */
+     size_t offset = 0;
+#define COPY_ACCESS_CONTROL(source, which) \
+     ({ \
+         const size_t size = meta->source->which##_size; \
+         R_UNLESS(offset + size <= sizeof(out->ac_buffer), ldr::ResultInternalError()); \
+         out->source##_##which##_size = size; \
+         std::memcpy(out->ac_buffer + offset, meta->source##_##which, size); \
+         offset += size; \
+     })
+
+     /* Copy all access controls to buffer. */
+     COPY_ACCESS_CONTROL(acid, sac);
+     COPY_ACCESS_CONTROL(aci, sac);
+     COPY_ACCESS_CONTROL(acid, fac);
+     COPY_ACCESS_CONTROL(aci, fah);
+#undef COPY_ACCESS_CONTROL
+
+     /* Copy flags. */
+     /* kac_size is in bytes; the capability list is an array of util::BitPack32 descriptors. */
+     out->flags = MakeProgramInfoFlag(static_cast<const util::BitPack32 *>(meta->aci_kac), meta->aci->kac_size / sizeof(util::BitPack32));
+     return ResultSuccess();
+ }
+
+ /* True when the flags derived from the ACI kernel capabilities give the application type as applet. */
+ bool IsApplet(const Meta *meta) {
+     /* kac_size is in bytes; the capability list is an array of util::BitPack32 descriptors. */
+     const auto flags = MakeProgramInfoFlag(static_cast<const util::BitPack32 *>(meta->aci_kac), meta->aci->kac_size / sizeof(util::BitPack32));
+     return (flags & ProgramInfoFlag_ApplicationTypeMask) == ProgramInfoFlag_Applet;
+ }
+
+ /* True when the flags derived from the ACI kernel capabilities give the application type as application. */
+ bool IsApplication(const Meta *meta) {
+     /* kac_size is in bytes; the capability list is an array of util::BitPack32 descriptors. */
+     const auto flags = MakeProgramInfoFlag(static_cast<const util::BitPack32 *>(meta->aci_kac), meta->aci->kac_size / sizeof(util::BitPack32));
+     return (flags & ProgramInfoFlag_ApplicationTypeMask) == ProgramInfoFlag_Application;
+ }
+
+ /* Extract the address space type field from the npdm flags. */
+ Npdm::AddressSpaceType GetAddressSpaceType(const Meta *meta) {
+     /* Mask the field out first, then shift it down to bit 0. */
+     return static_cast<Npdm::AddressSpaceType>((meta->npdm->flags & Npdm::MetaFlag_AddressSpaceTypeMask) >> Npdm::MetaFlag_AddressSpaceTypeShift);
+ }
+
+ /* Extract the pool partition field from the ACID flags. */
+ Acid::PoolPartition GetPoolPartition(const Meta *meta) {
+     /* Mask the field out first, then shift it down to bit 0. */
+     return static_cast<Acid::PoolPartition>((meta->acid->flags & Acid::AcidFlag_PoolPartitionMask) >> Acid::AcidFlag_PoolPartitionShift);
+ }
+
+ /* Read the header of every NSO that can be opened.
+  * nso_headers and has_nso each hold Nso_Count elements and are cleared first; a slot
+  * whose file cannot be opened stays cleared. A failed or short header read is an error. */
+ Result LoadNsoHeaders(NsoHeader *nso_headers, bool *has_nso) {
+     /* Start with no headers and nothing marked present. */
+     std::memset(nso_headers, 0, Nso_Count * sizeof(nso_headers[0]));
+     std::memset(has_nso, 0, Nso_Count * sizeof(has_nso[0]));
+
+     for (size_t slot = 0; slot < Nso_Count; ++slot) {
+         /* Slots without an openable file are simply skipped. */
+         fs::FileHandle nso_file;
+         if (!R_SUCCEEDED(fs::OpenFile(std::addressof(nso_file), GetNsoPath(slot), fs::OpenMode_Read))) {
+             continue;
+         }
+         ON_SCOPE_EXIT { fs::CloseFile(nso_file); };
+
+         /* The header sits at the very start of the file and must be read completely. */
+         NsoHeader * const header = nso_headers + slot;
+         size_t bytes_read;
+         R_TRY(fs::ReadFile(std::addressof(bytes_read), nso_file, 0, header, sizeof(*header)));
+         R_UNLESS(bytes_read == sizeof(*header), ldr::ResultInvalidNso());
+
+         has_nso[slot] = true;
+     }
+
+     return ResultSuccess();
+ }
+
+ /* Check the set of loaded NSOs: main is mandatory, nothing but main may exist without
+  * rtld, and every header must place its text segment at offset zero.
+  * Any violation yields ldr::ResultInvalidNso. */
+ Result ValidateNsoHeaders(const NsoHeader *nso_headers, const bool *has_nso) {
+     /* A program without main cannot be loaded. */
+     R_UNLESS(has_nso[Nso_Main], ldr::ResultInvalidNso());
+
+     /* Without rtld, every slot after main has to be empty. */
+     const bool rtld_missing = !has_nso[Nso_Rtld];
+     for (size_t slot = Nso_Main + 1; rtld_missing && slot < Nso_Count; ++slot) {
+         R_UNLESS(!has_nso[slot], ldr::ResultInvalidNso());
+     }
+
+     /* Text must start at offset zero in every header. */
+     for (const NsoHeader *header = nso_headers; header != nso_headers + Nso_Count; ++header) {
+         R_UNLESS(header->text_dst_offset == 0, ldr::ResultInvalidNso());
+     }
+
+     return ResultSuccess();
+ }
+
+ /* Validate a parsed meta before process creation: the program version, the ACI program
+  * id against the range granted by the ACID, the ACI kernel capabilities against the
+  * ACID ones and, when verification data is present and requested, its RSA-2048-PSS
+  * signature. As a side effect it records in g_is_pcv / g_is_ptm whether the ACI program
+  * id is pcv or ptm. */
+ Result ValidateMeta(const Meta *meta, const ncm::ProgramLocation &loc, const fs::CodeVerificationData &code_verification_data) {
+     /* Validate version. */
+     R_TRY(ValidateProgramVersion(loc.program_id, meta->npdm->version));
+
+     /* Validate program id. */
+     R_UNLESS(meta->aci->program_id >= meta->acid->program_id_min, ldr::ResultInvalidProgramId());
+     R_UNLESS(meta->aci->program_id <= meta->acid->program_id_max, ldr::ResultInvalidProgramId());
+
+     /* Check if nca is pcv or ptm */
+     g_is_pcv = meta->aci->program_id == ncm::SystemProgramId::Pcv;
+     g_is_ptm = meta->aci->program_id == ncm::SystemProgramId::Ptm;
+
+     /* Validate the kernel capabilities. */
+     /* Both kac_size values are in bytes; the lists are arrays of util::BitPack32 descriptors. */
+     R_TRY(TestCapability(static_cast<const util::BitPack32 *>(meta->acid_kac), meta->acid->kac_size / sizeof(util::BitPack32), static_cast<const util::BitPack32 *>(meta->aci_kac), meta->aci->kac_size / sizeof(util::BitPack32)));
+
+     /* If we have data to validate, validate it. */
+     if (code_verification_data.has_data && meta->check_verification_data) {
+         const u8 *sig = code_verification_data.signature;
+         const size_t sig_size = sizeof(code_verification_data.signature);
+         const u8 *mod = static_cast<const u8 *>(meta->modulus);
+         const size_t mod_size = crypto::Rsa2048PssSha256Verifier::ModulusSize;
+         const u8 *exp = fssystem::GetAcidSignatureKeyPublicExponent();
+         const size_t exp_size = fssystem::AcidSignatureKeyPublicExponentSize;
+         const u8 *hsh = code_verification_data.target_hash;
+         const size_t hsh_size = sizeof(code_verification_data.target_hash);
+         const bool is_signature_valid = crypto::VerifyRsa2048PssSha256WithHash(sig, sig_size, mod, mod_size, exp, exp_size, hsh, hsh_size);
+
+         R_UNLESS(is_signature_valid, ldr::ResultInvalidNcaSignature());
+     }
+
+     /* All good. */
+     return ResultSuccess();
+ }
+
+ /* Build the svc::CreateProcessFlag bits for a program from its npdm/ACID metadata and
+  * the loader flags, and store them in *out. Fails with ldr::ResultInvalidMeta on an
+  * unknown address space type or pool partition; *out is only written on success. */
+ Result GetCreateProcessFlags(u32 *out, const Meta *meta, const u32 ldr_flags) {
+     const u8 npdm_flags = meta->npdm->flags;
+     u32 result = 0;
+
+     /* 64-bit process? */
+     if (npdm_flags & Npdm::MetaFlag_Is64Bit) {
+         result |= svc::CreateProcessFlag_Is64Bit;
+     }
+
+     /* Address space layout. */
+     switch (GetAddressSpaceType(meta)) {
+         case Npdm::AddressSpaceType_32Bit:
+             result |= svc::CreateProcessFlag_AddressSpace32Bit;
+             break;
+         case Npdm::AddressSpaceType_64BitDeprecated:
+             result |= svc::CreateProcessFlag_AddressSpace64BitDeprecated;
+             break;
+         case Npdm::AddressSpaceType_32BitWithoutAlias:
+             result |= svc::CreateProcessFlag_AddressSpace32BitWithoutAlias;
+             break;
+         case Npdm::AddressSpaceType_64Bit:
+             result |= svc::CreateProcessFlag_AddressSpace64Bit;
+             break;
+         default:
+             return ldr::ResultInvalidMeta();
+     }
+
+     /* Debugging is opt-in through the loader flags. */
+     if (ldr_flags & CreateProcessFlag_EnableDebug) {
+         result |= svc::CreateProcessFlag_EnableDebug;
+     }
+
+     /* ASLR is on unless the loader flags disable it. */
+     if ((ldr_flags & CreateProcessFlag_DisableAslr) == 0) {
+         result |= svc::CreateProcessFlag_EnableAslr;
+     }
+
+     /* Applications are marked as such; on 7.0.0+ they may also request optimized memory allocation. */
+     if (IsApplication(meta)) {
+         result |= svc::CreateProcessFlag_IsApplication;
+
+         if (hos::GetVersion() >= hos::Version_7_0_0 && (npdm_flags & Npdm::MetaFlag_OptimizeMemoryAllocation)) {
+             result |= svc::CreateProcessFlag_OptimizeMemoryAllocation;
+         }
+     }
+
+     /* Memory pool selection: a partition on 5.0.0+, a single secure-memory bit on 4.x. */
+     if (hos::GetVersion() >= hos::Version_5_0_0) {
+         switch (GetPoolPartition(meta)) {
+             case Acid::PoolPartition_Application:
+                 /* An applet asking for the application partition is placed in the applet one. */
+                 result |= IsApplet(meta) ? svc::CreateProcessFlag_PoolPartitionApplet
+                                          : svc::CreateProcessFlag_PoolPartitionApplication;
+                 break;
+             case Acid::PoolPartition_Applet:
+                 result |= svc::CreateProcessFlag_PoolPartitionApplet;
+                 break;
+             case Acid::PoolPartition_System:
+                 result |= svc::CreateProcessFlag_PoolPartitionSystem;
+                 break;
+             case Acid::PoolPartition_SystemNonSecure:
+                 result |= svc::CreateProcessFlag_PoolPartitionSystemNonSecure;
+                 break;
+             default:
+                 return ldr::ResultInvalidMeta();
+         }
+     } else if (hos::GetVersion() >= hos::Version_4_0_0) {
+         if (meta->acid->flags & Acid::AcidFlag_DeprecatedUseSecureMemory) {
+             result |= svc::CreateProcessFlag_DeprecatedUseSecureMemory;
+         }
+     }
+
+     /* 11.0.0+/meso: optionally disable device address space merging. */
+     if (npdm_flags & Npdm::MetaFlag_DisableDeviceAddressSpaceMerge) {
+         result |= svc::CreateProcessFlag_DisableDeviceAddressSpaceMerge;
+     }
+
+     *out = result;
+     return ResultSuccess();
+ }
+
+ /* Fill *out for process creation: name, version, program id, resource limit and flags,
+  * plus on 3.0.0+ the system resource page count after validating the requested size. */
+ Result GetCreateProcessParameter(svc::CreateProcessParameter *out, const Meta *meta, u32 flags, os::NativeHandle resource_limit) {
+     /* Begin from an all-zero parameter block. */
+     std::memset(out, 0, sizeof(*out));
+
+     /* Identity. The name copy leaves the final byte of out->name at zero. */
+     std::memcpy(out->name, meta->npdm->program_name, sizeof(out->name) - 1);
+     out->version    = meta->npdm->version;
+     out->program_id = meta->aci->program_id.value;
+     out->reslimit   = resource_limit;
+
+     /* Creation flags. */
+     R_TRY(GetCreateProcessFlags(std::addressof(out->flags), meta, flags));
+
+     /* System resource size only exists on 3.0.0+. */
+     R_SUCCEED_IF(hos::GetVersion() < hos::Version_3_0_0);
+
+     /* The size has to be a multiple of the memory block unit. */
+     R_UNLESS(util::IsAligned(meta->npdm->system_resource_size, os::MemoryBlockUnitSize), ldr::ResultInvalidSize());
+
+     /* A non-zero size is reserved for 64-bit applications/applets and is capped. */
+     if (meta->npdm->system_resource_size != 0) {
+         R_UNLESS((out->flags & svc::CreateProcessFlag_AddressSpace64Bit), ldr::ResultInvalidMeta());
+         R_UNLESS(IsApplication(meta) || IsApplet(meta), ldr::ResultInvalidMeta());
+         R_UNLESS(meta->npdm->system_resource_size <= SystemResourceSizeMax, ldr::ResultInvalidMeta());
+     }
+
+     /* Bytes to pages (>> 12). */
+     out->system_resource_num_pages = meta->npdm->system_resource_size >> 12;
+
+     return ResultSuccess();
+ }
+
+ ALWAYS_INLINE u64 GetCurrentProcessInfo(svc::InfoType info_type) {
+     u64 info_value; /* Written by svc::GetInfo below, queried on the current-process pseudo handle; a failed query aborts. */
+     R_ABORT_UNLESS(svc::GetInfo(std::addressof(info_value), info_type, svc::PseudoHandle::CurrentProcess, 0));
+     return info_value;
+ }
+
+ Result SearchFreeRegion(uintptr_t *out, size_t mapping_size) {
+ /* Scans upward from the start of this process's ASLR region for mapping_size bytes that are MemoryState_Free and overlap neither the heap nor the alias region; on success the address is written to *out. First, get the address space extents. */
+ const uintptr_t heap_start = GetCurrentProcessInfo(svc::InfoType_HeapRegionAddress);
+ const size_t heap_size = GetCurrentProcessInfo(svc::InfoType_HeapRegionSize);
+ const uintptr_t alias_start = GetCurrentProcessInfo(svc::InfoType_AliasRegionAddress);
+ const size_t alias_size = GetCurrentProcessInfo(svc::InfoType_AliasRegionSize);
+ const uintptr_t aslr_start = GetCurrentProcessInfo(svc::InfoType_AslrRegionAddress);
+ const size_t aslr_size = GetCurrentProcessInfo(svc::InfoType_AslrRegionSize);
+
+ /* Iterate upwards to find a free region; the loop has no break, so it only ends by returning (or aborting). */
+ uintptr_t address = aslr_start;
+ while (true) {
+ /* Declare variables for memory querying. */
+ svc::MemoryInfo mem_info;
+ svc::PageInfo page_info;
+
+ /* Check that we're still within bounds: this fails when address + mapping_size wraps around, and also when mapping_size is zero. */
+ R_UNLESS(address < address + mapping_size, svc::ResultOutOfMemory());
+
+ /* If the candidate range overlaps the heap region (compared on inclusive last bytes), skip to the end of the heap region. */
+ if (heap_size != 0 && !(address + mapping_size - 1 < heap_start || heap_start + heap_size - 1 < address)) {
+ R_UNLESS(address < heap_start + heap_size, svc::ResultOutOfMemory()); /* The skip must move address forward. */
+ address = heap_start + heap_size;
+ continue;
+ }
+
+ /* If the candidate range overlaps the alias region (same inclusive comparison), skip to the end of the alias region. */
+ if (alias_size != 0 && !(address + mapping_size - 1 < alias_start || alias_start + alias_size - 1 < address)) {
+ R_UNLESS(address < alias_start + alias_size, svc::ResultOutOfMemory()); /* The skip must move address forward. */
+ address = alias_start + alias_size;
+ continue;
+ }
+
+ /* Get the current memory range at address; a failed query aborts. */
+ R_ABORT_UNLESS(svc::QueryMemory(std::addressof(mem_info), std::addressof(page_info), address));
+
+ /* If the memory range is free and has at least mapping_size bytes between address and its end, use it. */
+ if (mem_info.state == svc::MemoryState_Free && mapping_size <= ((mem_info.base_address + mem_info.size) - address)) {
+ *out = address;
+ return ResultSuccess();
+ }
+
+ /* Check that we can advance: the queried range must end past address, and its last byte must lie before the last byte of the ASLR region. */
+ R_UNLESS(address < mem_info.base_address + mem_info.size, svc::ResultOutOfMemory());
+ R_UNLESS(mem_info.base_address + mem_info.size - 1 < aslr_start + aslr_size - 1, svc::ResultOutOfMemory());
+
+ /* Advance to the first address after the queried range. */
+ address = mem_info.base_address + mem_info.size;
+ }
+ }
+
+ Result DecideAddressSpaceLayout(ProcessInfo *out, svc::CreateProcessParameter *out_param, const NsoHeader *nso_headers, const bool *has_nso, const ArgumentStore::Entry *argument) {
+     /* Decide where every module and the argument region will live: offsets are computed first, then rebased onto the chosen base address. */
+     out->args_address = 0;
+     out->args_size    = 0;
+     std::memset(out->nso_address, 0, sizeof(out->nso_address));
+     std::memset(out->nso_size, 0, sizeof(out->nso_size));
+
+     /* Pass 1: place the present modules back to back, as offsets from an image base that is chosen further down. */
+     size_t image_size = 0;
+     bool args_placed  = false;
+     for (size_t i = 0; i < Nso_Count; i++) {
+         if (!has_nso[i]) {
+             continue;
+         }
+         /* A module extends to the furthest end of text, ro, or rw + bss, rounded up to a whole page. */
+         const NsoHeader &header  = nso_headers[i];
+         const size_t text_end    = header.text_dst_offset + header.text_size;
+         const size_t ro_end      = header.ro_dst_offset + header.ro_size;
+         const size_t rw_end      = header.rw_dst_offset + header.rw_size + header.bss_size;
+         const size_t module_size = util::AlignUp(std::max(std::max(text_end, ro_end), rw_end), os::MemoryPageSize);
+         out->nso_address[i] = image_size;
+         out->nso_size[i]    = module_size;
+         image_size += module_size;
+
+         /* The argument region, when one was supplied, is placed once: directly after the first present module. */
+         if (args_placed || argument == nullptr) {
+             continue;
+         }
+         out->args_address = image_size;
+         out->args_size    = util::AlignUp(2 * sizeof(u32) + argument->argument_size * 2 + ArgumentStore::ArgumentBufferSize, os::MemoryPageSize);
+         image_size += out->args_size;
+         args_placed = true;
+     }
+
+     /* Select the ASLR region that corresponds to the address space type in the creation flags. */
+     uintptr_t aslr_base = 0;
+     size_t aslr_extent  = 0;
+     if (hos::GetVersion() >= hos::Version_2_0_0) {
+         switch (out_param->flags & svc::CreateProcessFlag_AddressSpaceMask) {
+             case svc::CreateProcessFlag_AddressSpace32Bit:
+             case svc::CreateProcessFlag_AddressSpace32BitWithoutAlias:
+                 aslr_base   = svc::AddressSmallMap32Start;
+                 aslr_extent = svc::AddressSmallMap32Size;
+                 break;
+             case svc::CreateProcessFlag_AddressSpace64BitDeprecated:
+                 aslr_base   = svc::AddressSmallMap36Start;
+                 aslr_extent = svc::AddressSmallMap36Size;
+                 break;
+             case svc::CreateProcessFlag_AddressSpace64Bit:
+                 aslr_base   = svc::AddressMap39Start;
+                 aslr_extent = svc::AddressMap39Size;
+                 break;
+             AMS_UNREACHABLE_DEFAULT_CASE();
+         }
+     } else {
+         /* On 1.0.0, only 2 address space types existed, so a single flag test decides between them. */
+         if (out_param->flags & svc::CreateProcessFlag_AddressSpace64BitDeprecated) {
+             aslr_base   = svc::AddressSmallMap36Start;
+             aslr_extent = svc::AddressSmallMap36Size;
+         } else {
+             aslr_base   = svc::AddressSmallMap32Start;
+             aslr_extent = svc::AddressSmallMap32Size;
+         }
+     }
+     R_UNLESS(image_size <= aslr_extent, svc::ResultOutOfMemory());
+
+     /* Pass 2: pick the slide. Without the EnableAslr flag the image sits at the very start of the region. */
+     uintptr_t slide = 0;
+     if (out_param->flags & svc::CreateProcessFlag_EnableAslr) {
+         /* Nintendo uses MT19937 (not os::GenerateRandomBytes), but we'll just use TinyMT for now. */
+         const size_t free_size = aslr_extent - image_size;
+         slide = os::GenerateRandomU64(free_size / os::MemoryBlockUnitSize) * os::MemoryBlockUnitSize;
+     }
+
+     /* Pass 3: turn the offsets recorded above into addresses and report the code range to the caller. */
+     const uintptr_t image_base = aslr_base + slide;
+     for (size_t i = 0; i < Nso_Count; i++) {
+         if (has_nso[i]) {
+             out->nso_address[i] += image_base;
+         }
+     }
+     if (out->args_address) {
+         out->args_address += image_base;
+     }
+
+     out_param->code_address   = image_base;
+     out_param->code_num_pages = image_size >> 12;
+     return ResultSuccess();
+ }
+
+ Result CreateProcessImpl(ProcessInfo *out, const Meta *meta, const NsoHeader *nso_headers, const bool *has_nso, const ArgumentStore::Entry *argument, u32 flags, os::NativeHandle resource_limit) {
+     /* Get CreateProcessParameter from the meta, the loader flags and the resource limit handle. */
+     svc::CreateProcessParameter param;
+     R_TRY(GetCreateProcessParameter(std::addressof(param), meta, flags, resource_limit));
+
+     /* Decide on an NSO layout; this fills out's addresses/sizes and param's code address and page count. */
+     R_TRY(DecideAddressSpaceLayout(out, std::addressof(param), nso_headers, has_nso, argument));
+
+     /* Actually create process. The capabilities are passed as a count of u32 words, hence kac_size / sizeof(u32). NOTE(review): the cast's target type was missing and is restored as const u32 * - confirm against upstream. */
+     svc::Handle process_handle;
+     R_TRY(svc::CreateProcess(std::addressof(process_handle), std::addressof(param), static_cast<const u32 *>(meta->aci_kac), meta->aci->kac_size / sizeof(u32)));
+
+     /* Set the output handle; out->process_handle is only written once creation has succeeded. */
+     out->process_handle = process_handle;
+
+     return ResultSuccess();
+ }
+
+ Result LoadNsoSegment(fs::FileHandle file, const NsoHeader::SegmentInfo *segment, size_t file_size, const u8 *file_hash, bool is_compressed, bool check_hash, uintptr_t map_base, uintptr_t map_end) {
+     /* Loads one NSO segment from file to map_base. For an uncompressed segment the caller's file_size is ignored and segment->size bytes are read. */
+     if (!is_compressed) {
+         file_size = segment->size;
+     }
+
+     /* Validate size: the stored data may not be larger than the loaded segment, and the loaded size may not exceed the s32 maximum. NOTE(review): <s32> restored, confirm against upstream. */
+     R_UNLESS(file_size <= segment->size, ldr::ResultInvalidNso());
+     R_UNLESS(segment->size <= std::numeric_limits<s32>::max(), ldr::ResultInvalidNso());
+
+     /* Load data from file. Compressed data is read so that it ends exactly at map_end; uncompressed data is read straight to map_base. */
+     uintptr_t load_address = is_compressed ? map_end - file_size : map_base;
+     size_t read_size;
+     R_TRY(fs::ReadFile(std::addressof(read_size), file, segment->file_offset, reinterpret_cast<void *>(load_address), file_size));
+     R_UNLESS(read_size == file_size, ldr::ResultInvalidNso());
+
+     /* Uncompress if necessary, from load_address to map_base; the segment is rejected unless the result equals segment->size. NOTE(review): cast target types restored from the callee's expected pointer/int types - confirm. */
+     if (is_compressed) {
+         bool decompressed = (util::DecompressLZ4(reinterpret_cast<void *>(map_base), segment->size, reinterpret_cast<const void *>(load_address), file_size) == static_cast<int>(segment->size));
+         R_UNLESS(decompressed, ldr::ResultInvalidNso());
+     }
+
+     /* Check hash if necessary: the SHA-256 of the segment->size bytes at map_base must match file_hash. */
+     if (check_hash) {
+         u8 hash[crypto::Sha256Generator::HashSize];
+         crypto::GenerateSha256Hash(hash, sizeof(hash), reinterpret_cast<const void *>(map_base), segment->size);
+
+         R_UNLESS(std::memcmp(hash, file_hash, sizeof(hash)) == 0, ldr::ResultInvalidNso());
+     }
+
+     return ResultSuccess();
+ }
+
+ Result LoadAutoLoadModule(os::NativeHandle process_handle, fs::FileHandle file, uintptr_t map_address, const NsoHeader *nso_header, uintptr_t nso_address, size_t nso_size) {
+     /* Map and read data from file. The module is written through map_address; AutoCloseMap is scoped to this block (presumably unmapping when it ends - confirm against its definition). */
+     {
+         AutoCloseMap map(map_address, process_handle, nso_address, nso_size);
+         R_TRY(map.GetResult());
+
+         /* Load NSO segments. Each call receives the segment's destination inside the mapping and the end of the mapping (map_address + nso_size). */
+         R_TRY(LoadNsoSegment(file, std::addressof(nso_header->segments[NsoHeader::Segment_Text]), nso_header->text_compressed_size, nso_header->text_hash, (nso_header->flags & NsoHeader::Flag_CompressedText) != 0,
+                              (nso_header->flags & NsoHeader::Flag_CheckHashText) != 0, map_address + nso_header->text_dst_offset, map_address + nso_size));
+         R_TRY(LoadNsoSegment(file, std::addressof(nso_header->segments[NsoHeader::Segment_Ro]), nso_header->ro_compressed_size, nso_header->ro_hash, (nso_header->flags & NsoHeader::Flag_CompressedRo) != 0,
+                              (nso_header->flags & NsoHeader::Flag_CheckHashRo) != 0, map_address + nso_header->ro_dst_offset, map_address + nso_size));
+         R_TRY(LoadNsoSegment(file, std::addressof(nso_header->segments[NsoHeader::Segment_Rw]), nso_header->rw_compressed_size, nso_header->rw_hash, (nso_header->flags & NsoHeader::Flag_CompressedRw) != 0,
+                              (nso_header->flags & NsoHeader::Flag_CheckHashRw) != 0, map_address + nso_header->rw_dst_offset, map_address + nso_size));
+
+         /* Clear unused space to zero: the bytes before text, the text-to-ro and ro-to-rw gaps, and bss_size bytes after rw. NOTE(review): <void *> restored on the casts - confirm against upstream. */
+         const size_t text_end = nso_header->text_dst_offset + nso_header->text_size;
+         const size_t ro_end   = nso_header->ro_dst_offset + nso_header->ro_size;
+         const size_t rw_end   = nso_header->rw_dst_offset + nso_header->rw_size;
+         std::memset(reinterpret_cast<void *>(map_address), 0, nso_header->text_dst_offset);
+         std::memset(reinterpret_cast<void *>(map_address + text_end), 0, nso_header->ro_dst_offset - text_end);
+         std::memset(reinterpret_cast<void *>(map_address + ro_end), 0, nso_header->rw_dst_offset - ro_end);
+         std::memset(reinterpret_cast<void *>(map_address + rw_end), 0, nso_header->bss_size);
+
+         /* Apply embedded patches. */
+         ApplyEmbeddedPatchesToModule(nso_header->module_id, map_address, nso_size);
+
+         /* Apply IPS patches. */
+         LocateAndApplyIpsPatchesToModule(nso_header->module_id, map_address, nso_size);
+
+         /* Apply pcv and ptm patches over the whole mapped module when the g_is_pcv / g_is_ptm flags are set. */
+         if (g_is_pcv) {
+             pcv::ApplyAutoPcvPatch(map_address, nso_size);
+         }
+         if (g_is_ptm) {
+             ptm::ApplyAutoPtmPatch(map_address, nso_size);
+         }
+     }
+
+     /* Set permissions on the target process's addresses, using page-aligned sizes; a segment whose aligned size is zero is skipped. */
+     const size_t text_size = util::AlignUp(nso_header->text_size, os::MemoryPageSize);
+     const size_t ro_size   = util::AlignUp(nso_header->ro_size, os::MemoryPageSize);
+     const size_t rw_size   = util::AlignUp(nso_header->rw_size + nso_header->bss_size, os::MemoryPageSize);
+     if (text_size) {
+         R_TRY(svc::SetProcessMemoryPermission(process_handle, nso_address + nso_header->text_dst_offset, text_size, svc::MemoryPermission_ReadExecute));
+     }
+     if (ro_size) {
+         R_TRY(svc::SetProcessMemoryPermission(process_handle, nso_address + nso_header->ro_dst_offset, ro_size, svc::MemoryPermission_Read));
+     }
+     if (rw_size) {
+         R_TRY(svc::SetProcessMemoryPermission(process_handle, nso_address + nso_header->rw_dst_offset, rw_size, svc::MemoryPermission_ReadWrite));
+     }
+
+     return ResultSuccess();
+ }
+
+ Result LoadAutoLoadModules(const ProcessInfo *process_info, const NsoHeader *nso_headers, const bool *has_nso, const ArgumentStore::Entry *argument) {
+     /* Load each NSO that is present into the address range recorded for it in process_info. */
+     for (size_t i = 0; i < Nso_Count; i++) {
+         if (has_nso[i]) {
+             fs::FileHandle file;
+             R_TRY(fs::OpenFile(std::addressof(file), GetNsoPath(i), fs::OpenMode_Read));
+             ON_SCOPE_EXIT { fs::CloseFile(file); }; /* The file is closed on every way out of this iteration. */
+
+             uintptr_t map_address; /* Local address through which the module's memory is written. */
+             R_TRY(SearchFreeRegion(std::addressof(map_address), process_info->nso_size[i]));
+
+             R_TRY(LoadAutoLoadModule(process_info->process_handle, file, map_address, nso_headers + i, process_info->nso_address[i], process_info->nso_size[i]));
+         }
+     }
+
+     /* Load arguments, if present. */
+     if (argument != nullptr) {
+         /* Write argument data into memory; the map object below only lives for this inner scope. */
+         {
+             uintptr_t map_address;
+             R_TRY(SearchFreeRegion(std::addressof(map_address), process_info->args_size));
+
+             AutoCloseMap map(map_address, process_info->process_handle, process_info->args_address, process_info->args_size);
+             R_TRY(map.GetResult());
+
+             ProgramArguments *args = reinterpret_cast<ProgramArguments *>(map_address); /* NOTE(review): cast target type restored from the declared type of args. */
+             std::memset(args, 0, sizeof(*args));
+             args->allocated_size = process_info->args_size;
+             args->arguments_size = argument->argument_size;
+             std::memcpy(args->arguments, argument->argument, argument->argument_size);
+         }
+
+         /* Set argument region permissions on the target process's address range. */
+         R_TRY(svc::SetProcessMemoryPermission(process_info->process_handle, process_info->args_address, process_info->args_size, svc::MemoryPermission_ReadWrite));
+     }
+
+     return ResultSuccess();
+ }
+
+ }
+
+ /* Process Creation API. */
+ Result CreateProcess(os::NativeHandle *out, PinId pin_id, const ncm::ProgramLocation &loc, const cfg::OverrideStatus &override_status, const char *path, const ArgumentStore::Entry *argument, u32 flags, os::NativeHandle resource_limit) {
+     /* The path argument is deliberately unused; the code is mounted from the location and override status. */
+     AMS_UNUSED(path);
+     ScopedCodeMount code_mount(loc, override_status);
+     R_TRY(code_mount.GetResult());
+
+     /* Obtain the program's meta, possibly from cache. */
+     Meta meta;
+     R_TRY(LoadMetaFromCache(std::addressof(meta), loc, override_status));
+
+     /* Check the meta against the location and the mount's code verification data. */
+     R_TRY(ValidateMeta(std::addressof(meta), loc, code_mount.GetCodeVerificationData()));
+
+     /* Load, then validate, the NSO headers (using the g_nso_headers / g_has_nso tables). */
+     R_TRY(LoadNsoHeaders(g_nso_headers, g_has_nso));
+     R_TRY(ValidateNsoHeaders(g_nso_headers, g_has_nso));
+
+     /* Create the process itself; on success process_info holds its handle and the decided layout. */
+     ProcessInfo process_info;
+     R_TRY(CreateProcessImpl(std::addressof(process_info), std::addressof(meta), g_nso_headers, g_has_nso, argument, flags, resource_limit));
+
+     /* Populate the process's memory with the NSOs (and the arguments, if any). */
+     {
+         /* Until loading has succeeded, leaving this scope closes the new process handle. */
+         auto close_on_failure = SCOPE_GUARD { os::CloseNativeHandle(process_info.process_handle); };
+
+         R_TRY(LoadAutoLoadModules(std::addressof(process_info), g_nso_headers, g_has_nso, argument));
+
+         /* Loading succeeded, so the handle must stay open. */
+         close_on_failure.Cancel();
+     }
+
+     /* Tell the RoManager about the new process and its modules. */
+     {
+         /* Nintendo doesn't validate this get, but we do. */
+         const os::ProcessId process_id = os::GetProcessId(process_info.process_handle);
+
+         /* Registration takes a flag that is true for either of the two 64-bit address space types. */
+         const auto address_space = GetAddressSpaceType(std::addressof(meta));
+         const bool is_64_bit = (address_space == Npdm::AddressSpaceType_64Bit) || (address_space == Npdm::AddressSpaceType_64BitDeprecated);
+         RoManager::GetInstance().RegisterProcess(pin_id, process_id, meta.aci->program_id, is_64_bit);
+
+         /* Record each present NSO's module id, address and size under the same pin. */
+         for (size_t i = 0; i < Nso_Count; i++) {
+             if (!g_has_nso[i]) {
+                 continue;
+             }
+             RoManager::GetInstance().AddNso(pin_id, g_nso_headers[i].module_id, process_info.nso_address[i], process_info.nso_size[i]);
+         }
+     }
+
+     /* If we're overriding for HBL, perform HTML document redirection. */
+     if (override_status.IsHbl()) {
+         /* The outcome is intentionally not checked; failure is okay. */
+         RedirectHtmlDocumentPathForHbl(loc);
+     }
+
+     /* Clear the external code for the program. */
+     fssystem::DestroyExternalCode(loc.program_id);
+
+     /* Note that we've created the program. */
+     SetLaunchedBootProgram(loc.program_id);
+
+     /* Give the process handle to the caller. */
+     *out = process_info.process_handle;
+     return ResultSuccess();
+ }
+
+ Result GetProgramInfo(ProgramInfo *out, cfg::OverrideStatus *out_status, const ncm::ProgramLocation &loc, const char *path) {
+     /* path is not consulted. The meta is loaded while the code is mounted; the mount object only lives for the inner scope. */
+     AMS_UNUSED(path);
+     Meta meta;
+     {
+         ScopedCodeMount code_mount(loc);
+         R_TRY(code_mount.GetResult());
+         R_TRY(LoadMeta(std::addressof(meta), loc, code_mount.GetOverrideStatus()));
+
+         /* Reporting the override status is optional: a null out_status is simply skipped. */
+         if (out_status) {
+             *out_status = code_mount.GetOverrideStatus();
+         }
+     }
+
+     return GetProgramInfoFromMeta(out, std::addressof(meta));
+ }
+
+ Result PinProgram(PinId *out_id, const ncm::ProgramLocation &loc, const cfg::OverrideStatus &override_status) {
+     if (!RoManager::GetInstance().Allocate(out_id, loc, override_status)) { return ldr::ResultMaxProcess(); } /* Allocate() reported failure. */
+     return ResultSuccess();
+ }
+
+ Result UnpinProgram(PinId id) {
+     if (!RoManager::GetInstance().Free(id)) { return ldr::ResultNotPinned(); } /* Free() reported failure for this id. */
+     return ResultSuccess();
+ }
+
+ Result GetProcessModuleInfo(u32 *out_count, ldr::ModuleInfo *out, size_t max_out_count, os::ProcessId process_id) {
+     if (!RoManager::GetInstance().GetProcessModuleInfo(out_count, out, max_out_count, process_id)) { return ldr::ResultNotPinned(); } /* The lookup reported failure. */
+     return ResultSuccess();
+ }
+
+ Result GetProgramLocationAndOverrideStatusFromPinId(ncm::ProgramLocation *out, cfg::OverrideStatus *out_status, PinId pin_id) {
+     if (!RoManager::GetInstance().GetProgramLocationAndStatus(out, out_status, pin_id)) { return ldr::ResultNotPinned(); } /* The lookup reported failure for this pin. */
+     return ResultSuccess();
+ }
+
+}