diff --git a/Source/Atmosphere/stratosphere/loader/source/ldr_oc_patch.hpp b/Source/Atmosphere/stratosphere/loader/source/ldr_oc_patch.hpp index 7c097e93..de8f7947 100644 --- a/Source/Atmosphere/stratosphere/loader/source/ldr_oc_patch.hpp +++ b/Source/Atmosphere/stratosphere/loader/source/ldr_oc_patch.hpp @@ -1,800 +1,949 @@ +/* + * Copyright (C) Switch-OC-Suite + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + //#define EXPERIMENTAL -constexpr ro::ModuleId PcvModuleId[] = { - // ParseModuleId("91D61D59D7002378E35584FC0B38C7693A3ABAB5"), //11.0.0 - // ParseModuleId("C503E96550F302E121873136B814A529863D949B"), //12.x - ParseModuleId("2058C97C551571506656AA04EC85E2B1B01B155C"), //13.0.0-13.2.0 -}; +#pragma once +#include -constexpr ro::ModuleId PtmModuleId[] = { - // ParseModuleId("A79706954C6C45568B0FFE610627E2E89D8FB0D4"), //12.x - ParseModuleId("2CA78D4066C1C11317CC2705EBADA9A51D3AC981"), //13.0.0-13.2.0 -}; +namespace ams::ldr { + // RAM(Emc) clockrates: + // 1862400, 1894400, 1932800, 1996800, 2064000, 2099200, 2131200 + // Other values might work as well + // RAM overclock could be UNSTABLE and generate graphical glitches / instabilities / NAND corruption + // 1862400/1996800 has been tested stable for all DRAM chips + constexpr u32 EmcClock = 1996800; -namespace pcv { - typedef struct { - s32 c0 = 0; - s32 c1 = 0; - s32 c2 = 0; - s32 c3 = 0; - s32 c4 = 0; - s32 c5 = 0; - } cvb_coefficients; + // CPU max clockrate: + // >= 2193000 will 
enable overvolting + constexpr u32 CpuMaxClock = 2397000; - typedef struct { - u64 freq = 0; - cvb_coefficients cvb_dfll_param; - cvb_coefficients cvb_pll_param; // only c0 is reserved - } cpu_freq_cvb_table_t; + // CPU max voltage + constexpr u32 CpuVoltageLimit = 1220; // default max 1120mV + static_assert(CpuVoltageLimit <= 1250); - typedef struct { - u64 freq = 0; - cvb_coefficients cvb_dfll_param; // empty, dfll clock source not selected - cvb_coefficients cvb_pll_param; - } gpu_cvb_pll_table_t; + namespace pcv { + typedef struct { + s32 c0 = 0; + s32 c1 = 0; + s32 c2 = 0; + s32 c3 = 0; + s32 c4 = 0; + s32 c5 = 0; + } cvb_coefficients; - typedef struct { - u64 freq; - s32 volt[4] = {0}; - } emc_dvb_dvfs_table_t; + typedef struct { + u64 freq; + cvb_coefficients cvb_dfll_param; + cvb_coefficients cvb_pll_param; // only c0 is reserved + } cpu_freq_cvb_table_t; - /* CPU */ - constexpr u32 CpuVoltageLimitOffsets[][11] = { - // { 0xE1A8C, 0xE1A98, 0xE1AA4, 0xE1AB0, 0xE1AF8, 0xE1B04, 0xE1B10, 0xE1B1C, 0xE1B28, 0xE1B34, 0xE1F4C }, - // { 0xF08DC, 0xF08E8, 0xF08F4, 0xF0900, 0xF0948, 0xF0954, 0xF0960, 0xF096C, 0xF0978, 0xF0984, 0xF0D9C }, - { 0xF092C, 0xF0938, 0xF0944, 0xF0950, 0xF0998, 0xF09A4, 0xF09B0, 0xF09BC, 0xF09C8, 0xF09D4, 0xF0DEC }, - }; - constexpr u32 NewCpuVoltageLimit = 1220; - static_assert(NewCpuVoltageLimit <= 1300); //1300mV hangs for me + typedef struct { + u64 freq; + cvb_coefficients cvb_dfll_param; // empty, dfll clock source not selected + cvb_coefficients cvb_pll_param; + } gpu_cvb_pll_table_t; - constexpr u32 CpuVoltageOldTableCoeff[][10] = { - // { 0xE2140, 0xE2178, 0xE21B0, 0xE21E8, 0xE2220, 0xE2258, 0xE2290, 0xE22C8, 0xE2300, 0xE2338 }, - // { 0xF0F90, 0xF0FC8, 0xF1000, 0xF1038, 0xF1070, 0xF10A8, 0xF10E0, 0xF1118, 0xF1150, 0xF1188 }, - { 0xF0FE0, 0xF1018, 0xF1050, 0xF1088, 0xF10C0, 0xF10F8, 0xF1130, 0xF1168, 0xF11A0, 0xF11D8 }, - }; - constexpr u32 CpuVoltageScale = 1000; - constexpr u32 NewCpuVoltageScaled = NewCpuVoltageLimit * 
CpuVoltageScale; + typedef struct { + u64 freq; + s32 volt[4] = {0}; + } emc_dvb_dvfs_table_t; - constexpr u32 CpuTablesFreeSpace[] = { - // 0xE2350, - // 0xF11A0, - 0xF11F0, - }; - constexpr cpu_freq_cvb_table_t NewCpuTables[] = { - // OldCpuTables - // { 204000, { 721589, -12695, 27 }, { 1120000 } }, - // { 306000, { 747134, -14195, 27 }, { 1120000 } }, - // { 408000, { 776324, -15705, 27 }, { 1120000 } }, - // { 510000, { 809160, -17205, 27 }, { 1120000 } }, - // { 612000, { 845641, -18715, 27 }, { 1120000 } }, - // { 714000, { 885768, -20215, 27 }, { 1120000 } }, - // { 816000, { 929540, -21725, 27 }, { 1120000 } }, - // { 918000, { 976958, -23225, 27 }, { 1120000 } }, - // { 1020000, { 1028021, -24725, 27 }, { 1120000 } }, - // { 1122000, { 1082730, -26235, 27 }, { 1120000 } }, - // { 1224000, { 1141084, -27735, 27 }, { 1120000 } }, - // { 1326000, { 1203084, -29245, 27 }, { 1120000 } }, - // { 1428000, { 1268729, -30745, 27 }, { 1120000 } }, - // { 1581000, { 1374032, -33005, 27 }, { 1120000 } }, - // { 1683000, { 1448791, -34505, 27 }, { 1120000 } }, - // { 1785000, { 1527196, -36015, 27 }, { 1120000 } }, - // { 1887000, { 1609246, -37515, 27 }, { 1120000 } }, - // { 1963500, { 1675751, -38635, 27 }, { 1120000 } }, - { 2091000, { 1785520, -40523, 27 }, { NewCpuVoltageScaled } }, - { 2193000, { 1878755, -42027, 27 }, { NewCpuVoltageScaled } }, - { 2295000, { 1975655, -43531, 27 }, { NewCpuVoltageScaled } }, - { 2397000, { 2076220, -45036, 27 }, { NewCpuVoltageScaled } }, - }; - static_assert(sizeof(NewCpuTables) <= sizeof(cpu_freq_cvb_table_t)*14); + /* CPU */ + constexpr u32 NewCpuVoltageScaled = CpuVoltageLimit * 1000; - constexpr u32 MaxCpuClockOffset[] = { - // 0xE2740, - // 0xF1590, - 0xF15E0, - }; - constexpr u32 NewMaxCpuClock = 2397000; + // TODO: correctly derive c0-c1 dfll coefficients + constexpr cpu_freq_cvb_table_t NewCpuTables[] = { + // OldCpuTables + // { 204000, { 721589, -12695, 27 }, { 1120000 } }, + // { 306000, { 747134, -14195, 27 }, { 
1120000 } }, + // { 408000, { 776324, -15705, 27 }, { 1120000 } }, + // { 510000, { 809160, -17205, 27 }, { 1120000 } }, + // { 612000, { 845641, -18715, 27 }, { 1120000 } }, + // { 714000, { 885768, -20215, 27 }, { 1120000 } }, + // { 816000, { 929540, -21725, 27 }, { 1120000 } }, + // { 918000, { 976958, -23225, 27 }, { 1120000 } }, + // { 1020000, { 1028021, -24725, 27 }, { 1120000 } }, + // { 1122000, { 1082730, -26235, 27 }, { 1120000 } }, + // { 1224000, { 1141084, -27735, 27 }, { 1120000 } }, + // { 1326000, { 1203084, -29245, 27 }, { 1120000 } }, + // { 1428000, { 1268729, -30745, 27 }, { 1120000 } }, + // { 1581000, { 1374032, -33005, 27 }, { 1120000 } }, + // { 1683000, { 1448791, -34505, 27 }, { 1120000 } }, + // { 1785000, { 1527196, -36015, 27 }, { 1120000 } }, + // { 1887000, { 1609246, -37515, 27 }, { 1120000 } }, + // { 1963500, { 1675751, -38635, 27 }, { 1120000 } }, + { 2091000, { 1785520, -40523, 27 }, { NewCpuVoltageScaled } }, + { 2193000, { 1878755, -42027, 27 }, { NewCpuVoltageScaled } }, + { 2295000, { 1975655, -43531, 27 }, { NewCpuVoltageScaled } }, + { 2397000, { 2076220, -45036, 27 }, { NewCpuVoltageScaled } }, + }; + static_assert(sizeof(NewCpuTables) <= sizeof(cpu_freq_cvb_table_t)*14); - /* GPU */ - // constexpr u32 GpuVoltageLimitOffsets[] = { - // // 0xE3044, - // // 0xF1E94, - // 0xF1EE4, - // }; - // constexpr u32 NewGpuVoltageLimit = 1170; // default max 1050mV + /* GPU */ + constexpr gpu_cvb_pll_table_t NewGpuTables[] = { + // OldGpuTables + // { 76800, {}, { 610000, } }, + // { 153600, {}, { 610000, } }, + // { 230400, {}, { 610000, } }, + // { 307200, {}, { 610000, } }, + // { 460800, {}, { 610000, } }, + // { 537600, {}, { 801688, -10900, -163, 298, -10599, 162 } }, + // { 614400, {}, { 824214, -5743, -452, 238, -6325, 81 } }, + // { 691200, {}, { 848830, -3903, -552, 119, -4030, -2 } }, + // { 768000, {}, { 891575, -4409, -584, 0, -2849, 39 } }, + // { 844800, {}, { 940071, -5367, -602, -60, -63, -93 } }, + // { 921600, {}, 
{ 986765, -6637, -614, -179, 1905, -13 } }, + // { 998400, {}, { 1098475, -13529, -497, -179, 3626, 9 } }, + // { 1075200, {}, { 1163644, -12688, -648, 0, 1077, 40 } }, + // { 1152000, {}, { 1204812, -9908, -830, 0, 1469, 110 } }, + // { 1228800, {}, { 1277303, -11675, -859, 0, 3722, 313 } }, + // { 1267200, {}, { 1335531, -12567, -867, 0, 3681, 559 } }, + { 1305600, {}, { 1374130, -13725, -859, 0, 4442, 576 } }, + }; + static_assert(sizeof(NewGpuTables) <= sizeof(gpu_cvb_pll_table_t)*15); - constexpr u32 GpuTablesFreeSpace[] = { - // 0xE3410, - // 0xF2260, - 0xF22B0, - }; - // No way to correctly derive c0-c5 coefficients, as coefficients >= 1152000 only make sense - constexpr gpu_cvb_pll_table_t NewGpuTables[] = { - // OldGpuTables - // { 537600, {}, { 801688, -10900, -163, 298, -10599, 162 } }, - // { 614400, {}, { 824214, -5743, -452, 238, -6325, 81 } }, - // { 691200, {}, { 848830, -3903, -552, 119, -4030, -2 } }, - // { 768000, {}, { 891575, -4409, -584, 0, -2849, 39 } }, - // { 844800, {}, { 940071, -5367, -602, -60, -63, -93 } }, - // { 921600, {}, { 986765, -6637, -614, -179, 1905, -13 } }, - // { 998400, {}, { 1098475, -13529, -497, -179, 3626, 9 } }, - // { 1075200, {}, { 1163644, -12688, -648, 0, 1077, 40 } }, - // { 1152000, {}, { 1204812, -9908, -830, 0, 1469, 110 } }, - // { 1228800, {}, { 1277303, -11675, -859, 0, 3722, 313 } }, - // { 1267200, {}, { 1335531, -12567, -867, 0, 3681, 559 } }, - { 1305600, {}, { 1374130, -13725, -859, 0, 4442, 576 } }, - }; - static_assert(sizeof(NewGpuTables) <= sizeof(gpu_cvb_pll_table_t)*15); + /* EMC */ - constexpr u32 Reg1MaxGpuOffset[] = { - // 0x2E0AC, - // 0x3F6CC, - 0x3F12C, - }; - constexpr u8 Reg1NewMaxGpuClock[][0xC] = { - /* Original: 1228.8MHz - * - * MOV W13,#0x1000 - * MOVK W13,#0xE,LSL #16 - * ADD X13, X13, #0x4B,LSL#12 - * - * Bump to 1536MHz - * - * MOV W13,#0x7000 - * MOVK W13,#0x17,LSL #16 - * NOP - */ - // { 0x0D, 0x00, 0x8E, 0x52, 0xED, 0x02, 0xA0, 0x72, 0x1F, 0x20, 0x03, 0xD5 }, - // { 0x0B, 
0x00, 0x8E, 0x52, 0xEB, 0x02, 0xA0, 0x72, 0x1F, 0x20, 0x03, 0xD5 }, - { 0x0B, 0x00, 0x8E, 0x52, 0xEB, 0x02, 0xA0, 0x72, 0x1F, 0x20, 0x03, 0xD5 }, - }; + // DvbTable is all about frequency scaling along with CPU core voltage, no need to care about this for now. - constexpr u32 Reg2MaxGpuOffset[] = { - // 0x2E110, - // 0x3F730, - 0x3F190, - }; - constexpr u8 Reg2NewMaxGpuClock[][0x8] = { - /* Original: 921.6MHz - * - * MOV W13,#0x1000 - * MOVK W13,#0xE,LSL #16 - * - * Bump to 1536MHz - * - * MOV W13,#0x7000 - * MOVK W13,#0x17,LSL #16 - */ - // { 0x0D, 0x00, 0x8E, 0x52, 0xED, 0x02, 0xA0, 0x72, }, - // { 0x0B, 0x00, 0x8E, 0x52, 0xEB, 0x02, 0xA0, 0x72, }, - { 0x0B, 0x00, 0x8E, 0x52, 0xEB, 0x02, 0xA0, 0x72, }, - }; + // constexpr emc_dvb_dvfs_table_t EmcDvbTable[6] = + // { + // { 204000, { 637, 637, 637, } }, + // { 408000, { 637, 637, 637, } }, + // { 800000, { 637, 637, 637, } }, + // { 1065600, { 637, 637, 637, } }, + // { 1331200, { 650, 637, 637, } }, + // { 1600000, { 675, 650, 637, } }, + // }; - /* EMC */ + // Mariko have 3 mtc tables (204/1331/1600 MHz), only these 3 frequencies could be set. - // DvbTable is all about frequency scaling along with CPU core voltage, no need to care about this for now. + // Mariko mtc tables starting from rev, see mtc_timing_table.hpp for parameters. + // All mariko mtc tables will be patched to simplify the procedure. 
+ #include "mtc_timing_table.hpp" - // constexpr u32 EmcDvbTableOffsets[] = - // { - // 0xFFFFFFFF, - // 0xFFFFFFFF, - // 0xF0628, - // }; - - // constexpr emc_dvb_dvfs_table_t EmcDvbTable[6] = - // { - // { 204000, { 637, 637, 637, } }, - // { 408000, { 637, 637, 637, } }, - // { 800000, { 637, 637, 637, } }, - // { 1065600, { 637, 637, 637, } }, - // { 1331200, { 650, 637, 637, } }, - // { 1600000, { 675, 650, 637, } }, - // }; - - constexpr u32 EmcDvb1331[] = { - 0xF0688, - }; - - // Sourced from 13.x pcv module - // 1st regulator table, 0x142778 - 0x143BB4, if mask = 0b0110101 - // 2nd regulator table, 0x143BB8 - 0x144FF4, if mask = 0b1010011 - - // Access pattern: - // BL 0x6C390 // read mask from 0x195588 (.bss section) and return X0 (address of regulator table) - // MOV W8, #0x120 // offset per entry - // (S)MADD(L) X8, X22, X8, X0 // X8 = X22 * X8 + X0, X22 is regulator entry ID (0x11 for max77812_dram) - // LDR W8, [X8, #0x10] // read maxim regulator identifier - // CMP W8, #3 - // B.EQ ... 
- - // 1st regulator table: - // 0x143A98 2 #0x0 - // 0x143A9C 0 #0x4 - // 0x143AA0 "max77812_dram" #0x8 - // 0x143AA8 3 #0x10 // maxim regulator identifier ( 1 = max77620, 2 = max77621, 3 = max77812) - // 0x143AAC 0 #0x14 - // 0x143AB0 5000 #0x18 // voltage step - // 0x143AB4 0 #0x1C - // 0x143AB8 250000 #0x20 // min voltage - // 0x143ABC 1525000 #0x24 // max voltage - // 0x143AC0 0 #0x28 // voltage multiplier ( * step ) - // 0x143AC4 600000 #0x2C - - // 0x142898 1 #0x0 - // 0x14289C 0 #0x4 - // 0x1428A0 "max77620_sd1" #0x8 - // 0x1428A8 1 #0x10 // maxim regulator identifier ( 1 = max77620, 2 = max77621, 3 = max77812) - // 0x1428AC 23 #0x14 - // 0x1428B0 12500 #0x18 // voltage step - // 0x1428B4 600000 #0x1C - // 0x1428B8 1125000 #0x20 // min voltage, default Vddq for Erista EMC - // 0x1428BC 1125000 #0x24 // max voltage, default Vddq for Erista EMC - // 0x1428C0 0 #0x28 - // 0x1428C4 0 #0x2C - - // HOS does not seem to change DRAM voltage on Mariko (validate only) - - // void EnableVddMemory() in Atmosphere/libraries/libexosphere/source/pmic/pmic_api.cpp: - // /* On Erista, set Sd1 voltage. */ - // if (soc_type == fuse::SocType_Erista) { - // SetVoltage(Max77620RegisterSd1, 1100); - // } - - // in hekate/bdk/power/max77812.h: - // #define MAX77812_REG_M3_VOUT 0x25 // DRAM on PHASE211. - // 3 outputs (CPU/GPU/DRAM) from max77812. Does PHASE31 mode exist? - // If so, read/query max77812 pmic via i2c for voltage info in hekate and get DRAM reg on PHASE31. - // max77812 document: https://datasheets.maximintegrated.com/en/ds/MAX77812.pdf - - // Mariko have 3 mtc tables (204/1331/1600 MHz), only these 3 frequencies could be set. 
- constexpr u32 EmcFreqOffsets[][30] = { - // { 0xD7C60, 0xD7C68, 0xD7C70, 0xD7C78, 0xD7C80, 0xD7C88, 0xD7C90, 0xD7C98, 0xD7CA0, 0xD7CA8, 0xE1800, 0xEEFA0, 0xF2478, 0xFE284, 0x10A304, 0x10D7DC, 0x110A40, 0x113CA4, 0x116F08, 0x11A16C, 0x11D3D0, 0x120634, 0x123898, 0x126AFC, 0x129D60, 0x12CFC4, 0x130228, 0x13BFE0, 0x140D00, 0x140D50, }, - // { 0xE1810, 0xE6530, 0xE6580, 0xE6AB0, 0xE6AB8, 0xE6AC0, 0xE6AC8, 0xE6AD0, 0xE6AD8, 0xE6AE0, 0xE6AE8, 0xE6AF0, 0xE6AF8, 0xF0650, 0xFDDF0, 0x1012C8, 0x10D0D4, 0x119154, 0x11C62C, 0x11F890, 0x122AF4, 0x125D58, 0x128FBC, 0x12C220, 0x12F484, 0x1326E8, 0x13594C, 0x138BB0, 0x13BE14, 0x13F078, }, - { 0xE1860, 0xE6580, 0xE65D0, 0xE6B00, 0xE6B08, 0xE6B10, 0xE6B18, 0xE6B20, 0xE6B28, 0xE6B30, 0xE6B38, 0xE6B40, 0xE6B48, 0xF06A0, 0xFDE40, 0x101318, 0x10D124, 0x1191A4, 0x11C67C, 0x11F8E0, 0x122B44, 0x125DA8, 0x12900C, 0x12C270, 0x12F4D4, 0x132738, 0x13599C, 0x138C00, 0x13BE64, 0x13F0C8, }, - }; - - // Mariko mtc tables starting from rev, see mtc_timing_table.hpp for parameters. - // All mariko mtc tables will be patched to simplify the procedure. 
- constexpr u32 MtcTable_1600[][13] = { - { 0x1012D8, 0x11C63C, 0x11F8A0, 0x122B04, 0x125D68, 0x128FCC, 0x12C230, 0x12F494, 0x1326F8, 0x13595C, 0x138BC0, 0x13BE24, 0x13F088 }, - }; - - constexpr u32 MtcTableOffset = 0x10CC; - - #include "mtc_timing_table.hpp" - - void AdjustMtcTable(MarikoMtcTable* table, MarikoMtcTable* ref) - { - /* Official Tegra X1 TRM, sign up for nvidia developer program (free) to download: - * https://developer.nvidia.com/embedded/dlc/tegra-x1-technical-reference-manual - * Section 18.11: MC Registers - * - * Retail Mariko: 200FBGA 16Gb DDP LPDDR4X SDRAM x 2 - * x16/Ch, 1Ch/die, Double-die, 2Ch, 1CS(rank), 8Gb density per die - * 64Mb x 16DQ x 8banks x 2channels = 2048MB (x32DQ) per package - * - * Devkit Mariko: 200FBGA 32Gb DDP LPDDR4X SDRAM x 2 - * x16/Ch, 1Ch/die, Quad-die, 2Ch, 2CS(rank), 8Gb density per die - * X1+ EMC can R/W to both ranks at the same time, resulting in doubled DQ - * 64Mb x 32DQ x 8banks x 2channels = 4096MB (x64DQ) per package - * - * If you have access to LPDDR4(X) specs or datasheets (from manufacturers or Google), - * you'd better calculate timings yourself rather than relying on following algorithm. - */ - - #define ADJUST_PARAM(TARGET, REF) TARGET = std::ceil(REF + ((GetEmcClock()-1331200)*(TARGET-REF))/(1600000-1331200)); - - #define ADJUST_PARAM_TABLE(TABLE, PARAM, REF) ADJUST_PARAM(TABLE->PARAM, REF->PARAM) - - #define ADJUST_PARAM_ALL_REG(TABLE, PARAM, REF) \ - ADJUST_PARAM_TABLE(TABLE, burst_regs.PARAM, REF) \ - ADJUST_PARAM_TABLE(TABLE, shadow_regs_ca_train.PARAM, REF) \ - ADJUST_PARAM_TABLE(TABLE, shadow_regs_rdwr_train.PARAM, REF) - - #define WRITE_PARAM_ALL_REG(TABLE, PARAM, VALUE)\ - TABLE->burst_regs.PARAM = VALUE; \ - TABLE->shadow_regs_ca_train.PARAM = VALUE; \ - TABLE->shadow_regs_rdwr_train.PARAM = VALUE; - - // tCK_avg (average clock period) in ns (10E-3 ns) - const double tCK_avg = GetEmcClock() == 2131200 ? 0.468 : 1000'000. 
/ GetEmcClock(); - // tRPpb (row precharge time per bank) in ns - const u32 tRPpb = 18; - // tRPab (row precharge time all banks) in ns - const u32 tRPab = 21; - // tRAS (row active time) in ns - const u32 tRAS = 42; - // tRC (ACTIVATE-ACTIVATE command period same bank) in ns - const u32 tRC = tRPpb + tRAS; - // tRFCab (refresh cycle time all banks) in ns for 8Gb density - const u32 tRFCab = 280; - // tRFCpb (refresh cycle time per bank) in ns for 8Gb density - const u32 tRFCpb = 140; - // tRCD (RAS-CAS delay) in ns - const u32 tRCD = 18; - // tRRD (Active bank-A to Active bank-B) in ns - const double tRRD = GetEmcClock() == 2131200 ? 7.5 : 10.; - // tREFpb (average refresh interval per bank) in ns for 8Gb density - const u32 tREFpb = 488; - // tREFab (average refresh interval all 8 banks) in ns for 8Gb density - // const u32 tREFab = tREFpb * 8; - // #_of_rows per die for 8Gb density - const u32 numOfRows = 65536; - // {REFRESH, REFRESH_LO} = max[(tREF/#_of_rows) / (emc_clk_period) - 64, (tREF/#_of_rows) / (emc_clk_period) * 97%] - // emc_clk_period = dram_clk / 2; - // 1600 MHz: 5894, but N' set to 6176 (~4.8% margin) - const u32 REFRESH = std::ceil((double(tREFpb) * GetEmcClock() / numOfRows * (1.048) / 2 - 64)) / 4 * 4; - // tPDEX2WR, tPDEX2RD (timing delay from exiting powerdown mode to a write/read command) in ns - const u32 tPDEX2 = 10; - // [Guessed] tACT2PDEN (timing delay from an activate, MRS or EMRS command to power-down entry) in ns - const u32 tACT2PDEN = 14; - // [Guessed] tPDEX2MRR (timing delay from exiting powerdown mode to MRR command) in ns - const double tPDEX2MRR = 28.75; - // [Guessed] tCKE2PDEN (timing delay from turning off CKE to power-down entry) in ns - const double tCKE2PDEN = 8.5; - // tXSR (SELF REFRESH exit to next valid command delay) in ns - const double tXSR = tRFCab + 7.5; - // tCKE (minimum CKE high pulse width) in ns - const u32 tCKE = 8; - // tCKELPD (minimum CKE low pulse width in SELF REFRESH) in ns - const u32 tCKELPD = 15; 
- // [Guessed] tPD (minimum CKE low pulse width in power-down mode) in ns - const double tPD = 7.5; - // tFAW (Four-bank Activate Window) in ns - const u32 tFAW = GetEmcClock() == 2131200 ? 30 : 40; - - #define GET_CYCLE_CEIL(PARAM) std::ceil(double(PARAM) / tCK_avg) - - WRITE_PARAM_ALL_REG(table, emc_rc, GET_CYCLE_CEIL(tRC)); - WRITE_PARAM_ALL_REG(table, emc_rfc, GET_CYCLE_CEIL(tRFCab)); - WRITE_PARAM_ALL_REG(table, emc_rfcpb, GET_CYCLE_CEIL(tRFCpb)); - WRITE_PARAM_ALL_REG(table, emc_ras, GET_CYCLE_CEIL(tRAS)); - WRITE_PARAM_ALL_REG(table, emc_rp, GET_CYCLE_CEIL(tRPpb)); - ADJUST_PARAM_ALL_REG(table, emc_r2w, ref); - ADJUST_PARAM_ALL_REG(table, emc_w2r, ref); - ADJUST_PARAM_ALL_REG(table, emc_r2p, ref); - ADJUST_PARAM_ALL_REG(table, emc_w2p, ref); - ADJUST_PARAM_ALL_REG(table, emc_trtm, ref); - ADJUST_PARAM_ALL_REG(table, emc_twtm, ref); - ADJUST_PARAM_ALL_REG(table, emc_tratm, ref); - ADJUST_PARAM_ALL_REG(table, emc_twatm, ref); - WRITE_PARAM_ALL_REG(table, emc_rd_rcd, GET_CYCLE_CEIL(tRCD)); - WRITE_PARAM_ALL_REG(table, emc_wr_rcd, GET_CYCLE_CEIL(tRCD)); - WRITE_PARAM_ALL_REG(table, emc_rrd, GET_CYCLE_CEIL(tRRD)); - WRITE_PARAM_ALL_REG(table, emc_refresh, REFRESH); - WRITE_PARAM_ALL_REG(table, emc_pre_refresh_req_cnt, REFRESH / 4); - WRITE_PARAM_ALL_REG(table, emc_pdex2wr, GET_CYCLE_CEIL(tPDEX2)); - WRITE_PARAM_ALL_REG(table, emc_pdex2rd, GET_CYCLE_CEIL(tPDEX2)); - WRITE_PARAM_ALL_REG(table, emc_act2pden,GET_CYCLE_CEIL(tACT2PDEN)); - ADJUST_PARAM_ALL_REG(table, emc_rw2pden, ref); - WRITE_PARAM_ALL_REG(table, emc_cke2pden,GET_CYCLE_CEIL(tCKE2PDEN)); - WRITE_PARAM_ALL_REG(table, emc_pdex2mrr,GET_CYCLE_CEIL(tPDEX2MRR)); - WRITE_PARAM_ALL_REG(table, emc_txsr, GET_CYCLE_CEIL(tXSR)); - WRITE_PARAM_ALL_REG(table, emc_txsrdll, GET_CYCLE_CEIL(tXSR)); - WRITE_PARAM_ALL_REG(table, emc_tcke, GET_CYCLE_CEIL(tCKE)); - WRITE_PARAM_ALL_REG(table, emc_tckesr, GET_CYCLE_CEIL(tCKELPD)); - WRITE_PARAM_ALL_REG(table, emc_tpd, GET_CYCLE_CEIL(tPD)); - WRITE_PARAM_ALL_REG(table, 
emc_tfaw, GET_CYCLE_CEIL(tFAW)); - WRITE_PARAM_ALL_REG(table, emc_trpab, GET_CYCLE_CEIL(tRPab)); - ADJUST_PARAM_ALL_REG(table, emc_tclkstop, ref); - WRITE_PARAM_ALL_REG(table, emc_trefbw, REFRESH + 64); - - ADJUST_PARAM_ALL_REG(table, emc_pmacro_dll_cfg_2, ref); // EMC_DLL_CFG_2_0: level select for VDDA? - - // ADJUST_PARAM_TABLE(table, dram_timings.rl); // not used on Mariko - - constexpr u32 DIV = 4; // ? - table->burst_mc_regs.mc_emem_arb_timing_rcd = std::ceil(GET_CYCLE_CEIL(tRCD) / DIV - 2); - table->burst_mc_regs.mc_emem_arb_timing_rp = std::ceil(GET_CYCLE_CEIL(tRPpb) / DIV - 1); - table->burst_mc_regs.mc_emem_arb_timing_rc = std::ceil(std::max(GET_CYCLE_CEIL(tRC), GET_CYCLE_CEIL(tRAS)+GET_CYCLE_CEIL(tRPpb))/ DIV); - table->burst_mc_regs.mc_emem_arb_timing_ras = std::ceil(GET_CYCLE_CEIL(tRAS) / DIV - 2); - table->burst_mc_regs.mc_emem_arb_timing_faw = std::ceil(GET_CYCLE_CEIL(tFAW) / DIV - 1); - table->burst_mc_regs.mc_emem_arb_timing_rrd = std::ceil(GET_CYCLE_CEIL(tRRD) / DIV - 1); - table->burst_mc_regs.mc_emem_arb_timing_rap2pre = std::ceil(table->burst_regs.emc_r2p / DIV); - table->burst_mc_regs.mc_emem_arb_timing_wap2pre = std::ceil(table->burst_regs.emc_w2p / DIV); - table->burst_mc_regs.mc_emem_arb_timing_r2w = std::ceil(table->burst_regs.emc_r2w / DIV + 1); - table->burst_mc_regs.mc_emem_arb_timing_w2r = std::ceil(table->burst_regs.emc_w2r / DIV + 1); - table->burst_mc_regs.mc_emem_arb_timing_rfcpb = std::ceil(GET_CYCLE_CEIL(tRFCpb) / DIV + 1); // ? 
- - ADJUST_PARAM_TABLE(table, la_scale_regs.mc_mll_mpcorer_ptsa_rate, ref); - ADJUST_PARAM_TABLE(table, la_scale_regs.mc_ptsa_grant_decrement, ref); - - // ADJUST_PARAM_TABLE(table, min_mrs_wait); // not used on LPDDR4X - // ADJUST_PARAM_TABLE(table, latency); // not used - - // Calculate DIVM and DIVN (clock DIVisors) - // Common PLL oscillator is 38.4 MHz - // PLLMB_OUT = 38.4 MHz / PLLLMB_DIVM * PLLMB_DIVN - u32 divm = 1; - u32 divn = GetEmcClock() / 38400; - u32 remainder = GetEmcClock() % 38400; - if (remainder >= 38400 * (3/4)) { - divm = 4; - divn = divn * divm + 3; - } else - if (remainder >= 38400 * (2/3)) { - divm = 3; - divn = divn * divm + 2; - } else - if (remainder >= 38400 * (1/2)) { - divm = 2; - divn = divn * divm + 1; - } else - if (remainder >= 38400 * (1/3)) { - divm = 3; - divn = divn * divm + 1; - } else - if (remainder >= 38400 * (1/4)) { - divm = 4; - divn = divn * divm + 1; - } - - /* Patch PLLMB divisors */ - table->pllmb_divm = divm; - table->pllmb_divn = divn; - - #ifdef EXPERIMENTAL + void AdjustMtcTable(MarikoMtcTable* table, MarikoMtcTable* ref) { - #define ADJUST_PARAM_ROUND2_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \ - TARGET_TABLE->burst_regs.PARAM = \ - ((ADJUST_PROP(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM) + 1) >> 1) << 1; \ - TARGET_TABLE->shadow_regs_ca_train.PARAM = \ - ((ADJUST_PROP(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM) + 1) >> 1) << 1; \ - TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \ - ((ADJUST_PROP(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM) + 1) >> 1) << 1; + /* Official Tegra X1 TRM, sign up for nvidia developer program (free) to download: + * https://developer.nvidia.com/embedded/dlc/tegra-x1-technical-reference-manual + * Section 18.11: MC Registers + * + * Retail Mariko: 200FBGA 16Gb DDP LPDDR4X SDRAM x 2 + * x16/Ch, 1Ch/die, Double-die, 2Ch, 1CS(rank), 8Gb density per die + * 64Mb x 16DQ x 8banks x 2channels = 
2048MB (x32DQ) per package + * + * Devkit Mariko: 200FBGA 32Gb DDP LPDDR4X SDRAM x 2 + * x16/Ch, 1Ch/die, Quad-die, 2Ch, 2CS(rank), 8Gb density per die + * X1+ EMC can R/W to both ranks at the same time, resulting in doubled DQ + * 64Mb x 32DQ x 8banks x 2channels = 4096MB (x64DQ) per package + * + * If you have access to LPDDR4(X) specs or datasheets (from manufacturers or Google), + * you'd better calculate timings yourself rather than relying on following algorithm. + */ - #define ADJUST_PARAM(TARGET_PARAM, REF_PARAM) \ - TARGET_PARAM = ADJUST_PROP(TARGET_PARAM, REF_PARAM); + #define ADJUST_PARAM(TARGET, REF) TARGET = std::ceil(REF + ((EmcClock-1331200)*(TARGET-REF))/(1600000-1331200)); - #define ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, PARAM) \ - ADJUST_PARAM(TARGET_TABLE->PARAM, REF_TABLE->PARAM) + #define ADJUST_PARAM_TABLE(TABLE, PARAM, REF) ADJUST_PARAM(TABLE->PARAM, REF->PARAM) - #define ADJUST_PARAM_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \ - ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, burst_regs.PARAM) \ - ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_ca_train.PARAM) \ - ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_rdwr_train.PARAM) + #define ADJUST_PARAM_ALL_REG(TABLE, PARAM, REF) \ + ADJUST_PARAM_TABLE(TABLE, burst_regs.PARAM, REF) \ + ADJUST_PARAM_TABLE(TABLE, shadow_regs_ca_train.PARAM, REF) \ + ADJUST_PARAM_TABLE(TABLE, shadow_regs_rdwr_train.PARAM, REF) - #define TRIM_BIT(IN_BITS, HIGH, LOW) \ - ((IN_BITS >> LOW) & ( (1u << (HIGH - LOW + 1u)) - 1u )) + #define WRITE_PARAM_ALL_REG(TABLE, PARAM, VALUE)\ + TABLE->burst_regs.PARAM = VALUE; \ + TABLE->shadow_regs_ca_train.PARAM = VALUE; \ + TABLE->shadow_regs_rdwr_train.PARAM = VALUE; - #define ADJUST_BIT(TARGET_PARAM, REF_PARAM, HIGH, LOW) \ - ADJUST_PROP(TRIM_BIT(TARGET_PARAM, HIGH, LOW), TRIM_BIT(REF_PARAM, HIGH, LOW)) - - #define CLEAR_BIT(BITS, HIGH, LOW) \ - BITS = BITS & ~( ((1u << HIGH) << 1u) - (1u << LOW) ); - - #define ADJUST_BIT_ALL_REG_SINGLE_OP(TARGET_TABLE, 
REF_TABLE, PARAM, HIGH, LOW, OPERATION) \ - TARGET_TABLE->burst_regs.PARAM = \ - (ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH, LOW) << LOW) OPERATION; \ - TARGET_TABLE->shadow_regs_ca_train.PARAM = \ - (ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH, LOW)) << LOW OPERATION; \ - TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \ - (ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH, LOW)) << LOW OPERATION; - - #define ADJUST_BIT_ALL_REG_PAIR(TARGET_TABLE, REF_TABLE, PARAM, HIGH1, LOW1, HIGH2, LOW2) \ - TARGET_TABLE->burst_regs.PARAM = \ - ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH1, LOW1) << LOW1 \ - | ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH2, LOW2) << LOW2; \ - TARGET_TABLE->shadow_regs_ca_train.PARAM = \ - ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH1, LOW1) << LOW1 \ - | ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH2, LOW2) << LOW2; \ - TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \ - ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH1, LOW1) << LOW1 \ - | ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH2, LOW2) << LOW2; - - /* For latency allowance */ - #define ADJUST_INVERSE(TARGET) ((TARGET*1000) / (GetEmcClock()/1600)) - - /* emc_wdv, emc_wsv, emc_wev, emc_wdv_mask, - emc_quse, emc_quse_width, emc_ibdly, emc_obdly, - emc_einput, emc_einput_duration, emc_qrst, emc_qsafe, - emc_rdv, emc_rdv_mask, emc_rdv_early, emc_rdv_early_mask */ - ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv); - ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wsv); - ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wev); - 
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv_mask); - - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse); - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse_width); - - ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_ibdly, 6,0, | (1 << 28)); - ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_obdly, 5,0, | (1 << 28)); - - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput); - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput_duration); - - ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_qrst, 6,0, | (6 << 16)); - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qsafe); - - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv); - target_table->burst_regs.emc_rdv_mask = target_table->burst_regs.emc_rdv + 2; - target_table->shadow_regs_ca_train.emc_rdv_mask = target_table->shadow_regs_ca_train.emc_rdv + 2; - target_table->shadow_regs_rdwr_train.emc_rdv_mask = target_table->shadow_regs_rdwr_train.emc_rdv + 2; - - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv_early); - target_table->burst_regs.emc_rdv_early_mask = target_table->burst_regs.emc_rdv_early + 2; - target_table->shadow_regs_ca_train.emc_rdv_early_mask = target_table->shadow_regs_ca_train.emc_rdv_early + 2; - target_table->shadow_regs_rdwr_train.emc_rdv_early_mask = target_table->shadow_regs_rdwr_train.emc_rdv_early + 2; - - /* emc_pmacro_..., - emc_zcal_wait_cnt, emc_mrs_wait_cnt(2), - emc_pmacro_autocal_cfg_common, emc_dyn_self_ref_control, emc_qpop, emc_pmacro_cmd_pad_tx_ctrl, - emc_tr_timing_0, emc_tr_rdv, emc_tr_qpop, emc_tr_rdv_mask, emc_tr_qsafe, emc_tr_qrst, - emc_training_vref_settle */ - /* DDLL values */ + /* Timings that are available in or can be derived from LPDDR4X datasheet or TRM */ { - #define OFFSET_ALL_REG(PARAM) \ - offsetof(MarikoMtcTable, burst_regs.PARAM), \ - offsetof(MarikoMtcTable, shadow_regs_ca_train.PARAM), \ - offsetof(MarikoMtcTable, shadow_regs_rdwr_train.PARAM) \ + // tCK_avg (average clock 
period) in ns + const double tCK_avg = (EmcClock == 2131200) ? 0.468 : 1000'000. / EmcClock; + // tRPpb (row precharge time per bank) in ns + const u32 tRPpb = 18; + // tRPab (row precharge time all banks) in ns + const u32 tRPab = 21; + // tRAS (row active time) in ns + const u32 tRAS = 42; + // tRC (ACTIVATE-ACTIVATE command period same bank) in ns + const u32 tRC = tRPpb + tRAS; + // tRFCab (refresh cycle time all banks) in ns for 8Gb density + const u32 tRFCab = 280; + // tRFCpb (refresh cycle time per bank) in ns for 8Gb density + const u32 tRFCpb = 140; + // tRCD (RAS-CAS delay) in ns + const u32 tRCD = 18; + // tRRD (Active bank-A to Active bank-B) in ns + const double tRRD = (EmcClock == 2131200) ? 7.5 : 10.; + // tREFpb (average refresh interval per bank) in ns for 8Gb density + const u32 tREFpb = 488; + // tREFab (average refresh interval all 8 banks) in ns for 8Gb density + // const u32 tREFab = tREFpb * 8; + // #_of_rows per die for 8Gb density + const u32 numOfRows = 65536; + // {REFRESH, REFRESH_LO} = max[(tREF/#_of_rows) / (emc_clk_period) - 64, (tREF/#_of_rows) / (emc_clk_period) * 97%] + // emc_clk_period = dram_clk / 2; + // 1600 MHz: 5894, but N' set to 6176 (~4.8% margin) + const u32 REFRESH = std::ceil((double(tREFpb) * EmcClock / numOfRows * (1.048) / 2 - 64)) / 4 * 4; + // tPDEX2WR, tPDEX2RD (timing delay from exiting powerdown mode to a write/read command) in ns + const u32 tPDEX2 = 10; + // [Guessed] tACT2PDEN (timing delay from an activate, MRS or EMRS command to power-down entry) in ns + const u32 tACT2PDEN = 14; + // [Guessed] tPDEX2MRR (timing delay from exiting powerdown mode to MRR command) in ns + const double tPDEX2MRR = 28.75; + // [Guessed] tCKE2PDEN (timing delay from turning off CKE to power-down entry) in ns + const double tCKE2PDEN = 8.5; + // tXSR (SELF REFRESH exit to next valid command delay) in ns + const double tXSR = tRFCab + 7.5; + // tCKE (minimum CKE high pulse width) in ns + const u32 tCKE = 8; + // tCKELPD (minimum 
CKE low pulse width in SELF REFRESH) in ns + const u32 tCKELPD = 15; + // [Guessed] tPD (minimum CKE low pulse width in power-down mode) in ns + const double tPD = 7.5; + // tFAW (Four-bank Activate Window) in ns + const u32 tFAW = (EmcClock == 2131200) ? 30 : 40; - /* Section 1: adjust HI bits: BIT 26:16 */ - const uint32_t ddll_high[] = { - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_5), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_5), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3), - }; - for (uint32_t i = 0; i < sizeof(ddll_high)/sizeof(uint32_t); i++) - { - uint32_t *ddll = reinterpret_cast(reinterpret_cast(target_table) + ddll_high[i]); - uint32_t *ddll_ref = reinterpret_cast(reinterpret_cast(ref_table) + ddll_high[i]); - uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 26,16) & ((1 << 10) - 1); - CLEAR_BIT(*ddll, 26,16) - 
*ddll |= adjusted_ddll << 16; - } + #define GET_CYCLE_CEIL(PARAM) std::ceil(double(PARAM) / tCK_avg) - /* Section 2: adjust LOW bits: BIT 10:0 */ - const uint32_t ddll_low[] = { - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_0), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_1), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_3), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_0), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_1), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_3), - OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3), - OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2), - offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3), - }; - for (uint32_t i = 0; i < sizeof(ddll_low)/sizeof(uint32_t); i++) - { - uint32_t *ddll = reinterpret_cast(reinterpret_cast(target_table) + ddll_low[i]); - uint32_t *ddll_ref = reinterpret_cast(reinterpret_cast(ref_table) + ddll_low[i]); - uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 
10,0) & ((1 << 10) - 1); - CLEAR_BIT(*ddll, 10,0) - *ddll |= adjusted_ddll; - } + WRITE_PARAM_ALL_REG(table, emc_rc, GET_CYCLE_CEIL(tRC)); + WRITE_PARAM_ALL_REG(table, emc_rfc, GET_CYCLE_CEIL(tRFCab)); + WRITE_PARAM_ALL_REG(table, emc_rfcpb, GET_CYCLE_CEIL(tRFCpb)); + WRITE_PARAM_ALL_REG(table, emc_ras, GET_CYCLE_CEIL(tRAS)); + WRITE_PARAM_ALL_REG(table, emc_rp, GET_CYCLE_CEIL(tRPpb)); + WRITE_PARAM_ALL_REG(table, emc_rd_rcd, GET_CYCLE_CEIL(tRCD)); + WRITE_PARAM_ALL_REG(table, emc_wr_rcd, GET_CYCLE_CEIL(tRCD)); + WRITE_PARAM_ALL_REG(table, emc_rrd, GET_CYCLE_CEIL(tRRD)); + WRITE_PARAM_ALL_REG(table, emc_refresh, REFRESH); + WRITE_PARAM_ALL_REG(table, emc_pre_refresh_req_cnt, REFRESH / 4); + WRITE_PARAM_ALL_REG(table, emc_pdex2wr, GET_CYCLE_CEIL(tPDEX2)); + WRITE_PARAM_ALL_REG(table, emc_pdex2rd, GET_CYCLE_CEIL(tPDEX2)); + WRITE_PARAM_ALL_REG(table, emc_act2pden,GET_CYCLE_CEIL(tACT2PDEN)); + WRITE_PARAM_ALL_REG(table, emc_cke2pden,GET_CYCLE_CEIL(tCKE2PDEN)); + WRITE_PARAM_ALL_REG(table, emc_pdex2mrr,GET_CYCLE_CEIL(tPDEX2MRR)); + WRITE_PARAM_ALL_REG(table, emc_txsr, GET_CYCLE_CEIL(tXSR)); + WRITE_PARAM_ALL_REG(table, emc_txsrdll, GET_CYCLE_CEIL(tXSR)); + WRITE_PARAM_ALL_REG(table, emc_tcke, GET_CYCLE_CEIL(tCKE)); + WRITE_PARAM_ALL_REG(table, emc_tckesr, GET_CYCLE_CEIL(tCKELPD)); + WRITE_PARAM_ALL_REG(table, emc_tpd, GET_CYCLE_CEIL(tPD)); + WRITE_PARAM_ALL_REG(table, emc_tfaw, GET_CYCLE_CEIL(tFAW)); + WRITE_PARAM_ALL_REG(table, emc_trpab, GET_CYCLE_CEIL(tRPab)); + WRITE_PARAM_ALL_REG(table, emc_trefbw, REFRESH + 64); + + constexpr u32 MC_ARB_DIV = 4; // ? 
+ table->burst_mc_regs.mc_emem_arb_timing_rcd = std::ceil(GET_CYCLE_CEIL(tRCD) / MC_ARB_DIV - 2); + table->burst_mc_regs.mc_emem_arb_timing_rp = std::ceil(GET_CYCLE_CEIL(tRPpb) / MC_ARB_DIV - 1); + table->burst_mc_regs.mc_emem_arb_timing_rc = std::ceil(std::max(GET_CYCLE_CEIL(tRC), GET_CYCLE_CEIL(tRAS)+GET_CYCLE_CEIL(tRPpb))/ MC_ARB_DIV); + table->burst_mc_regs.mc_emem_arb_timing_ras = std::ceil(GET_CYCLE_CEIL(tRAS) / MC_ARB_DIV - 2); + table->burst_mc_regs.mc_emem_arb_timing_faw = std::ceil(GET_CYCLE_CEIL(tFAW) / MC_ARB_DIV - 1); + table->burst_mc_regs.mc_emem_arb_timing_rrd = std::ceil(GET_CYCLE_CEIL(tRRD) / MC_ARB_DIV - 1); + table->burst_mc_regs.mc_emem_arb_timing_rap2pre = std::ceil(table->burst_regs.emc_r2p / MC_ARB_DIV); + table->burst_mc_regs.mc_emem_arb_timing_wap2pre = std::ceil(table->burst_regs.emc_w2p / MC_ARB_DIV); + table->burst_mc_regs.mc_emem_arb_timing_r2w = std::ceil(table->burst_regs.emc_r2w / MC_ARB_DIV + 1); + table->burst_mc_regs.mc_emem_arb_timing_w2r = std::ceil(table->burst_regs.emc_w2r / MC_ARB_DIV + 1); + table->burst_mc_regs.mc_emem_arb_timing_rfcpb = std::ceil(GET_CYCLE_CEIL(tRFCpb) / MC_ARB_DIV + 1); // ? 
} - ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_zcal_wait_cnt, 21,16, 10,0) - ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt, 21,16, 10,0) - ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt2, 21,16, 10,0) + ADJUST_PARAM_ALL_REG(table, emc_r2w, ref); + ADJUST_PARAM_ALL_REG(table, emc_w2r, ref); + ADJUST_PARAM_ALL_REG(table, emc_r2p, ref); + ADJUST_PARAM_ALL_REG(table, emc_w2p, ref); + ADJUST_PARAM_ALL_REG(table, emc_trtm, ref); + ADJUST_PARAM_ALL_REG(table, emc_twtm, ref); + ADJUST_PARAM_ALL_REG(table, emc_tratm, ref); + ADJUST_PARAM_ALL_REG(table, emc_twatm, ref); - ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_auto_cal_channel, 5,0, | 0xC1E00300) - ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_pmacro_autocal_cfg_common, 5,0, | 8 << 8) + ADJUST_PARAM_ALL_REG(table, emc_rw2pden, ref); - ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_dyn_self_ref_control, 31,31, 15,0) + ADJUST_PARAM_ALL_REG(table, emc_tclkstop, ref); - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qpop); + ADJUST_PARAM_ALL_REG(table, emc_pmacro_dll_cfg_2, ref); // EMC_DLL_CFG_2_0: level select for VDDA? 
- ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_timing_0, 9,0, | 0x1186100) + // ADJUST_PARAM_TABLE(table, dram_timings.rl); // not used on Mariko - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_rdv); - target_table->burst_regs.emc_tr_rdv_mask = target_table->burst_regs.emc_tr_rdv + 2; - target_table->shadow_regs_ca_train.emc_tr_rdv_mask = target_table->shadow_regs_ca_train.emc_tr_rdv + 2; - target_table->shadow_regs_rdwr_train.emc_tr_rdv_mask = target_table->shadow_regs_rdwr_train.emc_tr_rdv + 2; + ADJUST_PARAM_TABLE(table, la_scale_regs.mc_mll_mpcorer_ptsa_rate, ref); + ADJUST_PARAM_TABLE(table, la_scale_regs.mc_ptsa_grant_decrement, ref); - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qpop); - ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qsafe); - ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_qrst, 6,0, | (6 << 16)); + // ADJUST_PARAM_TABLE(table, min_mrs_wait); // not used on LPDDR4X + // ADJUST_PARAM_TABLE(table, latency); // not used - ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_training_vref_settle, 15,0, | (4 << 16)); - - /* External Memory Arbitration Configuration */ - /* BIT 20:16 - EXTRA_TICKS_PER_UPDATE: 0 */ - /* BIT 8:0 - CYCLES_PER_UPDATE: 12(1600MHz), 10(1331.2MHz) */ - ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_cfg); - - /* External Memory Arbitration Configuration: Direction Arbiter: Turns */ - /* BIT 31:24 - W2R_TURN: approx. mc_emem_arb_timing_w2r */ - /* BIT 23:16 - R2W_TURN: approx. 
mc_emem_arb_timing_r2w */ - /* BIT 15:8 - W2W_TURN: 0 */ - /* BIT 7:0 - R2R_TURN: 0 */ + /* Patch PLLMB divisors */ { - uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_turns; - uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_turns; - uint8_t w2r_turn = ADJUST_BIT(param_1600, param_1331, 31,24); - uint8_t r2w_turn = ADJUST_BIT(param_1600, param_1331, 23,16); - target_table->burst_mc_regs.mc_emem_arb_da_turns = w2r_turn << 24 | r2w_turn << 16; - } - - /* External Memory Arbitration Configuration: Direction Arbiter: Covers */ - /* BIT 23:16 - RCD_W_COVER: 13(1600MHz), 11(1331.2MHz) */ - /* BIT 15:8 - RCD_R_COVER: 8(1600MHz), 7(1331.2MHz) */ - /* BIT 7:0 - RC_COVER: approx. mc_emem_arb_timing_rc, 12(1600MHz), 9(1331.2MHz) */ - { - uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_covers; - uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_covers; - uint8_t rcd_w_cover = ADJUST_BIT(param_1600, param_1331, 23,16); - uint8_t rcd_r_cover = ADJUST_BIT(param_1600, param_1331, 15,8); - uint8_t rc_cover = ADJUST_BIT(param_1600, param_1331, 7,0); - target_table->burst_mc_regs.mc_emem_arb_da_covers = rcd_w_cover << 16 | rcd_r_cover << 8 | rc_cover; - } - - /* External Memory Arbitration Configuration: Miscellaneous Thresholds (0) */ - /* BIT 20:16 - PRIORITY_INVERSION_ISO_THRESHOLD: 12(1600MHz), 10(1331.2MHz) */ - /* BIT 14:8 - PRIORITY_INVERSION_THRESHOLD: 36(1600MHz), 30(1331.2MHz) */ - /* BIT 7:0 - BC2AA_HOLDOFF_THRESHOLD: set to mc_emem_arb_timing_rc */ - { - uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_misc0; - uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_misc0; - uint8_t priority_inversion_iso_threshold = ADJUST_BIT(param_1600, param_1331, 20,16); - uint8_t priority_inversion_threshold = ADJUST_BIT(param_1600, param_1331, 14,8); - uint8_t bc2aa_holdoff_threshold = target_table->burst_mc_regs.mc_emem_arb_timing_rc; - CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 20,16) - 
CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 14,8) - CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 7,0) - target_table->burst_mc_regs.mc_emem_arb_misc0 |= - (priority_inversion_iso_threshold << 16 | priority_inversion_threshold << 8 | bc2aa_holdoff_threshold); - } - - /* Latency allowance settings */ - { - /* Section 1: adjust write latency */ - /* BIT 23:16 - ALLOWANCE_WRITE: 128(1600MHz), 153(1331.2MHz) */ - const uint32_t latency_write_offset[] = { - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_1), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_tsec_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmca_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcaa_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmc_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcab_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_ppcs_1), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_mpcore_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_avpc_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvenc_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvdec_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0), - offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1), - }; - for (uint32_t i = 0; i < sizeof(latency_write_offset)/sizeof(uint32_t); i++) - { - uint32_t *latency = reinterpret_cast(reinterpret_cast(target_table) + latency_write_offset[i]); - CLEAR_BIT(*latency, 23,16) - *latency |= ADJUST_INVERSE(128) << 16; + // Calculate DIVM and DIVN (clock divisors) + // Common PLL oscillator is 38.4 MHz + // PLLMB_OUT = 38.4 MHz / 
PLLMB_DIVM * PLLMB_DIVN
+                u32 divm = 1;
+                u32 divn = EmcClock / 38400;
+                u32 remainder = EmcClock % 38400;
+                if (remainder * 4 >= 38400 * 3) { // remainder >= 3/4 of 38.4 MHz; cross-multiplied because (3/4) is integer division and evaluates to 0
+                    divm = 4;
+                    divn = divn * divm + 3;
+                } else
+                if (remainder * 3 >= 38400 * 2) { // remainder >= 2/3
+                    divm = 3;
+                    divn = divn * divm + 2;
+                } else
+                if (remainder * 2 >= 38400) { // remainder >= 1/2
+                    divm = 2;
+                    divn = divn * divm + 1;
+                } else
+                if (remainder * 3 >= 38400) { // remainder >= 1/3
+                    divm = 3;
+                    divn = divn * divm + 1;
+                } else
+                if (remainder * 4 >= 38400) { // remainder >= 1/4; below that divm stays 1 and divn stays EmcClock / 38400
+                    divm = 4;
+                    divn = divn * divm + 1;
                 }
-            /* Section 2: adjust read latency */
-            /* BIT 7:0 - ALLOWANCE_READ */
-            const uint32_t latency_read_offset[] = {
-                offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_0),
-                offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_1),
-                offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
-                offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
-                offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
-                offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vi2_0),
-                offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
-            };
-            for (uint32_t i = 0; i < sizeof(latency_read_offset)/sizeof(uint32_t); i++)
-            {
-                uint32_t *latency = reinterpret_cast(reinterpret_cast(target_table) + latency_read_offset[i]);
-                uint8_t adjusted_latency = ADJUST_INVERSE(TRIM_BIT(*latency, 7,0));
-                CLEAR_BIT(*latency, 7,0)
-                *latency |= adjusted_latency;
-            }
+                table->pllmb_divm = divm;
+                table->pllmb_divn = divn;
             }
-        /* PLLM and PLLMB control */
+        #ifdef EXPERIMENTAL
         {
-            /*
-             * CLK_RST_CONTROLLER_PLLM_SS_CTRL1:
-             * BIT 31:16 : PLLM_SDM_SSC_MAX
-             * BIT 15:0 : PLLM_SDM_SSC_MIN
-             *
-             * CLK_RST_CONTROLLER_PLLM_SS_CTRL2:
-             * BIT 31:16 : PLLM_SDM_SSC_STEP
-             * BIT 15:0 : PLLM_SDM_DIN
-             *
-             * pllm(b)_ss_ctrl1:
-             * 1365, 342 (1600MHz)
-             * 0xFAAB, 0xF404 (1331MHz)
-             *
-             * pllm(b)_ss_ctrl2:
-             * 2, 1365 (1600MHz)
-             * 6, 0xFAAB (1331MHz)
-             *
-             * No need to care about this if Spread Spectrum (SS) is disabled
-             */
-            //
Disable PLL Spread Spectrum Control - table->pll_en_ssc = 0; - table->pllm_ss_cfg = 1 << 30; + #define ADJUST_PARAM_ROUND2_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \ + TARGET_TABLE->burst_regs.PARAM = \ + ((ADJUST_PROP(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM) + 1) >> 1) << 1; \ + TARGET_TABLE->shadow_regs_ca_train.PARAM = \ + ((ADJUST_PROP(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM) + 1) >> 1) << 1; \ + TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \ + ((ADJUST_PROP(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM) + 1) >> 1) << 1; + + #define ADJUST_PARAM(TARGET_PARAM, REF_PARAM) \ + TARGET_PARAM = ADJUST_PROP(TARGET_PARAM, REF_PARAM); + + #define ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, PARAM) \ + ADJUST_PARAM(TARGET_TABLE->PARAM, REF_TABLE->PARAM) + + #define ADJUST_PARAM_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \ + ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, burst_regs.PARAM) \ + ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_ca_train.PARAM) \ + ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_rdwr_train.PARAM) + + #define TRIM_BIT(IN_BITS, HIGH, LOW) \ + ((IN_BITS >> LOW) & ( (1u << (HIGH - LOW + 1u)) - 1u )) + + #define ADJUST_BIT(TARGET_PARAM, REF_PARAM, HIGH, LOW) \ + ADJUST_PROP(TRIM_BIT(TARGET_PARAM, HIGH, LOW), TRIM_BIT(REF_PARAM, HIGH, LOW)) + + #define CLEAR_BIT(BITS, HIGH, LOW) \ + BITS = BITS & ~( ((1u << HIGH) << 1u) - (1u << LOW) ); + + #define ADJUST_BIT_ALL_REG_SINGLE_OP(TARGET_TABLE, REF_TABLE, PARAM, HIGH, LOW, OPERATION) \ + TARGET_TABLE->burst_regs.PARAM = \ + (ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH, LOW) << LOW) OPERATION; \ + TARGET_TABLE->shadow_regs_ca_train.PARAM = \ + (ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH, LOW)) << LOW OPERATION; \ + TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \ + 
(ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH, LOW)) << LOW OPERATION; + + #define ADJUST_BIT_ALL_REG_PAIR(TARGET_TABLE, REF_TABLE, PARAM, HIGH1, LOW1, HIGH2, LOW2) \ + TARGET_TABLE->burst_regs.PARAM = \ + ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH1, LOW1) << LOW1 \ + | ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH2, LOW2) << LOW2; \ + TARGET_TABLE->shadow_regs_ca_train.PARAM = \ + ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH1, LOW1) << LOW1 \ + | ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH2, LOW2) << LOW2; \ + TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \ + ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH1, LOW1) << LOW1 \ + | ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH2, LOW2) << LOW2; + + /* For latency allowance */ + #define ADJUST_INVERSE(TARGET) ((TARGET*1000) / (EmcClock/1600)) + + /* emc_wdv, emc_wsv, emc_wev, emc_wdv_mask, + emc_quse, emc_quse_width, emc_ibdly, emc_obdly, + emc_einput, emc_einput_duration, emc_qrst, emc_qsafe, + emc_rdv, emc_rdv_mask, emc_rdv_early, emc_rdv_early_mask */ + ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv); + ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wsv); + ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wev); + ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv_mask); + + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse); + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse_width); + + ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_ibdly, 6,0, | (1 << 28)); + ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_obdly, 5,0, | (1 << 28)); + + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput); + 
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput_duration); + + ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_qrst, 6,0, | (6 << 16)); + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qsafe); + + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv); + target_table->burst_regs.emc_rdv_mask = target_table->burst_regs.emc_rdv + 2; + target_table->shadow_regs_ca_train.emc_rdv_mask = target_table->shadow_regs_ca_train.emc_rdv + 2; + target_table->shadow_regs_rdwr_train.emc_rdv_mask = target_table->shadow_regs_rdwr_train.emc_rdv + 2; + + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv_early); + target_table->burst_regs.emc_rdv_early_mask = target_table->burst_regs.emc_rdv_early + 2; + target_table->shadow_regs_ca_train.emc_rdv_early_mask = target_table->shadow_regs_ca_train.emc_rdv_early + 2; + target_table->shadow_regs_rdwr_train.emc_rdv_early_mask = target_table->shadow_regs_rdwr_train.emc_rdv_early + 2; + + /* emc_pmacro_..., + emc_zcal_wait_cnt, emc_mrs_wait_cnt(2), + emc_pmacro_autocal_cfg_common, emc_dyn_self_ref_control, emc_qpop, emc_pmacro_cmd_pad_tx_ctrl, + emc_tr_timing_0, emc_tr_rdv, emc_tr_qpop, emc_tr_rdv_mask, emc_tr_qsafe, emc_tr_qrst, + emc_training_vref_settle */ + /* DDLL values */ + { + #define OFFSET_ALL_REG(PARAM) \ + offsetof(MarikoMtcTable, burst_regs.PARAM), \ + offsetof(MarikoMtcTable, shadow_regs_ca_train.PARAM), \ + offsetof(MarikoMtcTable, shadow_regs_rdwr_train.PARAM) \ + + /* Section 1: adjust HI bits: BIT 26:16 */ + const uint32_t ddll_high[] = { + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_5), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_5), + OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0), + OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1), + OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2), + 
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3), + OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3), + }; + for (uint32_t i = 0; i < sizeof(ddll_high)/sizeof(uint32_t); i++) + { + uint32_t *ddll = reinterpret_cast(reinterpret_cast(target_table) + ddll_high[i]); + uint32_t *ddll_ref = reinterpret_cast(reinterpret_cast(ref_table) + ddll_high[i]); + uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 26,16) & ((1 << 10) - 1); + CLEAR_BIT(*ddll, 26,16) + *ddll |= adjusted_ddll << 16; + } + + /* Section 2: adjust LOW bits: BIT 10:0 */ + const uint32_t ddll_low[] = { + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_0), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_1), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_3), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_0), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_1), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_3), + OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4), + OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0), + OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1), + OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2), + 
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3), + OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2), + offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3), + }; + for (uint32_t i = 0; i < sizeof(ddll_low)/sizeof(uint32_t); i++) + { + uint32_t *ddll = reinterpret_cast(reinterpret_cast(target_table) + ddll_low[i]); + uint32_t *ddll_ref = reinterpret_cast(reinterpret_cast(ref_table) + ddll_low[i]); + uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 10,0) & ((1 << 10) - 1); + CLEAR_BIT(*ddll, 10,0) + *ddll |= adjusted_ddll; + } + } + + ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_zcal_wait_cnt, 21,16, 10,0) + ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt, 21,16, 10,0) + ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt2, 21,16, 10,0) + + ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_auto_cal_channel, 5,0, | 0xC1E00300) + ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_pmacro_autocal_cfg_common, 5,0, | 8 << 8) + + ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_dyn_self_ref_control, 31,31, 15,0) + + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qpop); + + ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_timing_0, 9,0, | 0x1186100) + + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_rdv); + 
target_table->burst_regs.emc_tr_rdv_mask = target_table->burst_regs.emc_tr_rdv + 2; + target_table->shadow_regs_ca_train.emc_tr_rdv_mask = target_table->shadow_regs_ca_train.emc_tr_rdv + 2; + target_table->shadow_regs_rdwr_train.emc_tr_rdv_mask = target_table->shadow_regs_rdwr_train.emc_tr_rdv + 2; + + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qpop); + ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qsafe); + ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_qrst, 6,0, | (6 << 16)); + + ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_training_vref_settle, 15,0, | (4 << 16)); + + /* External Memory Arbitration Configuration */ + /* BIT 20:16 - EXTRA_TICKS_PER_UPDATE: 0 */ + /* BIT 8:0 - CYCLES_PER_UPDATE: 12(1600MHz), 10(1331.2MHz) */ + ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_cfg); + + /* External Memory Arbitration Configuration: Direction Arbiter: Turns */ + /* BIT 31:24 - W2R_TURN: approx. mc_emem_arb_timing_w2r */ + /* BIT 23:16 - R2W_TURN: approx. mc_emem_arb_timing_r2w */ + /* BIT 15:8 - W2W_TURN: 0 */ + /* BIT 7:0 - R2R_TURN: 0 */ + { + uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_turns; + uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_turns; + uint8_t w2r_turn = ADJUST_BIT(param_1600, param_1331, 31,24); + uint8_t r2w_turn = ADJUST_BIT(param_1600, param_1331, 23,16); + target_table->burst_mc_regs.mc_emem_arb_da_turns = w2r_turn << 24 | r2w_turn << 16; + } + + /* External Memory Arbitration Configuration: Direction Arbiter: Covers */ + /* BIT 23:16 - RCD_W_COVER: 13(1600MHz), 11(1331.2MHz) */ + /* BIT 15:8 - RCD_R_COVER: 8(1600MHz), 7(1331.2MHz) */ + /* BIT 7:0 - RC_COVER: approx. 
mc_emem_arb_timing_rc, 12(1600MHz), 9(1331.2MHz) */ + { + uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_covers; + uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_covers; + uint8_t rcd_w_cover = ADJUST_BIT(param_1600, param_1331, 23,16); + uint8_t rcd_r_cover = ADJUST_BIT(param_1600, param_1331, 15,8); + uint8_t rc_cover = ADJUST_BIT(param_1600, param_1331, 7,0); + target_table->burst_mc_regs.mc_emem_arb_da_covers = rcd_w_cover << 16 | rcd_r_cover << 8 | rc_cover; + } + + /* External Memory Arbitration Configuration: Miscellaneous Thresholds (0) */ + /* BIT 20:16 - PRIORITY_INVERSION_ISO_THRESHOLD: 12(1600MHz), 10(1331.2MHz) */ + /* BIT 14:8 - PRIORITY_INVERSION_THRESHOLD: 36(1600MHz), 30(1331.2MHz) */ + /* BIT 7:0 - BC2AA_HOLDOFF_THRESHOLD: set to mc_emem_arb_timing_rc */ + { + uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_misc0; + uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_misc0; + uint8_t priority_inversion_iso_threshold = ADJUST_BIT(param_1600, param_1331, 20,16); + uint8_t priority_inversion_threshold = ADJUST_BIT(param_1600, param_1331, 14,8); + uint8_t bc2aa_holdoff_threshold = target_table->burst_mc_regs.mc_emem_arb_timing_rc; + CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 20,16) + CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 14,8) + CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 7,0) + target_table->burst_mc_regs.mc_emem_arb_misc0 |= + (priority_inversion_iso_threshold << 16 | priority_inversion_threshold << 8 | bc2aa_holdoff_threshold); + } + + /* Latency allowance settings */ + { + /* Section 1: adjust write latency */ + /* BIT 23:16 - ALLOWANCE_WRITE: 128(1600MHz), 153(1331.2MHz) */ + const uint32_t latency_write_offset[] = { + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_1), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_tsec_0), + 
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmca_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcaa_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmc_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcab_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_ppcs_1), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_mpcore_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_avpc_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvenc_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvdec_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1), + }; + for (uint32_t i = 0; i < sizeof(latency_write_offset)/sizeof(uint32_t); i++) + { + uint32_t *latency = reinterpret_cast(reinterpret_cast(target_table) + latency_write_offset[i]); + CLEAR_BIT(*latency, 23,16) + *latency |= ADJUST_INVERSE(128) << 16; + } + + /* Section 2: adjust read latency */ + /* BIT 7:0 - ALLOWANCE_READ */ + const uint32_t latency_read_offset[] = { + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_1), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vi2_0), + offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1), + }; + for (uint32_t i = 0; i < sizeof(latency_read_offset)/sizeof(uint32_t); i++) + { + uint32_t *latency = reinterpret_cast(reinterpret_cast(target_table) + latency_read_offset[i]); + uint8_t 
adjusted_latency = ADJUST_INVERSE(TRIM_BIT(*latency, 7,0)); + CLEAR_BIT(*latency, 7,0) + *latency |= adjusted_latency; + } + } + + /* PLLM and PLLMB control */ + { + /* + * CLK_RST_CONTROLLER_PLLM_SS_CTRL1: + * BIT 31:16 : PLLM_SDM_SSC_MAX + * BIT 15:0 : PLLM_SDM_SSC_MIN + * + * CLK_RST_CONTROLLER_PLLM_SS_CTRL2: + * BIT 31:16 : PLLM_SDM_SSC_STEP + * BIT 15:0 : PLLM_SDM_DIN + * + * pllm(b)_ss_ctrl1: + * 1365, 342 (1600MHz) + * 0xFAAB, 0xF404 (1331MHz) + * + * pllm(b)_ss_ctrl2: + * 2, 1365 (1600MHz) + * 6, 0xFAAB (1331MHz) + * + * No need to care about this if Spread Spectrum (SS) is disabled + */ + // Disable PLL Spread Spectrum Control + table->pll_en_ssc = 0; + table->pllm_ss_cfg = 1 << 30; + } + + /* EMC misc. configuration */ + { + /* ? Command Trigger: MRW, MRW2: MRW_OP - [PMC] data to be written ? + * + * EMC_MRW: MRW_OP + * 1600 MHz: 0x54 + * 1331 MHz: 0x44 + * 1065 MHz: 0x34 + * 800 MHz: 0x34 + * 665 MHz: 0x14 + * 408 MHz: 0x04 + * 204 MHz: 0x04 + * + * EMC_MRW2: MRW2_OP + * 1600 MHz: 0x2D 45 5*9 + * 1331 MHz: 0x24 36 4*9 + * 1065 MHz: 0x1B 27 3*9 + * 800 MHz: 0x12 18 2*9 + * 665 MHz: 0x09 9 1*9 + * 408 MHz: 0x00 + * 204 MHz: 0x00 + */ + { + + } + + /* EMC_CFG_2 */ + /* BIT 5:3 - ZQ_EXTRA_DELAY: 6(1600MHz), 5(1331.2MHz), max possible value: 7 */ + { + CLEAR_BIT(target_table->emc_cfg_2, 5,3) + target_table->emc_cfg_2 |= 7 << 3; + } + } + } + #endif + } + + #pragma GCC diagnostic ignored "-Wunused-variable" + void ApplyAutoPcvPatch(uintptr_t mapped_nso, size_t nso_size) { + /* Abort immediately once something goes wrong */ + bool isMariko = (spl::GetSocType() == spl::SocType_Mariko); + + constexpr u32 emcMaxClockMaxCnt = 30; + constexpr u32 cpuMaxVoltMarikoMaxCnt = 13; + constexpr u32 mtcTableMarikoMaxCnt = 13; + constexpr u32 gpuMaxClockMarikoReqCnt = 2; + + constexpr u32 cpuClockVddCpuPatternNext = 0; + constexpr u32 cpuTableMarikoPatternNext = 1527196; + // constexpr u32 cpuTableEristaPatternNext = 1227500; + + constexpr u32 cpuMinVolt[] = { 800, 637, 620, 
610 }; + + u8 emcMaxClock {}; + u8 cpuClockVddCpu {}; + u8 cpuTableMariko {}; + // u8 cpuTableErista {}; + u8 gpuTableMariko {}; + u8 cpuMaxVoltMariko {}; + u8 mtcTableMariko {}; + u8 dvbTableMariko {}; + u8 gpuMaxClockMariko {}; + u8 gpuMaxClockMarikoRd {}; + u32 gpuMaxClockPattern[2] = { 0x528E0000, 0x72A002E0 }; // 1536 MHz + + uintptr_t i = mapped_nso; + while (i <= mapped_nso + nso_size - sizeof(MarikoMtcTable)) + { + u32 value = *(reinterpret_cast(i)); + + #ifdef EXPERIMENTAL + if (isMariko) + { + // CPU Table + if (value == 1785'000) + { + u32 value_next2 = *(reinterpret_cast(i + sizeof(u32) * 2)); + if (value_next2 == cpuClockVddCpuPatternNext) + { + u32 value_next = *(reinterpret_cast(i + sizeof(u32))); + if (value_next == cpuClockVddCpuPatternNext) + { + std::memcpy(reinterpret_cast(i), &CpuMaxClock, sizeof(CpuMaxClock)); + cpuClockVddCpu++; + } + } + + if (value_next2 == cpuTableMarikoPatternNext) + { + uintptr_t entry_1963 = i + 2 * sizeof(cpu_freq_cvb_table_t); + uintptr_t free_space = entry_1963 + sizeof(cpu_freq_cvb_table_t); + uintptr_t entry_204 = free_space - 18 * sizeof(cpu_freq_cvb_table_t); + + if ( *(reinterpret_cast(entry_1963)) == 1963'500 + && *(reinterpret_cast(free_space)) == 0 + && *(reinterpret_cast(entry_204)) == 204'000 ) + { + std::memcpy(reinterpret_cast(free_space), NewCpuTables, sizeof(NewCpuTables)); + cpuTableMariko++; + + // Patch CPU max volt (1120'000) in CPU dvfs table + for (u32 i = 0; i < 18; i++) + { + void* max_volt_dvfs = reinterpret_cast(free_space - i * sizeof(cpu_freq_cvb_table_t) - sizeof(cvb_coefficients)); + std::memcpy(max_volt_dvfs, &NewCpuVoltageScaled, sizeof(NewCpuVoltageScaled)); + } + } + } + } + + // GPU Table + if (value == 1267'200) + { + u32 free_space = i + sizeof(gpu_cvb_pll_table_t); + if (*(reinterpret_cast(free_space)) == 0) + { + std::memcpy(reinterpret_cast(free_space), NewGpuTables, sizeof(NewGpuTables)); + gpuTableMariko++; + } + } + + // CPU voltage range + if (value == 1120) + { + u32 
value_cpu_min_volt = *(reinterpret_cast(i - sizeof(u32))); + + for (u32 j = 0; j < sizeof(cpuMinVolt)/sizeof(u32); j++) + { + if (cpuMinVolt[j] == value_cpu_min_volt) + { + // acceptable cpu min volt, patch max volt + std::memcpy(reinterpret_cast(i), &CpuVoltageLimit, sizeof(CpuVoltageLimit)); + cpuMaxVoltMariko++; + break; + } + } + } + + // GPU Max Clock asm + { + // Pattern: + // * + // * MOV W11, #0x1000 MOV (wide immediate) 0x1000 0xB (11) + // * sf | opc | | hw | imm16 | Rd + // * #31 |30 29|28 27 26 25 24 23|22 21|20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 |4 3 2 1 0 + // * 0 | 1 0 | 1 0 0 1 0 1| 0 0| 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 |0 1 0 1 1 + // * + // * MOVK W11, #0xE, LSL#16 16 0xE 0xB (11) + // * sf | opc | | hw | imm16 | Rd + // * #31 |30 29|28 27 26 25 24 23|22 21|20 19 18 17 16 15 14 13 12 11 10 9 8 7 6 5 |4 3 2 1 0 + // * 0 | 1 1 | 1 0 0 1 0 1| 0 1| 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 |0 1 0 1 1 + + constexpr u32 mov_w_0x1000_hi = 0x52820000 >> 5; + constexpr u32 movk_w_0xE_shift16_hi = 0x72A001C0 >> 5; + + u32 value_hi = value >> 5; + u32 value_lo = value & ((1 << 5) - 1); + if (value_hi == mov_w_0x1000_hi) + { + u32 value_next = *(reinterpret_cast(i + sizeof(u32))); + u32 value_next_hi = value_next >> 5; + u32 value_next_lo = value_next & ((1 << 5) - 1); + if (value_next_hi == movk_w_0xE_shift16_hi && value_next_lo == value_lo) + { + if (!gpuMaxClockMarikoRd) + gpuMaxClockMarikoRd = value_lo; + if (gpuMaxClockMarikoRd != value_lo) + AMS_ABORT("gpuMaxClockMarikoRd not consistent!"); + + gpuMaxClockPattern[0] |= gpuMaxClockMarikoRd; + gpuMaxClockPattern[1] |= gpuMaxClockMarikoRd; + + std::memcpy(reinterpret_cast(i), gpuMaxClockPattern, sizeof(gpuMaxClockPattern)); + gpuMaxClockMariko++; + } + } + } + } + #endif + + { + // MEM + if (value == 1600'000) + { + if (isMariko) + { + u32 value_next = *(reinterpret_cast(i + sizeof(u32))); + u32 value_next2 = *(reinterpret_cast(i + sizeof(u32) * 2)); + + if (value_next == 1100) // min_volt in MtcTable + { + 
uintptr_t offset = i - offsetof(MarikoMtcTable, rate_khz); + uintptr_t offset_prev = offset - sizeof(MarikoMtcTable); + + MarikoMtcTable* const mtc_table_new = reinterpret_cast(offset); + MarikoMtcTable* const mtc_table_old = reinterpret_cast(offset_prev); + if (mtc_table_new->rev != 3 || mtc_table_old->rev != 3 || mtc_table_old->rate_khz != 1331'200) + AMS_ABORT("mtc_table"); + + // Replace 1331 MHz with 1600 MHz + std::memcpy(reinterpret_cast(mtc_table_old), reinterpret_cast(mtc_table_new), sizeof(MarikoMtcTable)); + mtcTableMariko++; + + // Generate new table for Max MHz + // [!TODO] ref table is identical to new table, leaving some params unchanged + AdjustMtcTable(mtc_table_new, mtc_table_old); + } + else if (value_next2 == 675) // Mariko Dvb Table + { + u32 dvb_1331_offset = i - sizeof(emc_dvb_dvfs_table_t); + + u32 value_1331_entry = *(reinterpret_cast(dvb_1331_offset)); + if (value_1331_entry == 1331'200) + { + const u32 dvb_1600_clk = 1600'000; + std::memcpy(reinterpret_cast(dvb_1331_offset), &dvb_1600_clk, sizeof(dvb_1600_clk)); + dvbTableMariko++; + } + } + } + + // Patch Max Emc Clock for both Erista and Mariko + std::memcpy(reinterpret_cast(i), &EmcClock, sizeof(EmcClock)); + emcMaxClock++; + } + } + + i += sizeof(u32); } - /* EMC misc. configuration */ + if (isMariko) { - /* ? Command Trigger: MRW, MRW2: MRW_OP - [PMC] data to be written ? 
- * - * EMC_MRW: MRW_OP - * 1600 MHz: 0x54 - * 1331 MHz: 0x44 - * 1065 MHz: 0x34 - * 800 MHz: 0x34 - * 665 MHz: 0x14 - * 408 MHz: 0x04 - * 204 MHz: 0x04 - * - * EMC_MRW2: MRW2_OP - * 1600 MHz: 0x2D 45 5*9 - * 1331 MHz: 0x24 36 4*9 - * 1065 MHz: 0x1B 27 3*9 - * 800 MHz: 0x12 18 2*9 - * 665 MHz: 0x09 9 1*9 - * 408 MHz: 0x00 - * 204 MHz: 0x00 - */ - { - - } - - /* EMC_CFG_2 */ - /* BIT 5:3 - ZQ_EXTRA_DELAY: 6(1600MHz), 5(1331.2MHz), max possible value: 7 */ - { - CLEAR_BIT(target_table->emc_cfg_2, 5,3) - target_table->emc_cfg_2 |= 7 << 3; - } + // if (cpuClockVddCpu != 1) + // AMS_ABORT("cpuClockVddCpu"); + // if (cpuTableMariko != 1) + // AMS_ABORT("cpuTableMariko"); + // if (gpuTableMariko != 1) + // AMS_ABORT("gpuTableMariko"); + if (dvbTableMariko != 1) + AMS_ABORT("dvbTableMariko"); + // if (cpuMaxVoltMariko > cpuMaxVoltMarikoMaxCnt || !cpuMaxVoltMariko) + // AMS_ABORT("cpuMaxVoltMariko"); + if (mtcTableMariko > mtcTableMarikoMaxCnt || !mtcTableMariko) + AMS_ABORT("mtcTableMariko"); + // if (gpuMaxClockMariko != gpuMaxClockMarikoReqCnt) + // AMS_ABORT("gpuMaxClockMariko"); + } + { + if (emcMaxClock > emcMaxClockMaxCnt || !emcMaxClock) + AMS_ABORT("emcMaxClock"); + } + } + #pragma GCC diagnostic error "-Wunused-variable" + } + + namespace ptm { + void ApplyAutoPtmPatch(uintptr_t mapped_nso, size_t nso_size) { + /* No abort here as ptm is not that critical */ + if (spl::GetSocType() == spl::SocType_Erista) + return; + + uintptr_t emcOffsetStart = 0; + constexpr u32 OffsetInterval = 0x20; + constexpr u32 emcOffsetCnt = 16; + constexpr u32 EmcMaxClk = EmcClock * 1000; + constexpr u32 Emc1600Clk = 1600'000'000; + + uintptr_t i = mapped_nso; + + while (i <= mapped_nso + nso_size) + { + u32 value = *(reinterpret_cast(i)); + + if (value == 1600'000'000) + { + emcOffsetStart = i; + break; + } + + i += sizeof(u32); + } + + if (!emcOffsetStart) + return; + + for (u32 j = 0; j < emcOffsetCnt; j++) + { + uintptr_t offset = emcOffsetStart + OffsetInterval * j; + uintptr_t 
offset_next = offset + sizeof(u32); + + if (offset_next > mapped_nso + nso_size) + return; + + u32 value = *(reinterpret_cast(offset)); + u32 value_next = *(reinterpret_cast(offset_next)); + + if (value != value_next) + return; + + u32 value_patched = value; + + switch (value) + { + case 1600'000'000: + value_patched = EmcMaxClk; + break; + case 1331'200'000: + case 1065'600'000: + value_patched = Emc1600Clk; + break; + default: + return; + } + + std::memcpy(reinterpret_cast(offset), &value_patched, sizeof(value_patched)); + std::memcpy(reinterpret_cast(offset_next), &value_patched, sizeof(value_patched)); } } - #endif } - /* Unlock the second sub-partition for retail Mariko, and double the bandwidth (~60GB/s) - * https://github.com/CTCaer/hekate/blob/01b6e645b3cb69ddf28cc9eff40c4b35bf03dbd4/bdk/mem/sdram.h#L30 - * - * Sub-partitions are defined as ranks, so there is no other way than replacing DRAM chips. - */ } - -namespace ptm { - constexpr u32 EmcOffsetStart[] = { - // 0xC5E24, - 0xA032C, - }; - - constexpr u32 OffsetInterval = 0x20; - - constexpr u32 CpuBoostOffset = 0x170; -} \ No newline at end of file diff --git a/Source/Atmosphere/stratosphere/loader/source/ldr_patcher.cpp b/Source/Atmosphere/stratosphere/loader/source/ldr_patcher.cpp deleted file mode 100644 index cb06823f..00000000 --- a/Source/Atmosphere/stratosphere/loader/source/ldr_patcher.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright (c) Atmosphère-NX - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#include -#include "ldr_patcher.hpp" - -namespace ams::ldr { - - namespace { - - constexpr const char *NsoPatchesDirectory = "exefs_patches"; - - /* Exefs patches want to prevent modification of header, */ - /* and also want to adjust offset relative to mapped location. */ - constexpr size_t NsoPatchesProtectedSize = sizeof(NsoHeader); - constexpr size_t NsoPatchesProtectedOffset = sizeof(NsoHeader); - - constexpr const char * const LoaderSdMountName = "#amsldr-sdpatch"; - static_assert(sizeof(LoaderSdMountName) <= fs::MountNameLengthMax); - - constinit os::SdkMutex g_ldr_sd_lock; - constinit bool g_mounted_sd; - - constinit os::SdkMutex g_embedded_patch_lock; - constinit bool g_got_embedded_patch_settings; - constinit bool g_force_enable_usb30; - - bool EnsureSdCardMounted() { - std::scoped_lock lk(g_ldr_sd_lock); - - if (g_mounted_sd) { - return true; - } - - if (!cfg::IsSdCardInitialized()) { - return false; - } - - if (R_FAILED(fs::MountSdCard(LoaderSdMountName))) { - return false; - } - - return (g_mounted_sd = true); - } - - bool IsUsb30ForceEnabled() { - std::scoped_lock lk(g_embedded_patch_lock); - - if (!g_got_embedded_patch_settings) { - g_force_enable_usb30 = spl::IsUsb30ForceEnabled(); - g_got_embedded_patch_settings = true; - } - - return g_force_enable_usb30; - } - - u32 GetEmcClock() { - // RAM freqs from Hekate: - // 1862400, 1894400, 1932800, 1996800, 2064000, 2099200, 2131200 - // Other values might work as well - // RAM overclock could be UNSTABLE and generate graphical glitches / instabilities / NAND corruption - return 1862400; - } - - // u32 GetCpuBoostClock() { - // return 1963500; - // } - - consteval u8 ParseNybble(char c) { - AMS_ASSUME(('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f')); - if ('0' <= c && c <= '9') { - return c - '0' + 0x0; - } else if ('A' <= c && c <= 'F') { - return c - 'A' 
+ 0xA; - } else /* if ('a' <= c && c <= 'f') */ { - return c - 'a' + 0xa; - } - } - - consteval ro::ModuleId ParseModuleId(const char *str) { - /* Parse a static module id. */ - ro::ModuleId module_id = {}; - - size_t ofs = 0; - while (str[0] != 0) { - AMS_ASSUME(ofs < sizeof(module_id)); - AMS_ASSUME(str[1] != 0); - - module_id.data[ofs] = (ParseNybble(str[0]) << 4) | (ParseNybble(str[1]) << 0); - - str += 2; - ofs++; - } - - return module_id; - } - - struct EmbeddedPatchEntry { - uintptr_t offset; - const void * const data; - size_t size; - }; - - struct EmbeddedPatch { - ro::ModuleId module_id; - size_t num_entries; - const EmbeddedPatchEntry *entries; - }; - - #include "ldr_embedded_usb_patches.inc" - - } - - #include "ldr_oc_patch.hpp" - - /* Apply IPS patches. */ - void LocateAndApplyIpsPatchesToModule(const u8 *module_id_data, uintptr_t mapped_nso, size_t mapped_size) { - if (!EnsureSdCardMounted()) { - return; - } - - ro::ModuleId module_id; - std::memcpy(std::addressof(module_id.data), module_id_data, sizeof(module_id.data)); - ams::patcher::LocateAndApplyIpsPatchesToModule(LoaderSdMountName, NsoPatchesDirectory, NsoPatchesProtectedSize, NsoPatchesProtectedOffset, std::addressof(module_id), reinterpret_cast(mapped_nso), mapped_size); - } - - /* Apply embedded patches. */ - void ApplyEmbeddedPatchesToModule(const u8 *module_id_data, uintptr_t mapped_nso, size_t mapped_size) { - /* Make module id. 
*/ - ro::ModuleId module_id; - std::memcpy(std::addressof(module_id.data), module_id_data, sizeof(module_id.data)); - - if (IsUsb30ForceEnabled()) { - for (const auto &patch : Usb30ForceEnablePatches) { - if (std::memcmp(std::addressof(patch.module_id), std::addressof(module_id), sizeof(module_id)) == 0) { - for (size_t i = 0; i < patch.num_entries; ++i) { - const auto &entry = patch.entries[i]; - if (entry.offset + entry.size <= mapped_size) { - std::memcpy(reinterpret_cast(mapped_nso + entry.offset), entry.data, entry.size); - } - } - } - } - } - - u32 EmcClock = GetEmcClock(); - if (spl::GetSocType() == spl::SocType_Mariko && EmcClock) { - for (u32 i = 0; i < sizeof(PcvModuleId)/sizeof(ro::ModuleId); i++) { - if (std::memcmp(std::addressof(PcvModuleId[i]), std::addressof(module_id), sizeof(module_id)) == 0) { - /* Add new CPU and GPU clock tables for Mariko */ - std::memcpy(reinterpret_cast(mapped_nso + pcv::CpuTablesFreeSpace[i]), pcv::NewCpuTables, sizeof(pcv::NewCpuTables)); - std::memcpy(reinterpret_cast(mapped_nso + pcv::GpuTablesFreeSpace[i]), pcv::NewGpuTables, sizeof(pcv::NewGpuTables)); - - /* Patch Mariko max CPU and GPU clockrates */ - std::memcpy(reinterpret_cast(mapped_nso + pcv::MaxCpuClockOffset[i]), &pcv::NewMaxCpuClock, sizeof(pcv::NewMaxCpuClock)); - std::memcpy(reinterpret_cast(mapped_nso + pcv::Reg1MaxGpuOffset[i]), pcv::Reg1NewMaxGpuClock, sizeof(pcv::Reg1NewMaxGpuClock[i])); - std::memcpy(reinterpret_cast(mapped_nso + pcv::Reg2MaxGpuOffset[i]), pcv::Reg2NewMaxGpuClock, sizeof(pcv::Reg2NewMaxGpuClock[i])); - - /* Patch max cpu voltage on Mariko */ - for (u32 j = 0; j < sizeof(pcv::CpuVoltageLimitOffsets[i])/sizeof(u32); j++) { - std::memcpy(reinterpret_cast(mapped_nso + pcv::CpuVoltageLimitOffsets[i][j]), &pcv::NewCpuVoltageLimit, sizeof(pcv::NewCpuVoltageLimit)); - } - for (u32 j = 0; j < sizeof(pcv::CpuVoltageOldTableCoeff[i])/sizeof(u32); j++) { - std::memcpy(reinterpret_cast(mapped_nso + pcv::CpuVoltageOldTableCoeff[i][j]), 
&pcv::NewCpuVoltageScaled, sizeof(pcv::NewCpuVoltageScaled)); - } - - for (u32 j = 0; j < sizeof(pcv::MtcTable_1600[i])/sizeof(u32); j++) { - pcv::MarikoMtcTable* mtc_table_new = reinterpret_cast(mapped_nso + pcv::MtcTable_1600[i][j]); - pcv::MarikoMtcTable* mtc_table_old = reinterpret_cast(mapped_nso + pcv::MtcTable_1600[i][j] - pcv::MtcTableOffset); - - /* Replace 1331 MHz with 1600 MHz, not possible without proper timings for oc clock */ - std::memcpy(reinterpret_cast(mtc_table_old), reinterpret_cast(mtc_table_new), sizeof(pcv::MarikoMtcTable)); - - /* Generate new table for OC MHz */ - pcv::AdjustMtcTable(mtc_table_new, mtc_table_old); - } - - /* Patch RAM Clock */ - for (u32 j = 0; j < sizeof(pcv::EmcFreqOffsets[i])/sizeof(u32); j++) { - std::memcpy(reinterpret_cast(mapped_nso + pcv::EmcFreqOffsets[i][j]), &EmcClock, sizeof(EmcClock)); - } - - /* Replace 1331 MHz with 1600 MHz in EmcDvbTable */ - const u32 mem1331 = 1600'000; - std::memcpy(reinterpret_cast(mapped_nso + pcv::EmcDvb1331[i]), &mem1331, sizeof(mem1331)); - } - } - - u32 PtmEmcClk1600 = GetEmcClock() * 1000; - const u32 PtmEmcClk1331 = 1600'000'000; - - // u32 CpuBoostClock = GetCpuBoostClock() * 1000; - - /* Patch Ptm for coexistent of 1600 MHz and OC clock */ - for (u32 i = 0; i < sizeof(PtmModuleId)/sizeof(ro::ModuleId); i++) { - if (std::memcmp(std::addressof(PtmModuleId[i]), std::addressof(module_id), sizeof(module_id)) == 0) { - for (u32 j = 0; j < 6; j++) { - std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j), &PtmEmcClk1600, sizeof(PtmEmcClk1600)); - std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j + 0x4), &PtmEmcClk1600, sizeof(PtmEmcClk1600)); - } - for (u32 j = 6; j < 10; j++) { - std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j), &PtmEmcClk1331, sizeof(PtmEmcClk1331)); - std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j + 
0x4), &PtmEmcClk1331, sizeof(PtmEmcClk1331)); - } - for (u32 j = 10; j < 16; j+=2) { - std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j), &PtmEmcClk1600, sizeof(PtmEmcClk1600)); - std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * j + 0x4), &PtmEmcClk1600, sizeof(PtmEmcClk1600)); - std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * (j+1)), &PtmEmcClk1331, sizeof(PtmEmcClk1331)); - std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::OffsetInterval * (j+1) + 0x4), &PtmEmcClk1331, sizeof(PtmEmcClk1331)); - } - // for (u32 j = 0; j < 2; j++) { - // std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::CpuBoostOffset + ptm::OffsetInterval * j), &CpuBoostClock, sizeof(CpuBoostClock)); - // std::memcpy(reinterpret_cast(mapped_nso + ptm::EmcOffsetStart[i] + ptm::CpuBoostOffset + ptm::OffsetInterval * j + 0x4), &CpuBoostClock, sizeof(CpuBoostClock)); - // } - } - } - } - } - -} \ No newline at end of file diff --git a/Source/Atmosphere/stratosphere/loader/source/ldr_process_creation.cpp b/Source/Atmosphere/stratosphere/loader/source/ldr_process_creation.cpp new file mode 100644 index 00000000..1c60f8c9 --- /dev/null +++ b/Source/Atmosphere/stratosphere/loader/source/ldr_process_creation.cpp @@ -0,0 +1,783 @@ +/* + * Copyright (c) Atmosphère-NX + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +#include +#include "ldr_auto_close.hpp" +#include "ldr_capabilities.hpp" +#include "ldr_content_management.hpp" +#include "ldr_development_manager.hpp" +#include "ldr_launch_record.hpp" +#include "ldr_meta.hpp" +#include "ldr_patcher.hpp" +#include "ldr_process_creation.hpp" +#include "ldr_ro_manager.hpp" +#include "ldr_oc_patch.hpp" + +namespace ams::ldr { + + namespace { + + /* Convenience defines. */ + constexpr size_t SystemResourceSizeMax = 0x1FE00000; + + /* Types. */ + enum NsoIndex { + Nso_Rtld = 0, + Nso_Main = 1, + Nso_SubSdk0 = 2, + Nso_SubSdk1 = 3, + Nso_SubSdk2 = 4, + Nso_SubSdk3 = 5, + Nso_SubSdk4 = 6, + Nso_SubSdk5 = 7, + Nso_SubSdk6 = 8, + Nso_SubSdk7 = 9, + Nso_SubSdk8 = 10, + Nso_SubSdk9 = 11, + Nso_Sdk = 12, + Nso_Count, + }; + + constexpr inline const char *NsoPaths[Nso_Count] = { + ENCODE_ATMOSPHERE_CODE_PATH("/rtld"), + ENCODE_ATMOSPHERE_CODE_PATH("/main"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk0"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk1"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk2"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk3"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk4"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk5"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk6"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk7"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk8"), + ENCODE_ATMOSPHERE_CODE_PATH("/subsdk9"), + ENCODE_ATMOSPHERE_CODE_PATH("/sdk"), + }; + + constexpr const char *GetNsoPath(size_t idx) { + AMS_ABORT_UNLESS(idx < Nso_Count); + return NsoPaths[idx]; + } + + struct ProcessInfo { + os::NativeHandle process_handle; + uintptr_t args_address; + size_t args_size; + uintptr_t nso_address[Nso_Count]; + size_t nso_size[Nso_Count]; + }; + + /* Global NSO header cache. */ + bool g_has_nso[Nso_Count]; + NsoHeader g_nso_headers[Nso_Count]; + + /* Pcv/Ptm check cache. */ + bool g_is_pcv; + bool g_is_ptm; + + /* Anti-downgrade. 
*/ + #include "ldr_anti_downgrade_tables.inc" + + Result ValidateProgramVersion(ncm::ProgramId program_id, u32 version) { + /* No version verification is done before 8.1.0. */ + R_SUCCEED_IF(hos::GetVersion() < hos::Version_8_1_0); + + /* No verification is done if development. */ + R_SUCCEED_IF(IsDevelopmentForAntiDowngradeCheck()); + + /* Do version-dependent validation, if compiled to do so. */ +#ifdef LDR_VALIDATE_PROCESS_VERSION + const MinimumProgramVersion *entries = nullptr; + size_t num_entries = 0; + + const auto hos_version = hos::GetVersion(); + if (hos_version >= hos::Version_11_0_0) { + entries = g_MinimumProgramVersions1100; + num_entries = g_MinimumProgramVersionsCount1100; + } else if (hos_version >= hos::Version_10_1_0) { + entries = g_MinimumProgramVersions1010; + num_entries = g_MinimumProgramVersionsCount1010; + } else if (hos_version >= hos::Version_10_0_0) { + entries = g_MinimumProgramVersions1000; + num_entries = g_MinimumProgramVersionsCount1000; + } else if (hos_version >= hos::Version_9_1_0) { + entries = g_MinimumProgramVersions910; + num_entries = g_MinimumProgramVersionsCount910; + } else if (hos_version >= hos::Version_9_0_0) { + entries = g_MinimumProgramVersions900; + num_entries = g_MinimumProgramVersionsCount900; + } else if (hos_version >= hos::Version_8_1_0) { + entries = g_MinimumProgramVersions810; + num_entries = g_MinimumProgramVersionsCount810; + } + + for (size_t i = 0; i < num_entries; i++) { + if (entries[i].program_id == program_id) { + R_UNLESS(entries[i].version <= version, ldr::ResultInvalidVersion()); + } + } +#else + AMS_UNUSED(program_id, version); +#endif + return ResultSuccess(); + } + + /* Helpers. */ + Result GetProgramInfoFromMeta(ProgramInfo *out, const Meta *meta) { + /* Copy basic info. 
*/ + out->main_thread_priority = meta->npdm->main_thread_priority; + out->default_cpu_id = meta->npdm->default_cpu_id; + out->main_thread_stack_size = meta->npdm->main_thread_stack_size; + out->program_id = meta->aci->program_id; + + /* Copy access controls. */ + size_t offset = 0; +#define COPY_ACCESS_CONTROL(source, which) \ + ({ \ + const size_t size = meta->source->which##_size; \ + R_UNLESS(offset + size <= sizeof(out->ac_buffer), ldr::ResultInternalError()); \ + out->source##_##which##_size = size; \ + std::memcpy(out->ac_buffer + offset, meta->source##_##which, size); \ + offset += size; \ + }) + + /* Copy all access controls to buffer. */ + COPY_ACCESS_CONTROL(acid, sac); + COPY_ACCESS_CONTROL(aci, sac); + COPY_ACCESS_CONTROL(acid, fac); + COPY_ACCESS_CONTROL(aci, fah); +#undef COPY_ACCESS_CONTROL + + /* Copy flags. */ + out->flags = MakeProgramInfoFlag(static_cast(meta->aci_kac), meta->aci->kac_size / sizeof(util::BitPack32)); + return ResultSuccess(); + } + + bool IsApplet(const Meta *meta) { + return (MakeProgramInfoFlag(static_cast(meta->aci_kac), meta->aci->kac_size / sizeof(util::BitPack32)) & ProgramInfoFlag_ApplicationTypeMask) == ProgramInfoFlag_Applet; + } + + bool IsApplication(const Meta *meta) { + return (MakeProgramInfoFlag(static_cast(meta->aci_kac), meta->aci->kac_size / sizeof(util::BitPack32)) & ProgramInfoFlag_ApplicationTypeMask) == ProgramInfoFlag_Application; + } + + Npdm::AddressSpaceType GetAddressSpaceType(const Meta *meta) { + return static_cast((meta->npdm->flags & Npdm::MetaFlag_AddressSpaceTypeMask) >> Npdm::MetaFlag_AddressSpaceTypeShift); + } + + Acid::PoolPartition GetPoolPartition(const Meta *meta) { + return static_cast((meta->acid->flags & Acid::AcidFlag_PoolPartitionMask) >> Acid::AcidFlag_PoolPartitionShift); + } + + Result LoadNsoHeaders(NsoHeader *nso_headers, bool *has_nso) { + /* Clear NSOs. 
*/ + std::memset(nso_headers, 0, sizeof(*nso_headers) * Nso_Count); + std::memset(has_nso, 0, sizeof(*has_nso) * Nso_Count); + + for (size_t i = 0; i < Nso_Count; i++) { + fs::FileHandle file; + if (R_SUCCEEDED(fs::OpenFile(std::addressof(file), GetNsoPath(i), fs::OpenMode_Read))) { + ON_SCOPE_EXIT { fs::CloseFile(file); }; + + /* Read NSO header. */ + size_t read_size; + R_TRY(fs::ReadFile(std::addressof(read_size), file, 0, nso_headers + i, sizeof(*nso_headers))); + R_UNLESS(read_size == sizeof(*nso_headers), ldr::ResultInvalidNso()); + + has_nso[i] = true; + } + } + + return ResultSuccess(); + } + + Result ValidateNsoHeaders(const NsoHeader *nso_headers, const bool *has_nso) { + /* We must always have a main. */ + R_UNLESS(has_nso[Nso_Main], ldr::ResultInvalidNso()); + + /* If we don't have an RTLD, we must only have a main. */ + if (!has_nso[Nso_Rtld]) { + for (size_t i = Nso_Main + 1; i < Nso_Count; i++) { + R_UNLESS(!has_nso[i], ldr::ResultInvalidNso()); + } + } + + /* All NSOs must have zero text offset. */ + for (size_t i = 0; i < Nso_Count; i++) { + R_UNLESS(nso_headers[i].text_dst_offset == 0, ldr::ResultInvalidNso()); + } + + return ResultSuccess(); + } + + Result ValidateMeta(const Meta *meta, const ncm::ProgramLocation &loc, const fs::CodeVerificationData &code_verification_data) { + /* Validate version. */ + R_TRY(ValidateProgramVersion(loc.program_id, meta->npdm->version)); + + /* Validate program id. */ + R_UNLESS(meta->aci->program_id >= meta->acid->program_id_min, ldr::ResultInvalidProgramId()); + R_UNLESS(meta->aci->program_id <= meta->acid->program_id_max, ldr::ResultInvalidProgramId()); + + /* Check if nca is pcv or ptm */ + g_is_pcv = meta->aci->program_id == ncm::SystemProgramId::Pcv; + g_is_ptm = meta->aci->program_id == ncm::SystemProgramId::Ptm; + + /* Validate the kernel capabilities. 
*/ + R_TRY(TestCapability(static_cast(meta->acid_kac), meta->acid->kac_size / sizeof(util::BitPack32), static_cast(meta->aci_kac), meta->aci->kac_size / sizeof(util::BitPack32))); + + /* If we have data to validate, validate it. */ + if (code_verification_data.has_data && meta->check_verification_data) { + const u8 *sig = code_verification_data.signature; + const size_t sig_size = sizeof(code_verification_data.signature); + const u8 *mod = static_cast(meta->modulus); + const size_t mod_size = crypto::Rsa2048PssSha256Verifier::ModulusSize; + const u8 *exp = fssystem::GetAcidSignatureKeyPublicExponent(); + const size_t exp_size = fssystem::AcidSignatureKeyPublicExponentSize; + const u8 *hsh = code_verification_data.target_hash; + const size_t hsh_size = sizeof(code_verification_data.target_hash); + const bool is_signature_valid = crypto::VerifyRsa2048PssSha256WithHash(sig, sig_size, mod, mod_size, exp, exp_size, hsh, hsh_size); + + R_UNLESS(is_signature_valid, ldr::ResultInvalidNcaSignature()); + } + + /* All good. */ + return ResultSuccess(); + } + + Result GetCreateProcessFlags(u32 *out, const Meta *meta, const u32 ldr_flags) { + const u8 meta_flags = meta->npdm->flags; + + u32 flags = 0; + + /* Set Is64Bit. */ + if (meta_flags & Npdm::MetaFlag_Is64Bit) { + flags |= svc::CreateProcessFlag_Is64Bit; + } + + /* Set AddressSpaceType. */ + switch (GetAddressSpaceType(meta)) { + case Npdm::AddressSpaceType_32Bit: + flags |= svc::CreateProcessFlag_AddressSpace32Bit; + break; + case Npdm::AddressSpaceType_64BitDeprecated: + flags |= svc::CreateProcessFlag_AddressSpace64BitDeprecated; + break; + case Npdm::AddressSpaceType_32BitWithoutAlias: + flags |= svc::CreateProcessFlag_AddressSpace32BitWithoutAlias; + break; + case Npdm::AddressSpaceType_64Bit: + flags |= svc::CreateProcessFlag_AddressSpace64Bit; + break; + default: + return ldr::ResultInvalidMeta(); + } + + /* Set Enable Debug. 
*/ + if (ldr_flags & CreateProcessFlag_EnableDebug) { + flags |= svc::CreateProcessFlag_EnableDebug; + } + + /* Set Enable ASLR. */ + if (!(ldr_flags & CreateProcessFlag_DisableAslr)) { + flags |= svc::CreateProcessFlag_EnableAslr; + } + + /* Set Is Application. */ + if (IsApplication(meta)) { + flags |= svc::CreateProcessFlag_IsApplication; + + /* 7.0.0+: Set OptimizeMemoryAllocation if relevant. */ + if (hos::GetVersion() >= hos::Version_7_0_0) { + if (meta_flags & Npdm::MetaFlag_OptimizeMemoryAllocation) { + flags |= svc::CreateProcessFlag_OptimizeMemoryAllocation; + } + } + } + + /* 5.0.0+ Set Pool Partition. */ + if (hos::GetVersion() >= hos::Version_5_0_0) { + switch (GetPoolPartition(meta)) { + case Acid::PoolPartition_Application: + if (IsApplet(meta)) { + flags |= svc::CreateProcessFlag_PoolPartitionApplet; + } else { + flags |= svc::CreateProcessFlag_PoolPartitionApplication; + } + break; + case Acid::PoolPartition_Applet: + flags |= svc::CreateProcessFlag_PoolPartitionApplet; + break; + case Acid::PoolPartition_System: + flags |= svc::CreateProcessFlag_PoolPartitionSystem; + break; + case Acid::PoolPartition_SystemNonSecure: + flags |= svc::CreateProcessFlag_PoolPartitionSystemNonSecure; + break; + default: + return ldr::ResultInvalidMeta(); + } + } else if (hos::GetVersion() >= hos::Version_4_0_0) { + /* On 4.0.0+, the corresponding bit was simply "UseSecureMemory". */ + if (meta->acid->flags & Acid::AcidFlag_DeprecatedUseSecureMemory) { + flags |= svc::CreateProcessFlag_DeprecatedUseSecureMemory; + } + } + + /* 11.0.0+/meso Set Disable DAS merge. */ + if (meta_flags & Npdm::MetaFlag_DisableDeviceAddressSpaceMerge) { + flags |= svc::CreateProcessFlag_DisableDeviceAddressSpaceMerge; + } + + *out = flags; + return ResultSuccess(); + } + + Result GetCreateProcessParameter(svc::CreateProcessParameter *out, const Meta *meta, u32 flags, os::NativeHandle resource_limit) { + /* Clear output. 
*/ + std::memset(out, 0, sizeof(*out)); + + /* Set name, version, program id, resource limit handle. */ + std::memcpy(out->name, meta->npdm->program_name, sizeof(out->name) - 1); + out->version = meta->npdm->version; + out->program_id = meta->aci->program_id.value; + out->reslimit = resource_limit; + + /* Set flags. */ + R_TRY(GetCreateProcessFlags(std::addressof(out->flags), meta, flags)); + + /* 3.0.0+ System Resource Size. */ + if (hos::GetVersion() >= hos::Version_3_0_0) { + /* Validate size is aligned. */ + R_UNLESS(util::IsAligned(meta->npdm->system_resource_size, os::MemoryBlockUnitSize), ldr::ResultInvalidSize()); + + /* Validate system resource usage. */ + if (meta->npdm->system_resource_size) { + /* Process must be 64-bit. */ + R_UNLESS((out->flags & svc::CreateProcessFlag_AddressSpace64Bit), ldr::ResultInvalidMeta()); + + /* Process must be application or applet. */ + R_UNLESS(IsApplication(meta) || IsApplet(meta), ldr::ResultInvalidMeta()); + + /* Size must be less than or equal to max. */ + R_UNLESS(meta->npdm->system_resource_size <= SystemResourceSizeMax, ldr::ResultInvalidMeta()); + } + out->system_resource_num_pages = meta->npdm->system_resource_size >> 12; + } + + return ResultSuccess(); + } + + ALWAYS_INLINE u64 GetCurrentProcessInfo(svc::InfoType info_type) { + u64 value; + R_ABORT_UNLESS(svc::GetInfo(std::addressof(value), info_type, svc::PseudoHandle::CurrentProcess, 0)); + return value; + } + + Result SearchFreeRegion(uintptr_t *out, size_t mapping_size) { + /* Get address space extents. 
*/ + const uintptr_t heap_start = GetCurrentProcessInfo(svc::InfoType_HeapRegionAddress); + const size_t heap_size = GetCurrentProcessInfo(svc::InfoType_HeapRegionSize); + const uintptr_t alias_start = GetCurrentProcessInfo(svc::InfoType_AliasRegionAddress); + const size_t alias_size = GetCurrentProcessInfo(svc::InfoType_AliasRegionSize); + const uintptr_t aslr_start = GetCurrentProcessInfo(svc::InfoType_AslrRegionAddress); + const size_t aslr_size = GetCurrentProcessInfo(svc::InfoType_AslrRegionSize); + + /* Iterate upwards to find a free region. */ + uintptr_t address = aslr_start; + while (true) { + /* Declare variables for memory querying. */ + svc::MemoryInfo mem_info; + svc::PageInfo page_info; + + /* Check that we're still within bounds. */ + R_UNLESS(address < address + mapping_size, svc::ResultOutOfMemory()); + + /* If we're within the heap region, skip to the end of the heap region. */ + if (heap_size != 0 && !(address + mapping_size - 1 < heap_start || heap_start + heap_size - 1 < address)) { + R_UNLESS(address < heap_start + heap_size, svc::ResultOutOfMemory()); + address = heap_start + heap_size; + continue; + } + + /* If we're within the alias region, skip to the end of the alias region. */ + if (alias_size != 0 && !(address + mapping_size - 1 < alias_start || alias_start + alias_size - 1 < address)) { + R_UNLESS(address < alias_start + alias_size, svc::ResultOutOfMemory()); + address = alias_start + alias_size; + continue; + } + + /* Get the current memory range. */ + R_ABORT_UNLESS(svc::QueryMemory(std::addressof(mem_info), std::addressof(page_info), address)); + + /* If the memory range is free and big enough, use it. */ + if (mem_info.state == svc::MemoryState_Free && mapping_size <= ((mem_info.base_address + mem_info.size) - address)) { + *out = address; + return ResultSuccess(); + } + + /* Check that we can advance. 
*/ + R_UNLESS(address < mem_info.base_address + mem_info.size, svc::ResultOutOfMemory()); + R_UNLESS(mem_info.base_address + mem_info.size - 1 < aslr_start + aslr_size - 1, svc::ResultOutOfMemory()); + + /* Advance. */ + address = mem_info.base_address + mem_info.size; + } + } + + Result DecideAddressSpaceLayout(ProcessInfo *out, svc::CreateProcessParameter *out_param, const NsoHeader *nso_headers, const bool *has_nso, const ArgumentStore::Entry *argument) { + /* Clear output. */ + out->args_address = 0; + out->args_size = 0; + std::memset(out->nso_address, 0, sizeof(out->nso_address)); + std::memset(out->nso_size, 0, sizeof(out->nso_size)); + + size_t total_size = 0; + bool argument_allocated = false; + + /* Calculate base offsets. */ + for (size_t i = 0; i < Nso_Count; i++) { + if (has_nso[i]) { + out->nso_address[i] = total_size; + const size_t text_end = nso_headers[i].text_dst_offset + nso_headers[i].text_size; + const size_t ro_end = nso_headers[i].ro_dst_offset + nso_headers[i].ro_size; + const size_t rw_end = nso_headers[i].rw_dst_offset + nso_headers[i].rw_size + nso_headers[i].bss_size; + out->nso_size[i] = text_end; + out->nso_size[i] = std::max(out->nso_size[i], ro_end); + out->nso_size[i] = std::max(out->nso_size[i], rw_end); + out->nso_size[i] = util::AlignUp(out->nso_size[i], os::MemoryPageSize); + + total_size += out->nso_size[i]; + + if (!argument_allocated && argument != nullptr) { + out->args_address = total_size; + out->args_size = util::AlignUp(2 * sizeof(u32) + argument->argument_size * 2 + ArgumentStore::ArgumentBufferSize, os::MemoryPageSize); + total_size += out->args_size; + argument_allocated = true; + } + } + } + + /* Calculate ASLR. 
*/ + uintptr_t aslr_start = 0; + size_t aslr_size = 0; + if (hos::GetVersion() >= hos::Version_2_0_0) { + switch (out_param->flags & svc::CreateProcessFlag_AddressSpaceMask) { + case svc::CreateProcessFlag_AddressSpace32Bit: + case svc::CreateProcessFlag_AddressSpace32BitWithoutAlias: + aslr_start = svc::AddressSmallMap32Start; + aslr_size = svc::AddressSmallMap32Size; + break; + case svc::CreateProcessFlag_AddressSpace64BitDeprecated: + aslr_start = svc::AddressSmallMap36Start; + aslr_size = svc::AddressSmallMap36Size; + break; + case svc::CreateProcessFlag_AddressSpace64Bit: + aslr_start = svc::AddressMap39Start; + aslr_size = svc::AddressMap39Size; + break; + AMS_UNREACHABLE_DEFAULT_CASE(); + } + } else { + /* On 1.0.0, only 2 address space types existed. */ + if (out_param->flags & svc::CreateProcessFlag_AddressSpace64BitDeprecated) { + aslr_start = svc::AddressSmallMap36Start; + aslr_size = svc::AddressSmallMap36Size; + } else { + aslr_start = svc::AddressSmallMap32Start; + aslr_size = svc::AddressSmallMap32Size; + } + } + R_UNLESS(total_size <= aslr_size, svc::ResultOutOfMemory()); + + /* Set Create Process output. */ + uintptr_t aslr_slide = 0; + size_t free_size = (aslr_size - total_size); + if (out_param->flags & svc::CreateProcessFlag_EnableAslr) { + /* Nintendo uses MT19937 (not os::GenerateRandomBytes), but we'll just use TinyMT for now. */ + aslr_slide = os::GenerateRandomU64(free_size / os::MemoryBlockUnitSize) * os::MemoryBlockUnitSize; + } + + /* Set out. 
*/ + aslr_start += aslr_slide; + for (size_t i = 0; i < Nso_Count; i++) { + if (has_nso[i]) { + out->nso_address[i] += aslr_start; + } + } + if (out->args_address) { + out->args_address += aslr_start; + } + + out_param->code_address = aslr_start; + out_param->code_num_pages = total_size >> 12; + + return ResultSuccess(); + } + + Result CreateProcessImpl(ProcessInfo *out, const Meta *meta, const NsoHeader *nso_headers, const bool *has_nso, const ArgumentStore::Entry *argument, u32 flags, os::NativeHandle resource_limit) { + /* Get CreateProcessParameter. */ + svc::CreateProcessParameter param; + R_TRY(GetCreateProcessParameter(std::addressof(param), meta, flags, resource_limit)); + + /* Decide on an NSO layout. */ + R_TRY(DecideAddressSpaceLayout(out, std::addressof(param), nso_headers, has_nso, argument)); + + /* Actually create process. */ + svc::Handle process_handle; + R_TRY(svc::CreateProcess(std::addressof(process_handle), std::addressof(param), static_cast(meta->aci_kac), meta->aci->kac_size / sizeof(u32))); + + /* Set the output handle. */ + out->process_handle = process_handle; + + return ResultSuccess(); + } + + Result LoadNsoSegment(fs::FileHandle file, const NsoHeader::SegmentInfo *segment, size_t file_size, const u8 *file_hash, bool is_compressed, bool check_hash, uintptr_t map_base, uintptr_t map_end) { + /* Select read size based on compression. */ + if (!is_compressed) { + file_size = segment->size; + } + + /* Validate size. */ + R_UNLESS(file_size <= segment->size, ldr::ResultInvalidNso()); + R_UNLESS(segment->size <= std::numeric_limits::max(), ldr::ResultInvalidNso()); + + /* Load data from file. */ + uintptr_t load_address = is_compressed ? map_end - file_size : map_base; + size_t read_size; + R_TRY(fs::ReadFile(std::addressof(read_size), file, segment->file_offset, reinterpret_cast(load_address), file_size)); + R_UNLESS(read_size == file_size, ldr::ResultInvalidNso()); + + /* Uncompress if necessary. 
*/ + if (is_compressed) { + bool decompressed = (util::DecompressLZ4(reinterpret_cast(map_base), segment->size, reinterpret_cast(load_address), file_size) == static_cast(segment->size)); + R_UNLESS(decompressed, ldr::ResultInvalidNso()); + } + + /* Check hash if necessary. */ + if (check_hash) { + u8 hash[crypto::Sha256Generator::HashSize]; + crypto::GenerateSha256Hash(hash, sizeof(hash), reinterpret_cast(map_base), segment->size); + + R_UNLESS(std::memcmp(hash, file_hash, sizeof(hash)) == 0, ldr::ResultInvalidNso()); + } + + return ResultSuccess(); + } + + Result LoadAutoLoadModule(os::NativeHandle process_handle, fs::FileHandle file, uintptr_t map_address, const NsoHeader *nso_header, uintptr_t nso_address, size_t nso_size) { + /* Map and read data from file. */ + { + AutoCloseMap map(map_address, process_handle, nso_address, nso_size); + R_TRY(map.GetResult()); + + /* Load NSO segments. */ + R_TRY(LoadNsoSegment(file, std::addressof(nso_header->segments[NsoHeader::Segment_Text]), nso_header->text_compressed_size, nso_header->text_hash, (nso_header->flags & NsoHeader::Flag_CompressedText) != 0, + (nso_header->flags & NsoHeader::Flag_CheckHashText) != 0, map_address + nso_header->text_dst_offset, map_address + nso_size)); + R_TRY(LoadNsoSegment(file, std::addressof(nso_header->segments[NsoHeader::Segment_Ro]), nso_header->ro_compressed_size, nso_header->ro_hash, (nso_header->flags & NsoHeader::Flag_CompressedRo) != 0, + (nso_header->flags & NsoHeader::Flag_CheckHashRo) != 0, map_address + nso_header->ro_dst_offset, map_address + nso_size)); + R_TRY(LoadNsoSegment(file, std::addressof(nso_header->segments[NsoHeader::Segment_Rw]), nso_header->rw_compressed_size, nso_header->rw_hash, (nso_header->flags & NsoHeader::Flag_CompressedRw) != 0, + (nso_header->flags & NsoHeader::Flag_CheckHashRw) != 0, map_address + nso_header->rw_dst_offset, map_address + nso_size)); + + /* Clear unused space to zero. 
*/ + const size_t text_end = nso_header->text_dst_offset + nso_header->text_size; + const size_t ro_end = nso_header->ro_dst_offset + nso_header->ro_size; + const size_t rw_end = nso_header->rw_dst_offset + nso_header->rw_size; + std::memset(reinterpret_cast(map_address), 0, nso_header->text_dst_offset); + std::memset(reinterpret_cast(map_address + text_end), 0, nso_header->ro_dst_offset - text_end); + std::memset(reinterpret_cast(map_address + ro_end), 0, nso_header->rw_dst_offset - ro_end); + std::memset(reinterpret_cast(map_address + rw_end), 0, nso_header->bss_size); + + /* Apply embedded patches. */ + ApplyEmbeddedPatchesToModule(nso_header->module_id, map_address, nso_size); + + /* Apply IPS patches. */ + LocateAndApplyIpsPatchesToModule(nso_header->module_id, map_address, nso_size); + + /* Apply pcv and ptm patches. */ + if (g_is_pcv) { + pcv::ApplyAutoPcvPatch(map_address, nso_size); + } + if (g_is_ptm) { + ptm::ApplyAutoPtmPatch(map_address, nso_size); + } + } + + /* Set permissions. */ + const size_t text_size = util::AlignUp(nso_header->text_size, os::MemoryPageSize); + const size_t ro_size = util::AlignUp(nso_header->ro_size, os::MemoryPageSize); + const size_t rw_size = util::AlignUp(nso_header->rw_size + nso_header->bss_size, os::MemoryPageSize); + if (text_size) { + R_TRY(svc::SetProcessMemoryPermission(process_handle, nso_address + nso_header->text_dst_offset, text_size, svc::MemoryPermission_ReadExecute)); + } + if (ro_size) { + R_TRY(svc::SetProcessMemoryPermission(process_handle, nso_address + nso_header->ro_dst_offset, ro_size, svc::MemoryPermission_Read)); + } + if (rw_size) { + R_TRY(svc::SetProcessMemoryPermission(process_handle, nso_address + nso_header->rw_dst_offset, rw_size, svc::MemoryPermission_ReadWrite)); + } + + return ResultSuccess(); + } + + Result LoadAutoLoadModules(const ProcessInfo *process_info, const NsoHeader *nso_headers, const bool *has_nso, const ArgumentStore::Entry *argument) { + /* Load each NSO. 
*/ + for (size_t i = 0; i < Nso_Count; i++) { + if (has_nso[i]) { + fs::FileHandle file; + R_TRY(fs::OpenFile(std::addressof(file), GetNsoPath(i), fs::OpenMode_Read)); + ON_SCOPE_EXIT { fs::CloseFile(file); }; + + uintptr_t map_address; + R_TRY(SearchFreeRegion(std::addressof(map_address), process_info->nso_size[i])); + + R_TRY(LoadAutoLoadModule(process_info->process_handle, file, map_address, nso_headers + i, process_info->nso_address[i], process_info->nso_size[i])); + } + } + + /* Load arguments, if present. */ + if (argument != nullptr) { + /* Write argument data into memory. */ + { + uintptr_t map_address; + R_TRY(SearchFreeRegion(std::addressof(map_address), process_info->args_size)); + + AutoCloseMap map(map_address, process_info->process_handle, process_info->args_address, process_info->args_size); + R_TRY(map.GetResult()); + + ProgramArguments *args = reinterpret_cast(map_address); + std::memset(args, 0, sizeof(*args)); + args->allocated_size = process_info->args_size; + args->arguments_size = argument->argument_size; + std::memcpy(args->arguments, argument->argument, argument->argument_size); + } + + /* Set argument region permissions. */ + R_TRY(svc::SetProcessMemoryPermission(process_info->process_handle, process_info->args_address, process_info->args_size, svc::MemoryPermission_ReadWrite)); + } + + return ResultSuccess(); + } + + } + + /* Process Creation API. */ + Result CreateProcess(os::NativeHandle *out, PinId pin_id, const ncm::ProgramLocation &loc, const cfg::OverrideStatus &override_status, const char *path, const ArgumentStore::Entry *argument, u32 flags, os::NativeHandle resource_limit) { + /* Mount code. */ + AMS_UNUSED(path); + ScopedCodeMount mount(loc, override_status); + R_TRY(mount.GetResult()); + + /* Load meta, possibly from cache. */ + Meta meta; + R_TRY(LoadMetaFromCache(std::addressof(meta), loc, override_status)); + + /* Validate meta. 
*/ + R_TRY(ValidateMeta(std::addressof(meta), loc, mount.GetCodeVerificationData())); + + /* Load, validate NSOs. */ + R_TRY(LoadNsoHeaders(g_nso_headers, g_has_nso)); + R_TRY(ValidateNsoHeaders(g_nso_headers, g_has_nso)); + + /* Actually create process. */ + ProcessInfo info; + R_TRY(CreateProcessImpl(std::addressof(info), std::addressof(meta), g_nso_headers, g_has_nso, argument, flags, resource_limit)); + + /* Load NSOs into process memory. */ + { + /* Ensure we close the process handle, if we fail. */ + auto process_guard = SCOPE_GUARD { os::CloseNativeHandle(info.process_handle); }; + + /* Load all NSOs. */ + R_TRY(LoadAutoLoadModules(std::addressof(info), g_nso_headers, g_has_nso, argument)); + + /* We don't need to close the process handle, since we succeeded. */ + process_guard.Cancel(); + } + + /* Register NSOs with the RoManager. */ + { + /* Nintendo doesn't validate this get, but we do. */ + os::ProcessId process_id = os::GetProcessId(info.process_handle); + + /* Register new process. */ + const auto as_type = GetAddressSpaceType(std::addressof(meta)); + RoManager::GetInstance().RegisterProcess(pin_id, process_id, meta.aci->program_id, as_type == Npdm::AddressSpaceType_64Bit || as_type == Npdm::AddressSpaceType_64BitDeprecated); + + /* Register all NSOs. */ + for (size_t i = 0; i < Nso_Count; i++) { + if (g_has_nso[i]) { + RoManager::GetInstance().AddNso(pin_id, g_nso_headers[i].module_id, info.nso_address[i], info.nso_size[i]); + } + } + } + + /* If we're overriding for HBL, perform HTML document redirection. */ + if (override_status.IsHbl()) { + /* Don't validate result, failure is okay. */ + RedirectHtmlDocumentPathForHbl(loc); + } + + /* Clear the external code for the program. */ + fssystem::DestroyExternalCode(loc.program_id); + + /* Note that we've created the program. */ + SetLaunchedBootProgram(loc.program_id); + + /* Move the process handle to output. 
*/ + *out = info.process_handle; + + return ResultSuccess(); + } + + Result GetProgramInfo(ProgramInfo *out, cfg::OverrideStatus *out_status, const ncm::ProgramLocation &loc, const char *path) { + Meta meta; + + /* Load Meta. */ + { + AMS_UNUSED(path); + + ScopedCodeMount mount(loc); + R_TRY(mount.GetResult()); + R_TRY(LoadMeta(std::addressof(meta), loc, mount.GetOverrideStatus())); + if (out_status != nullptr) { + *out_status = mount.GetOverrideStatus(); + } + } + + return GetProgramInfoFromMeta(out, std::addressof(meta)); + } + + Result PinProgram(PinId *out_id, const ncm::ProgramLocation &loc, const cfg::OverrideStatus &override_status) { + R_UNLESS(RoManager::GetInstance().Allocate(out_id, loc, override_status), ldr::ResultMaxProcess()); + return ResultSuccess(); + } + + Result UnpinProgram(PinId id) { + R_UNLESS(RoManager::GetInstance().Free(id), ldr::ResultNotPinned()); + return ResultSuccess(); + } + + Result GetProcessModuleInfo(u32 *out_count, ldr::ModuleInfo *out, size_t max_out_count, os::ProcessId process_id) { + R_UNLESS(RoManager::GetInstance().GetProcessModuleInfo(out_count, out, max_out_count, process_id), ldr::ResultNotPinned()); + return ResultSuccess(); + } + + Result GetProgramLocationAndOverrideStatusFromPinId(ncm::ProgramLocation *out, cfg::OverrideStatus *out_status, PinId pin_id) { + R_UNLESS(RoManager::GetInstance().GetProgramLocationAndStatus(out, out_status, pin_id), ldr::ResultNotPinned()); + return ResultSuccess(); + } + +}