More precise timing params (accounting for 8Gb density)

This commit is contained in:
KazushiM
2021-12-24 00:29:42 +08:00
parent cf6ef64d99
commit f2215a25ed
2 changed files with 472 additions and 453 deletions

View File

@@ -264,71 +264,7 @@ namespace pcv {
#include "mtc_timing_table.hpp"
#if 0
#define ADJUST_PROP(TARGET, REF) (REF + ((GetEmcClock()-1331200)*(TARGET-REF))/(1600000-1331200))
#define ADJUST_PARAM_ROUND2_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \
TARGET_TABLE->burst_regs.PARAM = \
((ADJUST_PROP(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM) + 1) >> 1) << 1; \
TARGET_TABLE->shadow_regs_ca_train.PARAM = \
((ADJUST_PROP(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM) + 1) >> 1) << 1; \
TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
((ADJUST_PROP(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM) + 1) >> 1) << 1;
#define ADJUST_PARAM(TARGET_PARAM, REF_PARAM) \
TARGET_PARAM = ADJUST_PROP(TARGET_PARAM, REF_PARAM);
#define ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, PARAM) \
ADJUST_PARAM(TARGET_TABLE->PARAM, REF_TABLE->PARAM)
#define ADJUST_PARAM_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \
ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, burst_regs.PARAM) \
ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_ca_train.PARAM) \
ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_rdwr_train.PARAM)
#define TRIM_BIT(IN_BITS, HIGH, LOW) \
((IN_BITS >> LOW) & ( (1u << (HIGH - LOW + 1u)) - 1u ))
#define ADJUST_BIT(TARGET_PARAM, REF_PARAM, HIGH, LOW) \
ADJUST_PROP(TRIM_BIT(TARGET_PARAM, HIGH, LOW), TRIM_BIT(REF_PARAM, HIGH, LOW))
#define CLEAR_BIT(BITS, HIGH, LOW) \
BITS = BITS & ~( ((1u << HIGH) << 1u) - (1u << LOW) );
#define ADJUST_BIT_ALL_REG_SINGLE_OP(TARGET_TABLE, REF_TABLE, PARAM, HIGH, LOW, OPERATION) \
TARGET_TABLE->burst_regs.PARAM = \
(ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH, LOW) << LOW) OPERATION; \
TARGET_TABLE->shadow_regs_ca_train.PARAM = \
(ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH, LOW)) << LOW OPERATION; \
TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
(ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH, LOW)) << LOW OPERATION;
#define ADJUST_BIT_ALL_REG_PAIR(TARGET_TABLE, REF_TABLE, PARAM, HIGH1, LOW1, HIGH2, LOW2) \
TARGET_TABLE->burst_regs.PARAM = \
ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH1, LOW1) << LOW1 \
| ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH2, LOW2) << LOW2; \
TARGET_TABLE->shadow_regs_ca_train.PARAM = \
ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH1, LOW1) << LOW1 \
| ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH2, LOW2) << LOW2; \
TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH1, LOW1) << LOW1 \
| ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH2, LOW2) << LOW2;
/* For latency allowance */
#define ADJUST_INVERSE(TARGET) ((TARGET*1000) / (GetEmcClock()/1600))
#endif
#define ADJUST_PARAM(PARAM) PARAM = GetEmcClock()*PARAM/1600000;
#define ADJUST_PARAM_TABLE(TABLE, PARAM) ADJUST_PARAM(TABLE->PARAM)
#define ADJUST_PARAM_ALL_REG(TABLE, PARAM) \
ADJUST_PARAM_TABLE(TABLE, burst_regs.PARAM) \
ADJUST_PARAM_TABLE(TABLE, shadow_regs_ca_train.PARAM) \
ADJUST_PARAM_TABLE(TABLE, shadow_regs_rdwr_train.PARAM)
void AdjustMtcTable(MarikoMtcTable* table)
void AdjustMtcTable(MarikoMtcTable* table, MarikoMtcTable* ref)
{
/* Official Tegra X1 TRM, sign up for nvidia developer program (free) to download:
* https://developer.nvidia.com/embedded/dlc/tegra-x1-technical-reference-manual
@@ -347,409 +283,506 @@ namespace pcv {
* you'd better calculate timings yourself rather than relying on following algorithm.
*/
ADJUST_PARAM_ALL_REG(table, emc_rc);
ADJUST_PARAM_ALL_REG(table, emc_rfc);
ADJUST_PARAM_ALL_REG(table, emc_rfcpb);
ADJUST_PARAM_ALL_REG(table, emc_ras);
ADJUST_PARAM_ALL_REG(table, emc_rp);
ADJUST_PARAM_ALL_REG(table, emc_r2w);
ADJUST_PARAM_ALL_REG(table, emc_w2r);
ADJUST_PARAM_ALL_REG(table, emc_r2p);
ADJUST_PARAM_ALL_REG(table, emc_w2p);
ADJUST_PARAM_ALL_REG(table, emc_trtm);
ADJUST_PARAM_ALL_REG(table, emc_twtm);
ADJUST_PARAM_ALL_REG(table, emc_tratm);
ADJUST_PARAM_ALL_REG(table, emc_twatm);
ADJUST_PARAM_ALL_REG(table, emc_rd_rcd);
ADJUST_PARAM_ALL_REG(table, emc_wr_rcd);
ADJUST_PARAM_ALL_REG(table, emc_rrd);
/* Linearly scale TARGET along the line through REF (result at a clock of
 * 1331200) and the current TARGET (result at a clock of 1600000), evaluated
 * at GetEmcClock(), and round the result up.
 * The arithmetic is done in double on purpose: with integer operands the
 * division truncates before std::ceil runs (making the ceil a no-op), and an
 * unsigned TARGET smaller than REF would wrap around in (TARGET - REF).
 * The trailing ';' is part of the macro because callers chain expansions
 * without separators. */
#define ADJUST_PARAM(TARGET, REF) \
    TARGET = std::ceil(static_cast<double>(REF) + \
        (static_cast<double>(GetEmcClock()) - 1331200.) * \
        (static_cast<double>(TARGET) - static_cast<double>(REF)) / (1600000. - 1331200.));
ADJUST_PARAM_ALL_REG(table, emc_refresh);
ADJUST_PARAM_ALL_REG(table, emc_pre_refresh_req_cnt);
/* Apply ADJUST_PARAM to the same member PARAM of TABLE and of REF
 * (both are dereferenced with '->', so both must be pointers). */
#define ADJUST_PARAM_TABLE(TABLE, PARAM, REF) ADJUST_PARAM(TABLE->PARAM, REF->PARAM)
ADJUST_PARAM_ALL_REG(table, emc_pdex2wr);
ADJUST_PARAM_ALL_REG(table, emc_pdex2rd);
ADJUST_PARAM_ALL_REG(table, emc_act2pden);
ADJUST_PARAM_ALL_REG(table, emc_rw2pden);
ADJUST_PARAM_ALL_REG(table, emc_cke2pden);
ADJUST_PARAM_ALL_REG(table, emc_pdex2mrr);
/* Apply ADJUST_PARAM_TABLE to PARAM in each of the three register sets:
 * burst_regs, shadow_regs_ca_train and shadow_regs_rdwr_train.
 * No separators are written between the three expansions, so this relies on
 * every ADJUST_PARAM expansion ending in its own ';'. */
#define ADJUST_PARAM_ALL_REG(TABLE, PARAM, REF) \
ADJUST_PARAM_TABLE(TABLE, burst_regs.PARAM, REF) \
ADJUST_PARAM_TABLE(TABLE, shadow_regs_ca_train.PARAM, REF) \
ADJUST_PARAM_TABLE(TABLE, shadow_regs_rdwr_train.PARAM, REF)
ADJUST_PARAM_ALL_REG(table, emc_txsr);
ADJUST_PARAM_ALL_REG(table, emc_txsrdll);
ADJUST_PARAM_ALL_REG(table, emc_tcke);
ADJUST_PARAM_ALL_REG(table, emc_tckesr);
ADJUST_PARAM_ALL_REG(table, emc_tpd);
ADJUST_PARAM_ALL_REG(table, emc_tfaw);
ADJUST_PARAM_ALL_REG(table, emc_trpab);
ADJUST_PARAM_ALL_REG(table, emc_tclkstop);
ADJUST_PARAM_ALL_REG(table, emc_trefbw);
/* Store VALUE into member PARAM of all three register sets of TABLE
 * (burst_regs, shadow_regs_ca_train, shadow_regs_rdwr_train).
 * TABLE and VALUE are parenthesized so that expression arguments such as
 * `&tbl` expand with the intended precedence.
 * VALUE is evaluated three times, so it must be free of side effects.
 * The macro expands to three statements: do not use it as the unbraced body
 * of an if/for/while. */
#define WRITE_PARAM_ALL_REG(TABLE, PARAM, VALUE) \
    (TABLE)->burst_regs.PARAM = (VALUE); \
    (TABLE)->shadow_regs_ca_train.PARAM = (VALUE); \
    (TABLE)->shadow_regs_rdwr_train.PARAM = (VALUE);
ADJUST_PARAM_ALL_REG(table, emc_pmacro_dll_cfg_2);
// tCK_avg (average clock period) in ns (10E-3 ns)
// Computed as 1e6 / GetEmcClock(), except that a clock of exactly 2131200
// uses the fixed value 0.468.
// NOTE(review): the quotient is in ns only if GetEmcClock() returns kHz — confirm.
const double tCK_avg = GetEmcClock() == 2131200 ? 0.468 : 1000'000. / GetEmcClock();
// tRPpb (row precharge time per bank) in ns
const u32 tRPpb = 18;
// tRPab (row precharge time all banks) in ns
const u32 tRPab = 21;
// tRAS (row active time) in ns
const u32 tRAS = 42;
// tRC (ACTIVATE-ACTIVATE command period same bank) in ns
const u32 tRC = tRPpb + tRAS;
// tRFCab (refresh cycle time all banks) in ns for 8Gb density
const u32 tRFCab = 280;
// tRFCpb (refresh cycle time per bank) in ns for 8Gb density
const u32 tRFCpb = 140;
// tRCD (RAS-CAS delay) in ns
const u32 tRCD = 18;
// tRRD (Active bank-A to Active bank-B) in ns
const double tRRD = GetEmcClock() == 2131200 ? 7.5 : 10.;
// tREFpb (average refresh interval per bank) in ns for 8Gb density
const u32 tREFpb = 488;
// tREFab (average refresh interval all 8 banks) in ns for 8Gb density
// const u32 tREFab = tREFpb * 8;
// #_of_rows per die for 8Gb density
const u32 numOfRows = 65536;
// {REFRESH, REFRESH_LO} = max[(tREF/#_of_rows) / (emc_clk_period) - 64, (tREF/#_of_rows) / (emc_clk_period) * 97%]
// emc_clk_period = dram_clk / 2;
// 1600 MHz: 5894, but N' set to 6176 (~4.8% margin)
// std::ceil returns a double, so the value is converted to u32 before
// "/ 4 * 4"; in floating point that sequence would be a no-op and the result
// would not be rounded down to a multiple of 4 (6179 instead of 6176 at 1600 MHz).
const u32 REFRESH = static_cast<u32>(std::ceil(double(tREFpb) * GetEmcClock() / numOfRows * (1.048) / 2 - 64)) / 4 * 4;
// tPDEX2WR, tPDEX2RD (timing delay from exiting powerdown mode to a write/read command) in ns
const u32 tPDEX2 = 10;
// [Guessed] tACT2PDEN (timing delay from an activate, MRS or EMRS command to power-down entry) in ns
const u32 tACT2PDEN = 14;
// [Guessed] tPDEX2MRR (timing delay from exiting powerdown mode to MRR command) in ns
const double tPDEX2MRR = 28.75;
// [Guessed] tCKE2PDEN (timing delay from turning off CKE to power-down entry) in ns
const double tCKE2PDEN = 8.5;
// tXSR (SELF REFRESH exit to next valid command delay) in ns
const double tXSR = tRFCab + 7.5;
// tCKE (minimum CKE high pulse width) in ns
const u32 tCKE = 8;
// tCKELPD (minimum CKE low pulse width in SELF REFRESH) in ns
const u32 tCKELPD = 15;
// [Guessed] tPD (minimum CKE low pulse width in power-down mode) in ns
const double tPD = 7.5;
// tFAW (Four-bank Activate Window) in ns
const u32 tFAW = GetEmcClock() == 2131200 ? 30 : 40;
ADJUST_PARAM_TABLE(table, dram_timings.rl);
/* Convert PARAM into a number of clock periods of length tCK_avg, rounded up.
 * PARAM must be in the same time unit as tCK_avg, and a `tCK_avg` must be in
 * scope at the point of expansion. The result is a double. */
#define GET_CYCLE_CEIL(PARAM) std::ceil(static_cast<double>(PARAM) / tCK_avg)
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_rcd);
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_rp);
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_rc);
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_ras);
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_faw);
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_wap2pre);
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_r2w);
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_w2r);
ADJUST_PARAM_TABLE(table, burst_mc_regs.mc_emem_arb_timing_rfcpb);
WRITE_PARAM_ALL_REG(table, emc_rc, GET_CYCLE_CEIL(tRC));
WRITE_PARAM_ALL_REG(table, emc_rfc, GET_CYCLE_CEIL(tRFCab));
WRITE_PARAM_ALL_REG(table, emc_rfcpb, GET_CYCLE_CEIL(tRFCpb));
WRITE_PARAM_ALL_REG(table, emc_ras, GET_CYCLE_CEIL(tRAS));
WRITE_PARAM_ALL_REG(table, emc_rp, GET_CYCLE_CEIL(tRPpb));
ADJUST_PARAM_ALL_REG(table, emc_r2w, ref);
ADJUST_PARAM_ALL_REG(table, emc_w2r, ref);
ADJUST_PARAM_ALL_REG(table, emc_r2p, ref);
ADJUST_PARAM_ALL_REG(table, emc_w2p, ref);
ADJUST_PARAM_ALL_REG(table, emc_trtm, ref);
ADJUST_PARAM_ALL_REG(table, emc_twtm, ref);
ADJUST_PARAM_ALL_REG(table, emc_tratm, ref);
ADJUST_PARAM_ALL_REG(table, emc_twatm, ref);
WRITE_PARAM_ALL_REG(table, emc_rd_rcd, GET_CYCLE_CEIL(tRCD));
WRITE_PARAM_ALL_REG(table, emc_wr_rcd, GET_CYCLE_CEIL(tRCD));
WRITE_PARAM_ALL_REG(table, emc_rrd, GET_CYCLE_CEIL(tRRD));
WRITE_PARAM_ALL_REG(table, emc_refresh, REFRESH);
WRITE_PARAM_ALL_REG(table, emc_pre_refresh_req_cnt, REFRESH / 4);
WRITE_PARAM_ALL_REG(table, emc_pdex2wr, GET_CYCLE_CEIL(tPDEX2));
WRITE_PARAM_ALL_REG(table, emc_pdex2rd, GET_CYCLE_CEIL(tPDEX2));
WRITE_PARAM_ALL_REG(table, emc_act2pden,GET_CYCLE_CEIL(tACT2PDEN));
ADJUST_PARAM_ALL_REG(table, emc_rw2pden, ref);
WRITE_PARAM_ALL_REG(table, emc_cke2pden,GET_CYCLE_CEIL(tCKE2PDEN));
WRITE_PARAM_ALL_REG(table, emc_pdex2mrr,GET_CYCLE_CEIL(tPDEX2MRR));
WRITE_PARAM_ALL_REG(table, emc_txsr, GET_CYCLE_CEIL(tXSR));
WRITE_PARAM_ALL_REG(table, emc_txsrdll, GET_CYCLE_CEIL(tXSR));
WRITE_PARAM_ALL_REG(table, emc_tcke, GET_CYCLE_CEIL(tCKE));
WRITE_PARAM_ALL_REG(table, emc_tckesr, GET_CYCLE_CEIL(tCKELPD));
WRITE_PARAM_ALL_REG(table, emc_tpd, GET_CYCLE_CEIL(tPD));
WRITE_PARAM_ALL_REG(table, emc_tfaw, GET_CYCLE_CEIL(tFAW));
WRITE_PARAM_ALL_REG(table, emc_trpab, GET_CYCLE_CEIL(tRPab));
ADJUST_PARAM_ALL_REG(table, emc_tclkstop, ref);
WRITE_PARAM_ALL_REG(table, emc_trefbw, REFRESH + 64);
ADJUST_PARAM_TABLE(table, la_scale_regs.mc_mll_mpcorer_ptsa_rate);
ADJUST_PARAM_TABLE(table, la_scale_regs.mc_ptsa_grant_decrement);
ADJUST_PARAM_ALL_REG(table, emc_pmacro_dll_cfg_2, ref); // EMC_DLL_CFG_2_0: level select for VDDA?
ADJUST_PARAM_TABLE(table, min_mrs_wait);
ADJUST_PARAM_TABLE(table, latency);
}
// ADJUST_PARAM_TABLE(table, dram_timings.rl); // not used on Mariko
#if 0
void AdjustMtcTable(MarikoMtcTable* target_table, MarikoMtcTable* ref_table)
{
/* Official Tegra X1 TRM, sign up for nvidia developer program (free) to download: */
/* https://developer.nvidia.com/embedded/dlc/tegra-x1-technical-reference-manual */
/* Section 18.11: MC Registers */
// Fill the mc_emem_arb_timing_* registers from the DRAM timing constants
// (converted to cycles with GET_CYCLE_CEIL) and from already-written
// burst_regs.emc_* values, each divided by DIV plus a per-register offset.
// NOTE(review): GET_CYCLE_CEIL yields a double, so "GET_CYCLE_CEIL(x) / DIV"
// is a floating-point division and std::ceil rounds it up. In the four lines
// that read table->burst_regs.emc_*, the division is an integer division if
// those fields are integers, which would make std::ceil a no-op there —
// confirm which rounding is intended.
constexpr u32 DIV = 4; // ?
table->burst_mc_regs.mc_emem_arb_timing_rcd = std::ceil(GET_CYCLE_CEIL(tRCD) / DIV - 2);
table->burst_mc_regs.mc_emem_arb_timing_rp = std::ceil(GET_CYCLE_CEIL(tRPpb) / DIV - 1);
table->burst_mc_regs.mc_emem_arb_timing_rc = std::ceil(std::max(GET_CYCLE_CEIL(tRC), GET_CYCLE_CEIL(tRAS)+GET_CYCLE_CEIL(tRPpb))/ DIV);
table->burst_mc_regs.mc_emem_arb_timing_ras = std::ceil(GET_CYCLE_CEIL(tRAS) / DIV - 2);
table->burst_mc_regs.mc_emem_arb_timing_faw = std::ceil(GET_CYCLE_CEIL(tFAW) / DIV - 1);
table->burst_mc_regs.mc_emem_arb_timing_rrd = std::ceil(GET_CYCLE_CEIL(tRRD) / DIV - 1);
table->burst_mc_regs.mc_emem_arb_timing_rap2pre = std::ceil(table->burst_regs.emc_r2p / DIV);
table->burst_mc_regs.mc_emem_arb_timing_wap2pre = std::ceil(table->burst_regs.emc_w2p / DIV);
table->burst_mc_regs.mc_emem_arb_timing_r2w = std::ceil(table->burst_regs.emc_r2w / DIV + 1);
table->burst_mc_regs.mc_emem_arb_timing_w2r = std::ceil(table->burst_regs.emc_w2r / DIV + 1);
table->burst_mc_regs.mc_emem_arb_timing_rfcpb = std::ceil(GET_CYCLE_CEIL(tRFCpb) / DIV + 1); // ?
/* Apparent timing parameters, simply adjust proportionally. */
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rc);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rfc);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rfcpb);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_ras);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rp);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_r2w);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_w2r);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_r2p);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_w2p);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_trtm);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_twtm);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tratm);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_twatm);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rd_rcd);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_wr_rcd);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rrd);
ADJUST_PARAM_TABLE(table, la_scale_regs.mc_mll_mpcorer_ptsa_rate, ref);
ADJUST_PARAM_TABLE(table, la_scale_regs.mc_ptsa_grant_decrement, ref);
/* emc_wdv, emc_wsv, emc_wev, emc_wdv_mask,
emc_quse, emc_quse_width, emc_ibdly, emc_obdly,
emc_einput, emc_einput_duration, emc_qrst, emc_qsafe,
emc_rdv, emc_rdv_mask, emc_rdv_early, emc_rdv_early_mask */
#ifdef EXPERIMENTAL
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv);
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wsv);
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wev);
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv_mask);
// ADJUST_PARAM_TABLE(table, min_mrs_wait); // not used on LPDDR4X
// ADJUST_PARAM_TABLE(table, latency); // not used
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse_width);
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_ibdly, 6,0, | (1 << 28));
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_obdly, 5,0, | (1 << 28));
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput_duration);
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_qrst, 6,0, | (6 << 16));
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qsafe);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv);
target_table->burst_regs.emc_rdv_mask = target_table->burst_regs.emc_rdv + 2;
target_table->shadow_regs_ca_train.emc_rdv_mask = target_table->shadow_regs_ca_train.emc_rdv + 2;
target_table->shadow_regs_rdwr_train.emc_rdv_mask = target_table->shadow_regs_rdwr_train.emc_rdv + 2;
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv_early);
target_table->burst_regs.emc_rdv_early_mask = target_table->burst_regs.emc_rdv_early + 2;
target_table->shadow_regs_ca_train.emc_rdv_early_mask = target_table->shadow_regs_ca_train.emc_rdv_early + 2;
target_table->shadow_regs_rdwr_train.emc_rdv_early_mask = target_table->shadow_regs_rdwr_train.emc_rdv_early + 2;
#endif
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_refresh);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_pre_refresh_req_cnt);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_pdex2wr);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_pdex2rd);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_act2pden);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rw2pden);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_cke2pden);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_pdex2mrr);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_txsr);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_txsrdll);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tcke);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tckesr);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tpd);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tfaw);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_trpab);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tclkstop);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_trefbw);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_pmacro_dll_cfg_2);
/* emc_pmacro_...,
emc_zcal_wait_cnt, emc_mrs_wait_cnt(2),
emc_pmacro_autocal_cfg_common, emc_dyn_self_ref_control, emc_qpop, emc_pmacro_cmd_pad_tx_ctrl,
emc_tr_timing_0, emc_tr_rdv, emc_tr_qpop, emc_tr_rdv_mask, emc_tr_qsafe, emc_tr_qrst,
emc_training_vref_settle */
#ifdef EXPERIMENTAL
/* DDLL values */
{
#define OFFSET_ALL_REG(PARAM) \
offsetof(MarikoMtcTable, burst_regs.PARAM), \
offsetof(MarikoMtcTable, shadow_regs_ca_train.PARAM), \
offsetof(MarikoMtcTable, shadow_regs_rdwr_train.PARAM) \
/* Section 1: adjust HI bits: BIT 26:16 */
const uint32_t ddll_high[] = {
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_5),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_5),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3),
};
for (uint32_t i = 0; i < sizeof(ddll_high)/sizeof(uint32_t); i++)
{
uint32_t *ddll = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(target_table) + ddll_high[i]);
uint32_t *ddll_ref = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(ref_table) + ddll_high[i]);
uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 26,16) & ((1 << 10) - 1);
CLEAR_BIT(*ddll, 26,16)
*ddll |= adjusted_ddll << 16;
}
/* Section 2: adjust LOW bits: BIT 10:0 */
const uint32_t ddll_low[] = {
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_0),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_1),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_3),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_0),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_1),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_3),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3),
};
for (uint32_t i = 0; i < sizeof(ddll_low)/sizeof(uint32_t); i++)
{
uint32_t *ddll = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(target_table) + ddll_low[i]);
uint32_t *ddll_ref = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(ref_table) + ddll_low[i]);
uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 10,0) & ((1 << 10) - 1);
CLEAR_BIT(*ddll, 10,0)
*ddll |= adjusted_ddll;
}
// Calculate DIVM and DIVN (clock DIVisors)
// Common PLL oscillator is 38.4 MHz
// PLLMB_OUT = 38.4 MHz / PLLMB_DIVM * PLLMB_DIVN
// The remainder of (clock / 38400) is matched, from largest to smallest,
// against the fractions 3/4, 2/3, 1/2, 1/3 and 1/4 of 38400; the first
// threshold reached selects DIVM and the numerator added to DIVN. Below 1/4
// DIVM stays 1 and DIVN is the plain quotient.
// Thresholds are written as 38400 * N / D (multiply first): a parenthesized
// integer fraction such as (3/4) evaluates to 0 and would make the first
// branch match for every clock.
u32 divm = 1;
u32 divn = GetEmcClock() / 38400;
u32 remainder = GetEmcClock() % 38400;
if (remainder >= 38400 * 3 / 4) {
    divm = 4;
    divn = divn * divm + 3;
} else if (remainder >= 38400 * 2 / 3) {
    divm = 3;
    divn = divn * divm + 2;
} else if (remainder >= 38400 * 1 / 2) {
    divm = 2;
    divn = divn * divm + 1;
} else if (remainder >= 38400 * 1 / 3) {
    divm = 3;
    divn = divn * divm + 1;
} else if (remainder >= 38400 * 1 / 4) {
    divm = 4;
    divn = divn * divm + 1;
}
ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_zcal_wait_cnt, 21,16, 10,0)
ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt, 21,16, 10,0)
ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt2, 21,16, 10,0)
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_auto_cal_channel, 5,0, | 0xC1E00300)
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_pmacro_autocal_cfg_common, 5,0, | 8 << 8)
ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_dyn_self_ref_control, 31,31, 15,0)
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qpop);
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_timing_0, 9,0, | 0x1186100)
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_rdv);
target_table->burst_regs.emc_tr_rdv_mask = target_table->burst_regs.emc_tr_rdv + 2;
target_table->shadow_regs_ca_train.emc_tr_rdv_mask = target_table->shadow_regs_ca_train.emc_tr_rdv + 2;
target_table->shadow_regs_rdwr_train.emc_tr_rdv_mask = target_table->shadow_regs_rdwr_train.emc_tr_rdv + 2;
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qpop);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qsafe);
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_qrst, 6,0, | (6 << 16));
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_training_vref_settle, 15,0, | (4 << 16));
#endif
ADJUST_PARAM_TABLE(target_table, ref_table, dram_timings.rl);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_rcd);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_rp);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_rc);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_ras);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_faw);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_wap2pre);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_r2w);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_w2r);
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_timing_rfcpb);
ADJUST_PARAM_TABLE(target_table, ref_table, la_scale_regs.mc_mll_mpcorer_ptsa_rate);
ADJUST_PARAM_TABLE(target_table, ref_table, la_scale_regs.mc_ptsa_grant_decrement);
ADJUST_PARAM_TABLE(target_table, ref_table, min_mrs_wait);
ADJUST_PARAM_TABLE(target_table, ref_table, latency);
/* Patch PLLMB divisors */
table->pllmb_divm = divm;
table->pllmb_divn = divn;
#ifdef EXPERIMENTAL
/* External Memory Arbitration Configuration */
/* BIT 20:16 - EXTRA_TICKS_PER_UPDATE: 0 */
/* BIT 8:0 - CYCLES_PER_UPDATE: 12(1600MHz), 10(1331.2MHz) */
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_cfg);
/* External Memory Arbitration Configuration: Direction Arbiter: Turns */
/* BIT 31:24 - W2R_TURN: approx. mc_emem_arb_timing_w2r */
/* BIT 23:16 - R2W_TURN: approx. mc_emem_arb_timing_r2w */
/* BIT 15:8 - W2W_TURN: 0 */
/* BIT 7:0 - R2R_TURN: 0 */
{
uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_turns;
uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_turns;
uint8_t w2r_turn = ADJUST_BIT(param_1600, param_1331, 31,24);
uint8_t r2w_turn = ADJUST_BIT(param_1600, param_1331, 23,16);
target_table->burst_mc_regs.mc_emem_arb_da_turns = w2r_turn << 24 | r2w_turn << 16;
}
#define ADJUST_PARAM_ROUND2_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \
TARGET_TABLE->burst_regs.PARAM = \
((ADJUST_PROP(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM) + 1) >> 1) << 1; \
TARGET_TABLE->shadow_regs_ca_train.PARAM = \
((ADJUST_PROP(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM) + 1) >> 1) << 1; \
TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
((ADJUST_PROP(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM) + 1) >> 1) << 1;
/* External Memory Arbitration Configuration: Direction Arbiter: Covers */
/* BIT 23:16 - RCD_W_COVER: 13(1600MHz), 11(1331.2MHz) */
/* BIT 15:8 - RCD_R_COVER: 8(1600MHz), 7(1331.2MHz) */
/* BIT 7:0 - RC_COVER: approx. mc_emem_arb_timing_rc, 12(1600MHz), 9(1331.2MHz) */
{
uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_covers;
uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_covers;
uint8_t rcd_w_cover = ADJUST_BIT(param_1600, param_1331, 23,16);
uint8_t rcd_r_cover = ADJUST_BIT(param_1600, param_1331, 15,8);
uint8_t rc_cover = ADJUST_BIT(param_1600, param_1331, 7,0);
target_table->burst_mc_regs.mc_emem_arb_da_covers = rcd_w_cover << 16 | rcd_r_cover << 8 | rc_cover;
}
#define ADJUST_PARAM(TARGET_PARAM, REF_PARAM) \
TARGET_PARAM = ADJUST_PROP(TARGET_PARAM, REF_PARAM);
/* External Memory Arbitration Configuration: Miscellaneous Thresholds (0) */
/* BIT 20:16 - PRIORITY_INVERSION_ISO_THRESHOLD: 12(1600MHz), 10(1331.2MHz) */
/* BIT 14:8 - PRIORITY_INVERSION_THRESHOLD: 36(1600MHz), 30(1331.2MHz) */
/* BIT 7:0 - BC2AA_HOLDOFF_THRESHOLD: set to mc_emem_arb_timing_rc */
{
uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_misc0;
uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_misc0;
uint8_t priority_inversion_iso_threshold = ADJUST_BIT(param_1600, param_1331, 20,16);
uint8_t priority_inversion_threshold = ADJUST_BIT(param_1600, param_1331, 14,8);
uint8_t bc2aa_holdoff_threshold = target_table->burst_mc_regs.mc_emem_arb_timing_rc;
CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 20,16)
CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 14,8)
CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 7,0)
target_table->burst_mc_regs.mc_emem_arb_misc0 |=
(priority_inversion_iso_threshold << 16 | priority_inversion_threshold << 8 | bc2aa_holdoff_threshold);
}
#define ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, PARAM) \
ADJUST_PARAM(TARGET_TABLE->PARAM, REF_TABLE->PARAM)
/* Latency allowance settings */
{
/* Section 1: adjust write latency */
/* BIT 23:16 - ALLOWANCE_WRITE: 128(1600MHz), 153(1331.2MHz) */
const uint32_t latency_write_offset[] = {
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_1),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_tsec_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmca_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcaa_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmc_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcab_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_ppcs_1),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_mpcore_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_avpc_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvenc_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvdec_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
};
for (uint32_t i = 0; i < sizeof(latency_write_offset)/sizeof(uint32_t); i++)
#define ADJUST_PARAM_ALL_REG(TARGET_TABLE, REF_TABLE, PARAM) \
ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, burst_regs.PARAM) \
ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_ca_train.PARAM) \
ADJUST_PARAM_TABLE(TARGET_TABLE, REF_TABLE, shadow_regs_rdwr_train.PARAM)
#define TRIM_BIT(IN_BITS, HIGH, LOW) \
((IN_BITS >> LOW) & ( (1u << (HIGH - LOW + 1u)) - 1u ))
#define ADJUST_BIT(TARGET_PARAM, REF_PARAM, HIGH, LOW) \
ADJUST_PROP(TRIM_BIT(TARGET_PARAM, HIGH, LOW), TRIM_BIT(REF_PARAM, HIGH, LOW))
#define CLEAR_BIT(BITS, HIGH, LOW) \
BITS = BITS & ~( ((1u << HIGH) << 1u) - (1u << LOW) );
#define ADJUST_BIT_ALL_REG_SINGLE_OP(TARGET_TABLE, REF_TABLE, PARAM, HIGH, LOW, OPERATION) \
TARGET_TABLE->burst_regs.PARAM = \
(ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH, LOW) << LOW) OPERATION; \
TARGET_TABLE->shadow_regs_ca_train.PARAM = \
(ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH, LOW)) << LOW OPERATION; \
TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
(ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH, LOW)) << LOW OPERATION;
#define ADJUST_BIT_ALL_REG_PAIR(TARGET_TABLE, REF_TABLE, PARAM, HIGH1, LOW1, HIGH2, LOW2) \
TARGET_TABLE->burst_regs.PARAM = \
ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH1, LOW1) << LOW1 \
| ADJUST_BIT(TARGET_TABLE->burst_regs.PARAM, REF_TABLE->burst_regs.PARAM, HIGH2, LOW2) << LOW2; \
TARGET_TABLE->shadow_regs_ca_train.PARAM = \
ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH1, LOW1) << LOW1 \
| ADJUST_BIT(TARGET_TABLE->shadow_regs_ca_train.PARAM, REF_TABLE->shadow_regs_ca_train.PARAM, HIGH2, LOW2) << LOW2; \
TARGET_TABLE->shadow_regs_rdwr_train.PARAM = \
ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH1, LOW1) << LOW1 \
| ADJUST_BIT(TARGET_TABLE->shadow_regs_rdwr_train.PARAM, REF_TABLE->shadow_regs_rdwr_train.PARAM, HIGH2, LOW2) << LOW2;
/* For latency allowance */
#define ADJUST_INVERSE(TARGET) ((TARGET*1000) / (GetEmcClock()/1600))
/* emc_wdv, emc_wsv, emc_wev, emc_wdv_mask,
emc_quse, emc_quse_width, emc_ibdly, emc_obdly,
emc_einput, emc_einput_duration, emc_qrst, emc_qsafe,
emc_rdv, emc_rdv_mask, emc_rdv_early, emc_rdv_early_mask */
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv);
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wsv);
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wev);
ADJUST_PARAM_ROUND2_ALL_REG(target_table, ref_table, emc_wdv_mask);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_quse_width);
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_ibdly, 6,0, | (1 << 28));
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_obdly, 5,0, | (1 << 28));
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_einput_duration);
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_qrst, 6,0, | (6 << 16));
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qsafe);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv);
target_table->burst_regs.emc_rdv_mask = target_table->burst_regs.emc_rdv + 2;
target_table->shadow_regs_ca_train.emc_rdv_mask = target_table->shadow_regs_ca_train.emc_rdv + 2;
target_table->shadow_regs_rdwr_train.emc_rdv_mask = target_table->shadow_regs_rdwr_train.emc_rdv + 2;
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_rdv_early);
target_table->burst_regs.emc_rdv_early_mask = target_table->burst_regs.emc_rdv_early + 2;
target_table->shadow_regs_ca_train.emc_rdv_early_mask = target_table->shadow_regs_ca_train.emc_rdv_early + 2;
target_table->shadow_regs_rdwr_train.emc_rdv_early_mask = target_table->shadow_regs_rdwr_train.emc_rdv_early + 2;
/* emc_pmacro_...,
emc_zcal_wait_cnt, emc_mrs_wait_cnt(2),
emc_pmacro_autocal_cfg_common, emc_dyn_self_ref_control, emc_qpop, emc_pmacro_cmd_pad_tx_ctrl,
emc_tr_timing_0, emc_tr_rdv, emc_tr_qpop, emc_tr_rdv_mask, emc_tr_qsafe, emc_tr_qrst,
emc_training_vref_settle */
/* DDLL values */
{
uint32_t *latency = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(target_table) + latency_write_offset[i]);
CLEAR_BIT(*latency, 23,16)
*latency |= ADJUST_INVERSE(128) << 16;
#define OFFSET_ALL_REG(PARAM) \
offsetof(MarikoMtcTable, burst_regs.PARAM), \
offsetof(MarikoMtcTable, shadow_regs_ca_train.PARAM), \
offsetof(MarikoMtcTable, shadow_regs_rdwr_train.PARAM) \
/* Section 1: adjust HI bits: BIT 26:16 */
/* Byte offsets (into MarikoMtcTable) of every register whose upper field,
 * bits 26:16, is rewritten by the loop that follows this table.
 * Each OFFSET_ALL_REG(...) entry expands to three offsets: the register's copy
 * in burst_regs, shadow_regs_ca_train and shadow_regs_rdwr_train.
 * The trim_regs entries contribute a single offset each. */
const uint32_t ddll_high[] = {
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_5),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_5),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3),
};
/* Walk the table: the same byte offset is applied to both target_table and
 * ref_table, so each iteration pairs one target register with its reference
 * counterpart. Only target_table is written. */
for (uint32_t i = 0; i < sizeof(ddll_high)/sizeof(uint32_t); i++)
{
uint32_t *ddll = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(target_table) + ddll_high[i]);
uint32_t *ddll_ref = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(ref_table) + ddll_high[i]);
/* ADJUST_BIT extracts bits 26:16 from both values and computes the adjusted field.
 * NOTE(review): bits 26:16 span 11 bits, but the result is masked with
 * (1 << 10) - 1, which keeps only the low 10 bits, so bit 10 of the adjusted
 * value is dropped. Confirm whether the narrower mask is intentional. */
uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 26,16) & ((1 << 10) - 1);
/* Zero bits 26:16 of the target register, then OR the adjusted value back
 * into that position; bits outside 26:16 are left as they were. */
CLEAR_BIT(*ddll, 26,16)
*ddll |= adjusted_ddll << 16;
}
/* Section 2: adjust LOW bits: BIT 10:0 */
/* Byte offsets (into MarikoMtcTable) of every register whose lower field,
 * bits 10:0, is rewritten by the loop that follows this table.
 * Each OFFSET_ALL_REG(...) entry expands to three offsets: the register's copy
 * in burst_regs, shadow_regs_ca_train and shadow_regs_rdwr_train.
 * The trim_regs entries contribute a single offset each. */
const uint32_t ddll_low[] = {
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dq_rank1_5),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_0),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_1),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_3),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank0_4),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_0),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_1),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_3),
OFFSET_ALL_REG(emc_pmacro_ob_ddll_long_dqs_rank1_4),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_0),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_1),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_2),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_3),
OFFSET_ALL_REG(emc_pmacro_ddll_long_cmd_4),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_0),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_1),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_2),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_3),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_4),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank0_5),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_0),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_1),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_2),
offsetof(MarikoMtcTable, trim_regs.emc_pmacro_ob_ddll_long_dq_rank1_3),
};
/* Walk the table: the same byte offset is applied to both target_table and
 * ref_table, so each iteration pairs one target register with its reference
 * counterpart. Only target_table is written. */
for (uint32_t i = 0; i < sizeof(ddll_low)/sizeof(uint32_t); i++)
{
uint32_t *ddll = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(target_table) + ddll_low[i]);
uint32_t *ddll_ref = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(ref_table) + ddll_low[i]);
/* ADJUST_BIT extracts bits 10:0 from both values and computes the adjusted field.
 * NOTE(review): bits 10:0 span 11 bits, but the result is masked with
 * (1 << 10) - 1, which keeps only bits 9:0, so bit 10 of the adjusted value
 * is dropped. Confirm whether the narrower mask is intentional. */
uint16_t adjusted_ddll = ADJUST_BIT(*ddll, *ddll_ref, 10,0) & ((1 << 10) - 1);
/* Zero bits 10:0 of the target register, then OR the adjusted value in;
 * bits above bit 10 are left as they were. */
CLEAR_BIT(*ddll, 10,0)
*ddll |= adjusted_ddll;
}
}
/* Section 2: adjust read latency */
/* BIT 7:0 - ALLOWANCE_READ */
const uint32_t latency_read_offset[] = {
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_1),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vi2_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
};
for (uint32_t i = 0; i < sizeof(latency_read_offset)/sizeof(uint32_t); i++)
ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_zcal_wait_cnt, 21,16, 10,0)
ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt, 21,16, 10,0)
ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_mrs_wait_cnt2, 21,16, 10,0)
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_auto_cal_channel, 5,0, | 0xC1E00300)
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_pmacro_autocal_cfg_common, 5,0, | 8 << 8)
ADJUST_BIT_ALL_REG_PAIR(target_table, ref_table, emc_dyn_self_ref_control, 31,31, 15,0)
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_qpop);
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_timing_0, 9,0, | 0x1186100)
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_rdv);
target_table->burst_regs.emc_tr_rdv_mask = target_table->burst_regs.emc_tr_rdv + 2;
target_table->shadow_regs_ca_train.emc_tr_rdv_mask = target_table->shadow_regs_ca_train.emc_tr_rdv + 2;
target_table->shadow_regs_rdwr_train.emc_tr_rdv_mask = target_table->shadow_regs_rdwr_train.emc_tr_rdv + 2;
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qpop);
ADJUST_PARAM_ALL_REG(target_table, ref_table, emc_tr_qsafe);
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_tr_qrst, 6,0, | (6 << 16));
ADJUST_BIT_ALL_REG_SINGLE_OP(target_table, ref_table, emc_training_vref_settle, 15,0, | (4 << 16));
/* External Memory Arbitration Configuration */
/* BIT 20:16 - EXTRA_TICKS_PER_UPDATE: 0 */
/* BIT 8:0 - CYCLES_PER_UPDATE: 12(1600MHz), 10(1331.2MHz) */
ADJUST_PARAM_TABLE(target_table, ref_table, burst_mc_regs.mc_emem_arb_cfg);
/* External Memory Arbitration Configuration: Direction Arbiter: Turns */
/* BIT 31:24 - W2R_TURN: approx. mc_emem_arb_timing_w2r */
/* BIT 23:16 - R2W_TURN: approx. mc_emem_arb_timing_r2w */
/* BIT 15:8 - W2W_TURN: 0 */
/* BIT 7:0 - R2R_TURN: 0 */
{
uint32_t *latency = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(target_table) + latency_read_offset[i]);
uint8_t adjusted_latency = ADJUST_INVERSE(TRIM_BIT(*latency, 7,0));
CLEAR_BIT(*latency, 7,0)
*latency |= adjusted_latency;
}
}
/* PLLM and PLLMB control */
{
/*
* CLK_RST_CONTROLLER_PLLM_SS_CTRL1:
* BIT 31:16 : PLLM_SDM_SSC_MAX
* BIT 15:0 : PLLM_SDM_SSC_MIN
*
* CLK_RST_CONTROLLER_PLLM_SS_CTRL2:
* BIT 31:16 : PLLM_SDM_SSC_STEP
* BIT 15:0 : PLLM_SDM_DIN
*
* pllm(b)_ss_ctrl1:
* 1365, 342 (1600MHz)
* 0xFAAB, 0xF404 (1331MHz)
*
* pllm(b)_ss_ctrl2:
* 2, 1365 (1600MHz)
* 6, 0xFAAB (1331MHz)
*/
}
/* EMC misc. configuration */
{
/* ? Command Trigger: MRW, MRW2: MRW_OP - [PMC] data to be written ? */
{
uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_turns;
uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_turns;
uint8_t w2r_turn = ADJUST_BIT(param_1600, param_1331, 31,24);
uint8_t r2w_turn = ADJUST_BIT(param_1600, param_1331, 23,16);
target_table->burst_mc_regs.mc_emem_arb_da_turns = w2r_turn << 24 | r2w_turn << 16;
}
/* EMC_CFG_2 */
/* BIT 5:3 - ZQ_EXTRA_DELAY: 6(1600MHz), 5(1331.2MHz), max possible value: 7 */
/* External Memory Arbitration Configuration: Direction Arbiter: Covers */
/* BIT 23:16 - RCD_W_COVER: 13(1600MHz), 11(1331.2MHz) */
/* BIT 15:8 - RCD_R_COVER: 8(1600MHz), 7(1331.2MHz) */
/* BIT 7:0 - RC_COVER: approx. mc_emem_arb_timing_rc, 12(1600MHz), 9(1331.2MHz) */
{
CLEAR_BIT(target_table->emc_cfg_2, 5,3)
target_table->emc_cfg_2 |= 7 << 3;
uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_da_covers;
uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_da_covers;
uint8_t rcd_w_cover = ADJUST_BIT(param_1600, param_1331, 23,16);
uint8_t rcd_r_cover = ADJUST_BIT(param_1600, param_1331, 15,8);
uint8_t rc_cover = ADJUST_BIT(param_1600, param_1331, 7,0);
target_table->burst_mc_regs.mc_emem_arb_da_covers = rcd_w_cover << 16 | rcd_r_cover << 8 | rc_cover;
}
/* External Memory Arbitration Configuration: Miscellaneous Thresholds (0) */
/* BIT 20:16 - PRIORITY_INVERSION_ISO_THRESHOLD: 12(1600MHz), 10(1331.2MHz) */
/* BIT 14:8 - PRIORITY_INVERSION_THRESHOLD: 36(1600MHz), 30(1331.2MHz) */
/* BIT 7:0 - BC2AA_HOLDOFF_THRESHOLD: set to mc_emem_arb_timing_rc */
{
/* Rebuild three fields of target_table's mc_emem_arb_misc0; all other bits
 * of the register are preserved. */
/* Snapshot the register from the target table and from the reference table. */
uint32_t param_1600 = target_table->burst_mc_regs.mc_emem_arb_misc0;
uint32_t param_1331 = ref_table->burst_mc_regs.mc_emem_arb_misc0;
/* Bits 20:16 and 14:8 are derived from the target/reference pair via ADJUST_BIT. */
uint8_t priority_inversion_iso_threshold = ADJUST_BIT(param_1600, param_1331, 20,16);
uint8_t priority_inversion_threshold = ADJUST_BIT(param_1600, param_1331, 14,8);
/* Bits 7:0 are not derived from the reference table: they are copied from the
 * target table's mc_emem_arb_timing_rc, narrowed to 8 bits by the uint8_t type. */
uint8_t bc2aa_holdoff_threshold = target_table->burst_mc_regs.mc_emem_arb_timing_rc;
/* Zero each of the three fields in place before merging the new values. */
CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 20,16)
CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 14,8)
CLEAR_BIT(target_table->burst_mc_regs.mc_emem_arb_misc0, 7,0)
/* OR the new field values into their positions (shifts 16, 8 and 0). */
target_table->burst_mc_regs.mc_emem_arb_misc0 |=
(priority_inversion_iso_threshold << 16 | priority_inversion_threshold << 8 | bc2aa_holdoff_threshold);
}
/* Latency allowance settings */
{
/* Section 1: adjust write latency */
/* BIT 23:16 - ALLOWANCE_WRITE: 128(1600MHz), 153(1331.2MHz) */
/* Byte offsets (into MarikoMtcTable) of the la_scale_regs latency-allowance
 * registers whose bits 23:16 are overwritten by the loop below. */
const uint32_t latency_write_offset[] = {
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_xusb_1),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_tsec_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmca_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcaa_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmc_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_sdmmcab_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_ppcs_1),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_mpcore_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_avpc_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvenc_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_nvdec_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
};
/* Every listed register gets the same new value in bits 23:16; this loop does
 * not read the field's previous contents. */
for (uint32_t i = 0; i < sizeof(latency_write_offset)/sizeof(uint32_t); i++)
{
uint32_t *latency = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(target_table) + latency_write_offset[i]);
/* Zero bits 23:16, leaving the rest of the register untouched. */
CLEAR_BIT(*latency, 23,16)
/* NOTE(review): ADJUST_INVERSE is defined outside this excerpt; presumably it
 * scales 128 (the 1600MHz value per the comment above) inversely with the
 * EMC clock — confirm, and confirm the result fits in 8 bits, since it is
 * shifted into place without masking. */
*latency |= ADJUST_INVERSE(128) << 16;
}
/* Section 2: adjust read latency */
/* BIT 7:0 - ALLOWANCE_READ */
/* Byte offsets (into MarikoMtcTable) of the la_scale_regs latency-allowance
 * registers whose bits 7:0 are rescaled by the loop below. */
const uint32_t latency_read_offset[] = {
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_hc_1),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_gpu2_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vic_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_vi2_0),
offsetof(MarikoMtcTable, la_scale_regs.mc_latency_allowance_isp2_1),
};
/* Unlike the write-latency pass, the new value here is derived from each
 * register's own current bits 7:0 rather than from a fixed constant. */
for (uint32_t i = 0; i < sizeof(latency_read_offset)/sizeof(uint32_t); i++)
{
uint32_t *latency = reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(target_table) + latency_read_offset[i]);
/* Extract bits 7:0 and pass them through ADJUST_INVERSE; the uint8_t type
 * truncates the result to 8 bits.
 * NOTE(review): ADJUST_INVERSE is defined outside this excerpt — confirm its
 * scaling direction and that truncation to 8 bits is acceptable. */
uint8_t adjusted_latency = ADJUST_INVERSE(TRIM_BIT(*latency, 7,0));
/* Zero bits 7:0, then OR the adjusted value back in; upper bits are preserved. */
CLEAR_BIT(*latency, 7,0)
*latency |= adjusted_latency;
}
}
/* PLLM and PLLMB control */
{
/*
* CLK_RST_CONTROLLER_PLLM_SS_CTRL1:
* BIT 31:16 : PLLM_SDM_SSC_MAX
* BIT 15:0 : PLLM_SDM_SSC_MIN
*
* CLK_RST_CONTROLLER_PLLM_SS_CTRL2:
* BIT 31:16 : PLLM_SDM_SSC_STEP
* BIT 15:0 : PLLM_SDM_DIN
*
* pllm(b)_ss_ctrl1:
* 1365, 342 (1600MHz)
* 0xFAAB, 0xF404 (1331MHz)
*
* pllm(b)_ss_ctrl2:
* 2, 1365 (1600MHz)
* 6, 0xFAAB (1331MHz)
*
* No need to care about this if Spread Spectrum (SS) is disabled
*/
// Disable PLL Spread Spectrum Control
table->pll_en_ssc = 0;
table->pllm_ss_cfg = 1 << 30;
}
/* EMC misc. configuration */
{
/* ? Command Trigger: MRW, MRW2: MRW_OP - [PMC] data to be written ?
*
* EMC_MRW: MRW_OP
* 1600 MHz: 0x54
* 1331 MHz: 0x44
* 1065 MHz: 0x34
* 800 MHz: 0x34
* 665 MHz: 0x14
* 408 MHz: 0x04
* 204 MHz: 0x04
*
* EMC_MRW2: MRW2_OP
* 1600 MHz: 0x2D 45 5*9
* 1331 MHz: 0x24 36 4*9
* 1065 MHz: 0x1B 27 3*9
* 800 MHz: 0x12 18 2*9
* 665 MHz: 0x09 9 1*9
* 408 MHz: 0x00
* 204 MHz: 0x00
*/
{
}
/* EMC_CFG_2 */
/* BIT 5:3 - ZQ_EXTRA_DELAY: 6(1600MHz), 5(1331.2MHz), max possible value: 7 */
{
CLEAR_BIT(target_table->emc_cfg_2, 5,3)
target_table->emc_cfg_2 |= 7 << 3;
}
}
}
#endif
}
#endif
/* Unlock the second sub-partition for retail Mariko, and double the bandwidth (~60GB/s)
* https://github.com/CTCaer/hekate/blob/01b6e645b3cb69ddf28cc9eff40c4b35bf03dbd4/bdk/mem/sdram.h#L30

View File

@@ -179,32 +179,18 @@ namespace ams::ldr {
/* Patch max GPU voltage on Mariko */
std::memcpy(reinterpret_cast<void *>(mapped_nso + pcv::GpuVoltageLimitOffsets[i]), &pcv::NewGpuVoltageLimit, sizeof(pcv::NewGpuVoltageLimit));
/* Calculate DIVM and DIVN (clock DIVisors) */
/* Assume oscillator (PLLMB_IN) is 38.4 MHz */
/* PLLMB_OUT = PLLMB_IN / DIVM * DIVN */
u32 divm = 1;
u32 divn = GetEmcClock() / 38400;
if (GetEmcClock() - divn * 38400 >= 38400 / 2) {
divm = 2;
divn = divn * 2 + 1;
}
if (i >= 2) {
for (u32 j = 0; j < sizeof(pcv::MtcTable_1600[i-2])/sizeof(u32); j++) {
pcv::MarikoMtcTable* mtc_table_new = reinterpret_cast<pcv::MarikoMtcTable *>(mapped_nso + pcv::MtcTable_1600[i-2][j]);
pcv::MarikoMtcTable* mtc_table_old = reinterpret_cast<pcv::MarikoMtcTable *>(mapped_nso + pcv::MtcTable_1600[i-2][j] - pcv::MtcTableOffset);
#ifdef REPLACE_1331
/* Replace 1331 MHz with 1600 MHz */
/* Replace 1331 MHz with 1600 MHz (not possible without proper timings for the OC clock) */
std::memcpy(reinterpret_cast<void *>(mtc_table_old), reinterpret_cast<void *>(mtc_table_new), sizeof(pcv::MarikoMtcTable));
#endif
/* Generate new table for OC MHz */
pcv::AdjustMtcTable(mtc_table_new);
/* Patch clock divisors */
mtc_table_1600->pllmb_divm = divm;
mtc_table_1600->pllmb_divn = divn;
pcv::AdjustMtcTable(mtc_table_new, mtc_table_old);
}
}