/*
 * Copyright Altera Corporation (C) 2012-2015
 *
 * SPDX-License-Identifier:    BSD-3-Clause
 */

#include <common.h>
#include <asm/io.h>
#include <asm/arch/sdram.h>
#include "sequencer.h"
#include "sequencer_auto.h"
#include "sequencer_auto_ac_init.h"
#include "sequencer_auto_inst_init.h"
#include "sequencer_defines.h"

static void scc_mgr_load_dqs_for_write_group(uint32_t write_group);

static struct socfpga_sdr_rw_load_manager *sdr_rw_load_mgr_regs =
	(struct socfpga_sdr_rw_load_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0x800);

static struct socfpga_sdr_rw_load_jump_manager *sdr_rw_load_jump_mgr_regs =
	(struct socfpga_sdr_rw_load_jump_manager *)(SDR_PHYGRP_RWMGRGRP_ADDRESS | 0xC00);

static struct socfpga_sdr_reg_file *sdr_reg_file =
	(struct socfpga_sdr_reg_file *)SDR_PHYGRP_REGFILEGRP_ADDRESS;

static struct socfpga_sdr_scc_mgr *sdr_scc_mgr =
	(struct socfpga_sdr_scc_mgr *)(SDR_PHYGRP_SCCGRP_ADDRESS | 0xe00);

static struct socfpga_phy_mgr_cmd *phy_mgr_cmd =
	(struct socfpga_phy_mgr_cmd *)SDR_PHYGRP_PHYMGRGRP_ADDRESS;

static struct socfpga_phy_mgr_cfg *phy_mgr_cfg =
	(struct socfpga_phy_mgr_cfg *)(SDR_PHYGRP_PHYMGRGRP_ADDRESS | 0x40);

static struct socfpga_data_mgr *data_mgr =
	(struct socfpga_data_mgr *)SDR_PHYGRP_DATAMGRGRP_ADDRESS;

static struct socfpga_sdr_ctrl *sdr_ctrl =
	(struct socfpga_sdr_ctrl *)SDR_CTRLGRP_ADDRESS;

#define DELTA_D		1

/*
 * In order to reduce ROM size, most of the selectable calibration steps are
 * decided at compile time based on the user's calibration mode selection,
 * as captured by the STATIC_CALIB_STEPS selection below.
 *
 * However, to support simulation-time selection of fast simulation mode, where
 * we skip everything except the bare minimum, we need a few of the steps to
 * be dynamic.  In those cases, we either use the DYNAMIC_CALIB_STEPS for the
 * check, which is based on the rtl-supplied value, or we dynamically compute
 * the value to use based on the dynamically-chosen calibration mode.
 */

#define DLEVEL 0
#define STATIC_IN_RTL_SIM 0
#define STATIC_SKIP_DELAY_LOOPS 0

#define STATIC_CALIB_STEPS (STATIC_IN_RTL_SIM | CALIB_SKIP_FULL_TEST | \
	STATIC_SKIP_DELAY_LOOPS)
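
/*
 * A sketch of the two flavors of check (assumption: DYNAMIC_CALIB_STEPS
 * aliases the rtl-supplied dyn_calib_steps word declared below; the
 * alias itself is not part of this excerpt):
 *
 *	#define DYNAMIC_CALIB_STEPS	(dyn_calib_steps)
 *
 *	if (STATIC_CALIB_STEPS & CALIB_SKIP_DELAY_LOOPS)   ...compile-time
 *	if (DYNAMIC_CALIB_STEPS & CALIB_SKIP_ALL)          ...run-time
 */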

/* calibration steps requested by the rtl */
uint16_t dyn_calib_steps;

/*
 * To make CALIB_SKIP_DELAY_LOOPS a dynamic conditional option
 * instead of static, we use boolean logic to select between
 * non-skip and skip values
 *
 * The mask is set to include all bits when not-skipping, but is
 * zero when skipping
 */

uint16_t skip_delay_mask;	/* mask off bits when skipping/not-skipping */

#define SKIP_DELAY_LOOP_VALUE_OR_ZERO(non_skip_value) \
	((non_skip_value) & skip_delay_mask)
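
/*
 * A minimal initialization sketch consistent with the comment above;
 * the actual assignment lives in the calibration entry code, which is
 * not part of this excerpt:
 *
 *	if (dyn_calib_steps & CALIB_SKIP_DELAY_LOOPS)
 *		skip_delay_mask = 0x0;
 *	else
 *		skip_delay_mask = 0xffff;
 */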

struct gbl_type *gbl;
struct param_type *param;
uint32_t curr_shadow_reg;

static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
	uint32_t write_group, uint32_t use_dm,
	uint32_t all_correct, uint32_t *bit_chk, uint32_t all_ranks);

static void set_failing_group_stage(uint32_t group, uint32_t stage,
	uint32_t substage)
{
	/*
	 * Only set the global stage if there has not been any other
	 * failing group.
	 */
	if (gbl->error_stage == CAL_STAGE_NIL)	{
		gbl->error_substage = substage;
		gbl->error_stage = stage;
		gbl->error_group = group;
	}
}

static void reg_file_set_group(uint32_t set_group)
{
	/* Read the current group and stage */
	uint32_t cur_stage_group = readl(&sdr_reg_file->cur_stage);

	/* Clear the group */
	cur_stage_group &= 0x0000FFFF;

	/* Set the group */
	cur_stage_group |= (set_group << 16);

	/* Write the data back */
	writel(cur_stage_group, &sdr_reg_file->cur_stage);
}

static void reg_file_set_stage(uint32_t set_stage)
{
	/* Read the current group and stage */
	uint32_t cur_stage_group = readl(&sdr_reg_file->cur_stage);

	/* Clear the stage and substage */
	cur_stage_group &= 0xFFFF0000;

	/* Set the stage */
	cur_stage_group |= (set_stage & 0x000000FF);

	/* Write the data back */
	writel(cur_stage_group, &sdr_reg_file->cur_stage);
}

static void reg_file_set_sub_stage(uint32_t set_sub_stage)
{
	/* Read the current group and stage */
	uint32_t cur_stage_group = readl(&sdr_reg_file->cur_stage);

	/* Clear the substage */
	cur_stage_group &= 0xFFFF00FF;

	/* Set the sub stage */
	cur_stage_group |= ((set_sub_stage << 8) & 0x0000FF00);

	/* Write the data back */
	writel(cur_stage_group, &sdr_reg_file->cur_stage);
}
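
/*
 * Summary of the cur_stage layout implied by the three helpers above:
 *
 *	bits [31:16]	group		(reg_file_set_group)
 *	bits [15: 8]	substage	(reg_file_set_sub_stage)
 *	bits [ 7: 0]	stage		(reg_file_set_stage, which also
 *					 clears the substage field)
 */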

static void initialize(void)
{
	debug("%s:%d\n", __func__, __LINE__);
	/* USER calibration has control over path to memory */
	/*
	 * In Hard PHY this is a 2-bit control:
	 * 0: AFI Mux Select
	 * 1: DDIO Mux Select
	 */
	writel(0x3, &phy_mgr_cfg->mux_sel);

	/* USER memory clock is not stable, so we begin initialization */
	writel(0, &phy_mgr_cfg->reset_mem_stbl);

	/* USER calibration status all set to zero */
	writel(0, &phy_mgr_cfg->cal_status);
	writel(0, &phy_mgr_cfg->cal_debug_info);

	if ((dyn_calib_steps & CALIB_SKIP_ALL) != CALIB_SKIP_ALL) {
		param->read_correct_mask_vg  = ((uint32_t)1 <<
			(RW_MGR_MEM_DQ_PER_READ_DQS /
			RW_MGR_MEM_VIRTUAL_GROUPS_PER_READ_DQS)) - 1;
		param->write_correct_mask_vg = ((uint32_t)1 <<
			(RW_MGR_MEM_DQ_PER_WRITE_DQS /
			RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS)) - 1;
		param->read_correct_mask     = ((uint32_t)1 <<
			RW_MGR_MEM_DQ_PER_READ_DQS) - 1;
		param->write_correct_mask    = ((uint32_t)1 <<
			RW_MGR_MEM_DQ_PER_WRITE_DQS) - 1;
		param->dm_correct_mask       = ((uint32_t)1 <<
			(RW_MGR_MEM_DATA_WIDTH / RW_MGR_MEM_DATA_MASK_WIDTH))
			- 1;
	}
}

static void set_rank_and_odt_mask(uint32_t rank, uint32_t odt_mode)
{
	uint32_t odt_mask_0 = 0;
	uint32_t odt_mask_1 = 0;
	uint32_t cs_and_odt_mask;

	if (odt_mode == RW_MGR_ODT_MODE_READ_WRITE) {
		if (RW_MGR_MEM_NUMBER_OF_RANKS == 1) {
			/*
			 * 1 Rank
			 * Read: ODT = 0
			 * Write: ODT = 1
			 */
			odt_mask_0 = 0x0;
			odt_mask_1 = 0x1;
		} else if (RW_MGR_MEM_NUMBER_OF_RANKS == 2) {
			/* 2 Ranks */
			if (RW_MGR_MEM_NUMBER_OF_CS_PER_DIMM == 1) {
				/*
				 * - Dual-Slot, Single-Rank
				 *   (1 chip-select per DIMM), OR
				 * - RDIMM, 4 total CS (2 CS per DIMM),
				 *   i.e. 2 DIMMs
				 * Since MEM_NUMBER_OF_RANKS is 2, both are
				 * single rank with 2 CS each (special for
				 * RDIMM).
				 * Read: Turn on ODT on the opposite rank
				 * Write: Turn on ODT on all ranks
				 */
				odt_mask_0 = 0x3 & ~(1 << rank);
				odt_mask_1 = 0x3;
			} else {
			/*
			 * USER - Single-Slot, Dual-Rank DIMMs
			 * (2 chip-selects per DIMM)
			 * USER Read: Turn off ODT on all ranks
			 * USER Write: Turn on ODT on active rank
			 */
				odt_mask_0 = 0x0;
				odt_mask_1 = 0x3 & (1 << rank);
			}
		} else if (RW_MGR_MEM_NUMBER_OF_RANKS == 4) {
			/* 4 Ranks
			 * Read:
			 * ----------+-----------------------+
			 *           |                       |
			 *           |         ODT           |
			 * Read From +-----------------------+
			 *   Rank    |  3  |  2  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *     0     |  0  |  1  |  0  |  0  |
			 *     1     |  1  |  0  |  0  |  0  |
			 *     2     |  0  |  0  |  0  |  1  |
			 *     3     |  0  |  0  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *
			 * Write:
			 * ----------+-----------------------+
			 *           |                       |
			 *           |         ODT           |
			 * Write To  +-----------------------+
			 *   Rank    |  3  |  2  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 *     0     |  0  |  1  |  0  |  1  |
			 *     1     |  1  |  0  |  1  |  0  |
			 *     2     |  0  |  1  |  0  |  1  |
			 *     3     |  1  |  0  |  1  |  0  |
			 * ----------+-----+-----+-----+-----+
			 */
			switch (rank) {
			case 0:
				odt_mask_0 = 0x4;
				odt_mask_1 = 0x5;
				break;
			case 1:
				odt_mask_0 = 0x8;
				odt_mask_1 = 0xA;
				break;
			case 2:
				odt_mask_0 = 0x1;
				odt_mask_1 = 0x5;
				break;
			case 3:
				odt_mask_0 = 0x2;
				odt_mask_1 = 0xA;
				break;
			}
		}
	} else {
		odt_mask_0 = 0x0;
		odt_mask_1 = 0x0;
	}

	cs_and_odt_mask =
		(0xFF & ~(1 << rank)) |
		((0xFF & odt_mask_0) << 8) |
		((0xFF & odt_mask_1) << 16);
	writel(cs_and_odt_mask, SDR_PHYGRP_RWMGRGRP_ADDRESS |
				RW_MGR_SET_CS_AND_ODT_MASK_OFFSET);
}
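
/*
 * Packing of cs_and_odt_mask as assembled above:
 *
 *	bits [ 7: 0]	active-low chip selects (only the addressed
 *			rank's bit is driven low)
 *	bits [15: 8]	odt_mask_0 - read ODT enables
 *	bits [23:16]	odt_mask_1 - write ODT enables
 *
 * e.g. rank 1 of 4 in read/write mode yields 0x0A08FD.
 */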

static void scc_mgr_initialize(void)
{
	u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_HHP_RFILE_OFFSET;

	/*
	 * Clear register file for HPS
	 * 16 (2^4) is the size of the full register file in the scc mgr:
	 *	RFILE_DEPTH = log2(MEM_DQ_PER_DQS + 1 + MEM_DM_PER_DQS +
	 * MEM_IF_READ_DQS_WIDTH - 1) + 1;
	 */
	uint32_t i;
	for (i = 0; i < 16; i++) {
		debug_cond(DLEVEL == 1, "%s:%d: Clearing SCC RFILE index %u\n",
			   __func__, __LINE__, i);
		writel(0, addr + (i << 2));
	}
}

static void scc_mgr_set_dqs_bus_in_delay(uint32_t read_group,
						uint32_t delay)
{
	u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_IN_DELAY_OFFSET;

	/* Load the setting in the SCC manager */
	writel(delay, addr + (read_group << 2));
}
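
/*
 * This helper and the scc_mgr_set_* helpers that follow all address a
 * word-indexed register array, so (index << 2) selects the 32-bit slot
 * for a group or pin. A hypothetical generic form, for illustration
 * only (this file spells each accessor out):
 *
 *	static void scc_mgr_set(u32 off, u32 index, u32 value)
 *	{
 *		writel(value, (SDR_PHYGRP_SCCGRP_ADDRESS | off) +
 *		       (index << 2));
 *	}
 */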

static void scc_mgr_set_dqs_io_in_delay(uint32_t write_group,
	uint32_t delay)
{
	u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET;
	writel(delay, addr + (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
}

static void scc_mgr_set_dqs_en_phase(uint32_t read_group, uint32_t phase)
{
	u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_EN_PHASE_OFFSET;

	/* Load the setting in the SCC manager */
	writel(phase, addr + (read_group << 2));
}

static void scc_mgr_set_dqs_en_phase_all_ranks(uint32_t read_group,
					       uint32_t phase)
{
	uint32_t r;
	uint32_t update_scan_chains;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		/*
		 * USER although the h/w doesn't support different phases per
		 * shadow register, for simplicity our scc manager modeling
		 * keeps different phase settings per shadow reg, and it's
		 * important for us to keep them in sync to match h/w.
		 * for efficiency, the scan chain update should occur only
		 * once to sr0.
		 */
		update_scan_chains = (r == 0) ? 1 : 0;

		scc_mgr_set_dqs_en_phase(read_group, phase);

		if (update_scan_chains) {
			writel(read_group, &sdr_scc_mgr->dqs_ena);
			writel(0, &sdr_scc_mgr->update);
		}
	}
}

static void scc_mgr_set_dqdqs_output_phase(uint32_t write_group,
						  uint32_t phase)
{
	u32 addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQDQS_OUT_PHASE_OFFSET;

	/* Load the setting in the SCC manager */
	writel(phase, addr + (write_group << 2));
}

static void scc_mgr_set_dqdqs_output_phase_all_ranks(uint32_t write_group,
						     uint32_t phase)
{
	uint32_t r;
	uint32_t update_scan_chains;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
	     r += NUM_RANKS_PER_SHADOW_REG) {
		/*
		 * USER although the h/w doesn't support different phases per
		 * shadow register, for simplicity our scc manager modeling
		 * keeps different phase settings per shadow reg, and it's
		 * important for us to keep them in sync to match h/w.
		 * for efficiency, the scan chain update should occur only
		 * once to sr0.
		 */
		update_scan_chains = (r == 0) ? 1 : 0;

		scc_mgr_set_dqdqs_output_phase(write_group, phase);

		if (update_scan_chains) {
			writel(write_group, &sdr_scc_mgr->dqs_ena);
			writel(0, &sdr_scc_mgr->update);
		}
	}
}

static void scc_mgr_set_dqs_en_delay(uint32_t read_group, uint32_t delay)
{
	uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_DQS_EN_DELAY_OFFSET;

	/* Load the setting in the SCC manager */
	writel(delay + IO_DQS_EN_DELAY_OFFSET, addr +
	       (read_group << 2));
}

static void scc_mgr_set_dqs_en_delay_all_ranks(uint32_t read_group,
					       uint32_t delay)
{
	uint32_t r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
		r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_set_dqs_en_delay(read_group, delay);

		writel(read_group, &sdr_scc_mgr->dqs_ena);
		/*
		 * In shadow register mode, the T11 settings are stored in
		 * registers in the core, which are updated by the DQS_ENA
		 * signals. Not issuing the SCC_MGR_UPD command allows us to
		 * save lots of rank switching overhead, by calling
		 * select_shadow_regs_for_update with update_scan_chains
		 * set to 0.
		 */
		writel(0, &sdr_scc_mgr->update);
	}
}

static void scc_mgr_set_oct_out1_delay(uint32_t write_group, uint32_t delay)
{
	uint32_t read_group;
	uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_OCT_OUT1_DELAY_OFFSET;

	/*
	 * Load the setting in the SCC manager
	 * Although OCT affects only write data, the OCT delay is controlled
	 * by the DQS logic block which is instantiated once per read group.
	 * For protocols where a write group consists of multiple read groups,
	 * the setting must be set multiple times.
	 */
	for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH /
	     RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
	     read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH /
	     RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group)
		writel(delay, addr + (read_group << 2));
}

static void scc_mgr_set_dq_out1_delay(uint32_t write_group,
				      uint32_t dq_in_group, uint32_t delay)
{
	uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;

	/* Load the setting in the SCC manager */
	writel(delay, addr + (dq_in_group << 2));
}

static void scc_mgr_set_dq_in_delay(uint32_t write_group,
	uint32_t dq_in_group, uint32_t delay)
{
	uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET;

	/* Load the setting in the SCC manager */
	writel(delay, addr + (dq_in_group << 2));
}

static void scc_mgr_set_hhp_extras(void)
{
	/*
	 * Load the fixed setting in the SCC manager
	 * bits: 0:0 = 1'b1   - dqs bypass
	 * bits: 1:1 = 1'b1   - dq bypass
	 * bits: 4:2 = 3'b001   - rfifo_mode
	 * bits: 6:5 = 2'b01  - rfifo clock_select
	 * bits: 7:7 = 1'b0  - separate gating from ungating setting
	 * bits: 8:8 = 1'b0  - separate OE from Output delay setting
	 */
	uint32_t value = (0<<8) | (0<<7) | (1<<5) | (1<<2) | (1<<1) | (1<<0);
	uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_HHP_GLOBALS_OFFSET;
	writel(value, addr + SCC_MGR_HHP_EXTRAS_OFFSET);
}
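
/*
 * With the bit assignments above, the programmed value works out to
 * (1 << 5) | (1 << 2) | (1 << 1) | (1 << 0) = 0x27.
 */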

static void scc_mgr_set_dqs_out1_delay(uint32_t write_group,
					      uint32_t delay)
{
	uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;

	/* Load the setting in the SCC manager */
	writel(delay, addr + (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
}

static void scc_mgr_set_dm_out1_delay(uint32_t write_group,
					     uint32_t dm, uint32_t delay)
{
	uint32_t addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;

	/* Load the setting in the SCC manager */
	writel(delay, addr +
	       ((RW_MGR_MEM_DQ_PER_WRITE_DQS + 1 + dm) << 2));
}

/*
 * USER Zero all DQS config
 * TODO: maybe rename to scc_mgr_zero_dqs_config (or something)
 */
static void scc_mgr_zero_all(void)
{
	uint32_t i, r;

	/*
	 * USER Zero all DQS config settings, across all groups and all
	 * shadow registers
	 */
	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r +=
	     NUM_RANKS_PER_SHADOW_REG) {
		for (i = 0; i < RW_MGR_MEM_IF_READ_DQS_WIDTH; i++) {
			/*
			 * The phases actually don't exist on a per-rank basis,
			 * but there's no harm updating them several times, so
			 * let's keep the code simple.
			 */
			scc_mgr_set_dqs_bus_in_delay(i, IO_DQS_IN_RESERVE);
			scc_mgr_set_dqs_en_phase(i, 0);
			scc_mgr_set_dqs_en_delay(i, 0);
		}

		for (i = 0; i < RW_MGR_MEM_IF_WRITE_DQS_WIDTH; i++) {
			scc_mgr_set_dqdqs_output_phase(i, 0);
			/* av/cv don't have out2 */
			scc_mgr_set_oct_out1_delay(i, IO_DQS_OUT_RESERVE);
		}
	}

	/* multicast to all DQS group enables */
	writel(0xff, &sdr_scc_mgr->dqs_ena);
	writel(0, &sdr_scc_mgr->update);
}

static void scc_set_bypass_mode(uint32_t write_group, uint32_t mode)
{
	/*
	 * mode = 0 : Do NOT bypass - Half Rate Mode
	 * mode = 1 : Bypass - Full Rate Mode
	 */

	/* only need to set once for all groups, pins, dq, dqs, dm */
	if (write_group == 0) {
		debug_cond(DLEVEL == 1, "%s:%d Setting HHP Extras\n", __func__,
			   __LINE__);
		scc_mgr_set_hhp_extras();
		debug_cond(DLEVEL == 1, "%s:%d Done Setting HHP Extras\n",
			  __func__, __LINE__);
	}
	/* multicast to all DQ enables */
	writel(0xff, &sdr_scc_mgr->dq_ena);
	writel(0xff, &sdr_scc_mgr->dm_ena);

	/* update current DQS IO enable */
	writel(0, &sdr_scc_mgr->dqs_io_ena);

	/* update the DQS logic */
	writel(write_group, &sdr_scc_mgr->dqs_ena);
	writel(0, &sdr_scc_mgr->update);
}

static void scc_mgr_zero_group(uint32_t write_group, uint32_t test_begin,
			       int32_t out_only)
{
	uint32_t i, r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS; r +=
		NUM_RANKS_PER_SHADOW_REG) {
		/* Zero all DQ config settings */
		for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
			scc_mgr_set_dq_out1_delay(write_group, i, 0);
			if (!out_only)
				scc_mgr_set_dq_in_delay(write_group, i, 0);
		}

		/* multicast to all DQ enables */
		writel(0xff, &sdr_scc_mgr->dq_ena);

		/* Zero all DM config settings */
		for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
			scc_mgr_set_dm_out1_delay(write_group, i, 0);
		}

		/* multicast to all DM enables */
		writel(0xff, &sdr_scc_mgr->dm_ena);

		/* zero all DQS io settings */
		if (!out_only)
			scc_mgr_set_dqs_io_in_delay(write_group, 0);
		/* av/cv don't have out2 */
		scc_mgr_set_dqs_out1_delay(write_group, IO_DQS_OUT_RESERVE);
		scc_mgr_set_oct_out1_delay(write_group, IO_DQS_OUT_RESERVE);
		scc_mgr_load_dqs_for_write_group(write_group);

		/* multicast to all DQS IO enables (only 1) */
		writel(0, &sdr_scc_mgr->dqs_io_ena);

		/* hit update to zero everything */
		writel(0, &sdr_scc_mgr->update);
	}
}

/* load up dqs config settings */
static void scc_mgr_load_dqs(uint32_t dqs)
{
	writel(dqs, &sdr_scc_mgr->dqs_ena);
}

static void scc_mgr_load_dqs_for_write_group(uint32_t write_group)
{
	uint32_t read_group;
	uint32_t addr = (u32)&sdr_scc_mgr->dqs_ena;
	/*
	 * Although OCT affects only write data, the OCT delay is controlled
	 * by the DQS logic block which is instantiated once per read group.
	 * For protocols where a write group consists of multiple read groups,
	 * the setting must be scanned multiple times.
	 */
	for (read_group = write_group * RW_MGR_MEM_IF_READ_DQS_WIDTH /
	     RW_MGR_MEM_IF_WRITE_DQS_WIDTH;
	     read_group < (write_group + 1) * RW_MGR_MEM_IF_READ_DQS_WIDTH /
	     RW_MGR_MEM_IF_WRITE_DQS_WIDTH; ++read_group)
		writel(read_group, addr);
}

/* load up dqs io config settings */
static void scc_mgr_load_dqs_io(void)
{
	writel(0, &sdr_scc_mgr->dqs_io_ena);
}

/* load up dq config settings */
static void scc_mgr_load_dq(uint32_t dq_in_group)
{
	writel(dq_in_group, &sdr_scc_mgr->dq_ena);
}

/* load up dm config settings */
static void scc_mgr_load_dm(uint32_t dm)
{
	writel(dm, &sdr_scc_mgr->dm_ena);
}
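
/*
 * Usage note: the scc_mgr_set_* helpers stage a value in the SCC
 * manager's register file, the scc_mgr_load_* helpers select which scan
 * chain(s) receive it, and a write to &sdr_scc_mgr->update launches the
 * scan. The typical sequence, as used throughout this file:
 *
 *	scc_mgr_set_dq_in_delay(write_group, i, delay);
 *	scc_mgr_load_dq(i);
 *	writel(0, &sdr_scc_mgr->update);
 */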

/*
 * apply and load a particular input delay for the DQ pins in a group
 * group_bgn is the index of the first dq pin (in the write group)
 */
static void scc_mgr_apply_group_dq_in_delay(uint32_t write_group,
					    uint32_t group_bgn, uint32_t delay)
{
	uint32_t i, p;

	for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
		scc_mgr_set_dq_in_delay(write_group, p, delay);
		scc_mgr_load_dq(p);
	}
}

/* apply and load a particular output delay for the DQ pins in a group */
static void scc_mgr_apply_group_dq_out1_delay(uint32_t write_group,
					      uint32_t group_bgn,
					      uint32_t delay1)
{
	uint32_t i, p;

	for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
		scc_mgr_set_dq_out1_delay(write_group, i, delay1);
		scc_mgr_load_dq(i);
	}
}

/* apply and load a particular output delay for the DM pins in a group */
static void scc_mgr_apply_group_dm_out1_delay(uint32_t write_group,
					      uint32_t delay1)
{
	uint32_t i;

	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
		scc_mgr_set_dm_out1_delay(write_group, i, delay1);
		scc_mgr_load_dm(i);
	}
}
/* apply and load delay on both DQS and OCT out1 */
static void scc_mgr_apply_group_dqs_io_and_oct_out1(uint32_t write_group,
						    uint32_t delay)
{
	scc_mgr_set_dqs_out1_delay(write_group, delay);
	scc_mgr_load_dqs_io();

	scc_mgr_set_oct_out1_delay(write_group, delay);
	scc_mgr_load_dqs_for_write_group(write_group);
}

/* apply a delay to the entire output side: DQ, DM, DQS, OCT */
static void scc_mgr_apply_group_all_out_delay_add(uint32_t write_group,
						  uint32_t group_bgn,
						  uint32_t delay)
{
	uint32_t i, p, new_delay;

	/* dq shift */
	for (i = 0, p = group_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
		new_delay = READ_SCC_DQ_OUT2_DELAY;
		new_delay += delay;

		if (new_delay > IO_IO_OUT2_DELAY_MAX) {
			debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQ[%u,%u]: "
				   "%u > %lu => %lu\n", __func__, __LINE__,
				   write_group, group_bgn, delay, i, p,
				   new_delay,
				   (long unsigned int)IO_IO_OUT2_DELAY_MAX,
				   (long unsigned int)IO_IO_OUT2_DELAY_MAX);
			new_delay = IO_IO_OUT2_DELAY_MAX;
		}

		scc_mgr_load_dq(i);
	}

	/* dm shift */
	for (i = 0; i < RW_MGR_NUM_DM_PER_WRITE_GROUP; i++) {
		new_delay = READ_SCC_DM_IO_OUT2_DELAY;
		new_delay += delay;

		if (new_delay > IO_IO_OUT2_DELAY_MAX) {
			debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DM[%u]: "
				   "%u > %lu => %lu\n", __func__, __LINE__,
				   write_group, group_bgn, delay, i, new_delay,
				   (long unsigned int)IO_IO_OUT2_DELAY_MAX,
				   (long unsigned int)IO_IO_OUT2_DELAY_MAX);
			new_delay = IO_IO_OUT2_DELAY_MAX;
		}

		scc_mgr_load_dm(i);
	}

	/* dqs shift */
	new_delay = READ_SCC_DQS_IO_OUT2_DELAY;
	new_delay += delay;

	if (new_delay > IO_IO_OUT2_DELAY_MAX) {
		debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) DQS: %u > %d => %d;"
			   " adding %u to OUT1\n", __func__, __LINE__,
			   write_group, group_bgn, delay, new_delay,
			   IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
			   new_delay - IO_IO_OUT2_DELAY_MAX);
		scc_mgr_set_dqs_out1_delay(write_group, new_delay -
					   IO_IO_OUT2_DELAY_MAX);
		new_delay = IO_IO_OUT2_DELAY_MAX;
	}

	scc_mgr_load_dqs_io();

	/* oct shift */
	new_delay = READ_SCC_OCT_OUT2_DELAY;
	new_delay += delay;

	if (new_delay > IO_IO_OUT2_DELAY_MAX) {
		debug_cond(DLEVEL == 1, "%s:%d (%u, %u, %u) OCT: %u > %d => %d;"
			   " adding %u to OUT1\n", __func__, __LINE__,
			   write_group, group_bgn, delay, new_delay,
			   IO_IO_OUT2_DELAY_MAX, IO_IO_OUT2_DELAY_MAX,
			   new_delay - IO_IO_OUT2_DELAY_MAX);
		scc_mgr_set_oct_out1_delay(write_group, new_delay -
					   IO_IO_OUT2_DELAY_MAX);
		new_delay = IO_IO_OUT2_DELAY_MAX;
	}

	scc_mgr_load_dqs_for_write_group(write_group);
}

/*
 * USER apply a delay to the entire output side (DQ, DM, DQS, OCT)
 * and to all ranks
 */
static void scc_mgr_apply_group_all_out_delay_add_all_ranks(
	uint32_t write_group, uint32_t group_bgn, uint32_t delay)
{
	uint32_t r;

	for (r = 0; r < RW_MGR_MEM_NUMBER_OF_RANKS;
		r += NUM_RANKS_PER_SHADOW_REG) {
		scc_mgr_apply_group_all_out_delay_add(write_group,
						      group_bgn, delay);
		writel(0, &sdr_scc_mgr->update);
	}
}

/* optimization used to recover some slots in ddr3 inst_rom */
/* could be applied to other protocols if we wanted to */
static void set_jump_as_return(void)
{
	/*
	 * to save space, we replace return with jump to special shared
	 * RETURN instruction so we set the counter to large value so that
	 * we always jump
	 */
	writel(0xff, &sdr_rw_load_mgr_regs->load_cntr0);
	writel(RW_MGR_RETURN, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
}
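
/*
 * In the rom notation used later in this file, the effect is to turn
 * the tail of a sequence into:
 *
 *	jnz cntr0, RETURN
 *
 * where cntr0 is preloaded with 0xff so the branch is always taken and
 * every sequence can share the single RETURN instruction slot.
 */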

/*
 * should always use constants as argument to ensure all computations are
 * performed at compile time
 */
static void delay_for_n_mem_clocks(const uint32_t clocks)
{
	uint32_t afi_clocks;
	uint8_t inner = 0;
	uint8_t outer = 0;
	uint16_t c_loop = 0;

	debug("%s:%d: clocks=%u ... start\n", __func__, __LINE__, clocks);

	/* scale (rounding up) to get afi clocks */
	afi_clocks = (clocks + AFI_RATE_RATIO - 1) / AFI_RATE_RATIO;

	/*
	 * Note, we don't bother accounting for being off a little bit
	 * because of a few extra instructions in outer loops
	 * Note, the loops have a test at the end, and do the test before
	 * the decrement, and so always perform the loop
	 * 1 time more than the counter value
	 */
	if (afi_clocks == 0) {
		/* no delay required */
	} else if (afi_clocks <= 0x100) {
		inner = afi_clocks - 1;
		outer = 0;
		c_loop = 0;
	} else if (afi_clocks <= 0x10000) {
		inner = 0xff;
		outer = (afi_clocks - 1) >> 8;
		c_loop = 0;
	} else {
		inner = 0xff;
		outer = 0xff;
		c_loop = (afi_clocks - 1) >> 16;
	}
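
	/*
	 * Worked example (assuming AFI_RATE_RATIO == 2): clocks = 600
	 * gives afi_clocks = 300; since 0x100 < 300 <= 0x10000 we get
	 * inner = 0xff, outer = (300 - 1) >> 8 = 1, c_loop = 0. Each
	 * loop runs (counter + 1) times, so the nested loops spin
	 * 256 * 2 = 512 >= 300 iterations, slightly over-waiting as
	 * noted above.
	 */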

	/*
	 * rom instructions are structured as follows:
	 *
	 *    IDLE_LOOP2: jnz cntr0, TARGET_A
	 *    IDLE_LOOP1: jnz cntr1, TARGET_B
	 *                return
	 *
	 * so, when doing nested loops, TARGET_A is set to IDLE_LOOP2, and
	 * TARGET_B is set to IDLE_LOOP2 as well
	 *
	 * if we have no outer loop, though, then we can use IDLE_LOOP1 only,
	 * and set TARGET_B to IDLE_LOOP1 and we skip IDLE_LOOP2 entirely
	 *
	 * a little confusing, but it helps save precious space in the inst_rom
	 * and sequencer rom and keeps the delays more accurate and reduces
	 * overhead
	 */
	if (afi_clocks <= 0x100) {
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
			&sdr_rw_load_mgr_regs->load_cntr1);
		writel(RW_MGR_IDLE_LOOP1,
			&sdr_rw_load_jump_mgr_regs->load_jump_add1);
		writel(RW_MGR_IDLE_LOOP1, SDR_PHYGRP_RWMGRGRP_ADDRESS |
					  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
	} else {
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(inner),
			&sdr_rw_load_mgr_regs->load_cntr0);
		writel(SKIP_DELAY_LOOP_VALUE_OR_ZERO(outer),
			&sdr_rw_load_mgr_regs->load_cntr1);
		writel(RW_MGR_IDLE_LOOP2,
			&sdr_rw_load_jump_mgr_regs->load_jump_add0);
		writel(RW_MGR_IDLE_LOOP2,
			&sdr_rw_load_jump_mgr_regs->load_jump_add1);

		/* hack to get around compiler not being smart enough */
		if (afi_clocks <= 0x10000) {
			/* only need to run once */
			writel(RW_MGR_IDLE_LOOP2, SDR_PHYGRP_RWMGRGRP_ADDRESS |
						  RW_MGR_RUN_SINGLE_GROUP_OFFSET);
		} else {
			do {
				writel(RW_MGR_IDLE_LOOP2,
					SDR_PHYGRP_RWMGRGRP_ADDRESS |
					RW_MGR_RUN_SINGLE_GROUP_OFFSET);
			} while (c_loop-- != 0);
		}
	}

	debug("%s:%d clocks=%u ... end\n", __func__, __LINE__, clocks);
}