Skip to content
Snippets Groups Projects
sequencer.c 107 KiB
Newer Older
  • Learn to ignore specific revisions
  • 						IO_IO_IN_DELAY_MAX + 1) {
    							left_edge[i] = -(d + 1);
    						}
    					} else {
    						/* d = 0 failed, but it passed
    						when testing the left edge,
    						so it must be marginal,
    						set it to -1 */
    						if (right_edge[i] ==
    							IO_IO_IN_DELAY_MAX + 1 &&
    							left_edge[i] !=
    							IO_IO_IN_DELAY_MAX
    							+ 1) {
    							right_edge[i] = -1;
    						}
    						/* If a right edge has not been
    						seen yet, then a future passing
    						test will mark this edge as the
    						left edge */
    						else if (right_edge[i] ==
    							IO_IO_IN_DELAY_MAX +
    							1) {
    							left_edge[i] = -(d + 1);
    						}
    					}
    				}
    
    				debug_cond(DLEVEL == 2, "%s:%d vfifo_center[r,\
    					   d=%u]: ", __func__, __LINE__, d);
    				debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d ",
    					   (int)(bit_chk & 1), i, left_edge[i]);
    				debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
    					   right_edge[i]);
    				bit_chk = bit_chk >> 1;
    			}
    		}
    	}
    
    	/* Check that all bits have a window */
    	for (i = 0; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
    		debug_cond(DLEVEL == 2, "%s:%d vfifo_center: left_edge[%u]: \
    			   %d right_edge[%u]: %d", __func__, __LINE__,
    			   i, left_edge[i], i, right_edge[i]);
    		if ((left_edge[i] == IO_IO_IN_DELAY_MAX + 1) || (right_edge[i]
    			== IO_IO_IN_DELAY_MAX + 1)) {
    			/*
    			 * Restore delay chain settings before letting the loop
    			 * in rw_mgr_mem_calibrate_vfifo to retry different
    			 * dqs/ck relationships.
    			 */
    			scc_mgr_set_dqs_bus_in_delay(read_group, start_dqs);
    			if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
    				scc_mgr_set_dqs_en_delay(read_group,
    							 start_dqs_en);
    			}
    			scc_mgr_load_dqs(read_group);
    
    			writel(0, &sdr_scc_mgr->update);
    
    
    			debug_cond(DLEVEL == 1, "%s:%d vfifo_center: failed to \
    				   find edge [%u]: %d %d", __func__, __LINE__,
    				   i, left_edge[i], right_edge[i]);
    			if (use_read_test) {
    				set_failing_group_stage(read_group *
    					RW_MGR_MEM_DQ_PER_READ_DQS + i,
    					CAL_STAGE_VFIFO,
    					CAL_SUBSTAGE_VFIFO_CENTER);
    			} else {
    				set_failing_group_stage(read_group *
    					RW_MGR_MEM_DQ_PER_READ_DQS + i,
    					CAL_STAGE_VFIFO_AFTER_WRITES,
    					CAL_SUBSTAGE_VFIFO_CENTER);
    			}
    			return 0;
    		}
    	}
    
    	/* Find middle of window for each DQ bit */
    	mid_min = left_edge[0] - right_edge[0];
    	min_index = 0;
    	for (i = 1; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++) {
    		mid = left_edge[i] - right_edge[i];
    		if (mid < mid_min) {
    			mid_min = mid;
    			min_index = i;
    		}
    	}
    
    	/*
    	 * -mid_min/2 represents the amount that we need to move DQS.
    	 * If mid_min is odd and positive we'll need to add one to
    	 * make sure the rounding in further calculations is correct
    	 * (always bias to the right), so just add 1 for all positive values.
    	 */
    	if (mid_min > 0)
    		mid_min++;
    
    	mid_min = mid_min / 2;
    
    	debug_cond(DLEVEL == 1, "%s:%d vfifo_center: mid_min=%d (index=%u)\n",
    		   __func__, __LINE__, mid_min, min_index);
    
    	/* Determine the amount we can change DQS (which is -mid_min) */
    	orig_mid_min = mid_min;
    	new_dqs = start_dqs - mid_min;
    	if (new_dqs > IO_DQS_IN_DELAY_MAX)
    		new_dqs = IO_DQS_IN_DELAY_MAX;
    	else if (new_dqs < 0)
    		new_dqs = 0;
    
    	mid_min = start_dqs - new_dqs;
    	debug_cond(DLEVEL == 1, "vfifo_center: new mid_min=%d new_dqs=%d\n",
    		   mid_min, new_dqs);
    
    	if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
    		if (start_dqs_en - mid_min > IO_DQS_EN_DELAY_MAX)
    			mid_min += start_dqs_en - mid_min - IO_DQS_EN_DELAY_MAX;
    		else if (start_dqs_en - mid_min < 0)
    			mid_min += start_dqs_en - mid_min;
    	}
    	new_dqs = start_dqs - mid_min;
    
    	debug_cond(DLEVEL == 1, "vfifo_center: start_dqs=%d start_dqs_en=%d \
    		   new_dqs=%d mid_min=%d\n", start_dqs,
    		   IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS ? start_dqs_en : -1,
    		   new_dqs, mid_min);
    
    	/* Initialize data for export structures */
    	dqs_margin = IO_IO_IN_DELAY_MAX + 1;
    	dq_margin  = IO_IO_IN_DELAY_MAX + 1;
    
    	/* add delay to bring centre of all DQ windows to the same "level" */
    	for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_READ_DQS; i++, p++) {
    		/* Use values before divide by 2 to reduce round off error */
    		shift_dq = (left_edge[i] - right_edge[i] -
    			(left_edge[min_index] - right_edge[min_index]))/2  +
    			(orig_mid_min - mid_min);
    
    		debug_cond(DLEVEL == 2, "vfifo_center: before: \
    			   shift_dq[%u]=%d\n", i, shift_dq);
    
    
    		addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_IN_DELAY_OFFSET;
    
    		temp_dq_in_delay1 = readl(addr + (p << 2));
    		temp_dq_in_delay2 = readl(addr + (i << 2));
    
    
    		if (shift_dq + (int32_t)temp_dq_in_delay1 >
    			(int32_t)IO_IO_IN_DELAY_MAX) {
    			shift_dq = (int32_t)IO_IO_IN_DELAY_MAX - temp_dq_in_delay2;
    		} else if (shift_dq + (int32_t)temp_dq_in_delay1 < 0) {
    			shift_dq = -(int32_t)temp_dq_in_delay1;
    		}
    		debug_cond(DLEVEL == 2, "vfifo_center: after: \
    			   shift_dq[%u]=%d\n", i, shift_dq);
    		final_dq[i] = temp_dq_in_delay1 + shift_dq;
    
    		scc_mgr_set_dq_in_delay(p, final_dq[i]);
    
    		scc_mgr_load_dq(p);
    
    		debug_cond(DLEVEL == 2, "vfifo_center: margin[%u]=[%d,%d]\n", i,
    			   left_edge[i] - shift_dq + (-mid_min),
    			   right_edge[i] + shift_dq - (-mid_min));
    		/* To determine values for export structures */
    		if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
    			dq_margin = left_edge[i] - shift_dq + (-mid_min);
    
    		if (right_edge[i] + shift_dq - (-mid_min) < dqs_margin)
    			dqs_margin = right_edge[i] + shift_dq - (-mid_min);
    	}
    
    	final_dqs = new_dqs;
    	if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS)
    		final_dqs_en = start_dqs_en - mid_min;
    
    	/* Move DQS-en */
    	if (IO_SHIFT_DQS_EN_WHEN_SHIFT_DQS) {
    		scc_mgr_set_dqs_en_delay(read_group, final_dqs_en);
    		scc_mgr_load_dqs(read_group);
    	}
    
    	/* Move DQS */
    	scc_mgr_set_dqs_bus_in_delay(read_group, final_dqs);
    	scc_mgr_load_dqs(read_group);
    	debug_cond(DLEVEL == 2, "%s:%d vfifo_center: dq_margin=%d \
    		   dqs_margin=%d", __func__, __LINE__,
    		   dq_margin, dqs_margin);
    
    	/*
    	 * Do not remove this line as it makes sure all of our decisions
    	 * have been applied. Apply the update bit.
    	 */
    
    	writel(0, &sdr_scc_mgr->update);
    
    
    	return (dq_margin >= 0) && (dqs_margin >= 0);
    }
    
    
    /**
     * rw_mgr_mem_calibrate_guaranteed_write() - Perform guaranteed write into the device
     * @rw_group:	Read/Write Group
     * @phase:	DQ/DQS phase
     *
 * Because initially no communication can be reliably performed with the memory
     * device, the sequencer uses a guaranteed write mechanism to write data into
     * the memory device.
     */
    static int rw_mgr_mem_calibrate_guaranteed_write(const u32 rw_group,
    						 const u32 phase)
    {
    	int ret;
    
    	/* Set a particular DQ/DQS phase. */
    	scc_mgr_set_dqdqs_output_phase_all_ranks(rw_group, phase);
    
    	debug_cond(DLEVEL == 1, "%s:%d guaranteed write: g=%u p=%u\n",
    		   __func__, __LINE__, rw_group, phase);
    
    	/*
    	 * Altera EMI_RM 2015.05.04 :: Figure 1-25
    	 * Load up the patterns used by read calibration using the
    	 * current DQDQS phase.
    	 */
    	rw_mgr_mem_calibrate_read_load_patterns(0, 1);
    
    	if (gbl->phy_debug_mode_flags & PHY_DEBUG_DISABLE_GUARANTEED_READ)
    		return 0;
    
    	/*
    	 * Altera EMI_RM 2015.05.04 :: Figure 1-26
    	 * Back-to-Back reads of the patterns used for calibration.
    	 */
    
    	ret = rw_mgr_mem_calibrate_read_test_patterns(0, rw_group, 1);
    	if (ret)
    
    		debug_cond(DLEVEL == 1,
    			   "%s:%d Guaranteed read test failed: g=%u p=%u\n",
    			   __func__, __LINE__, rw_group, phase);
    
    /**
     * rw_mgr_mem_calibrate_dqs_enable_calibration() - DQS Enable Calibration
     * @rw_group:	Read/Write Group
     * @test_bgn:	Rank at which the test begins
     *
     * DQS enable calibration ensures reliable capture of the DQ signal without
     * glitches on the DQS line.
     */
    static int rw_mgr_mem_calibrate_dqs_enable_calibration(const u32 rw_group,
    						       const u32 test_bgn)
    {
    	int ret;
    
    	/*
    	 * Altera EMI_RM 2015.05.04 :: Figure 1-27
	 * DQS and DQS Enable Signal Relationships.
    	 */
    	ret = rw_mgr_mem_calibrate_vfifo_find_dqs_en_phase_sweep_dq_in_delay(
    
    /**
     * rw_mgr_mem_calibrate_dq_dqs_centering() - Centering DQ/DQS
     * @rw_group:		Read/Write Group
     * @test_bgn:		Rank at which the test begins
     * @use_read_test:	Perform a read test
     * @update_fom:		Update FOM
     *
 * The centering DQ/DQS stage attempts to align DQ and DQS signals on reads
     * within a group.
     */
    static int
    rw_mgr_mem_calibrate_dq_dqs_centering(const u32 rw_group, const u32 test_bgn,
    				      const int use_read_test,
    				      const int update_fom)
    
    {
    	int ret, grp_calibrated;
    	u32 rank_bgn, sr;
    
    	/*
    	 * Altera EMI_RM 2015.05.04 :: Figure 1-28
    	 * Read per-bit deskew can be done on a per shadow register basis.
    	 */
    	grp_calibrated = 1;
    	for (rank_bgn = 0, sr = 0;
    	     rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
    	     rank_bgn += NUM_RANKS_PER_SHADOW_REG, sr++) {
    		/* Check if this set of ranks should be skipped entirely. */
    		if (param->skip_shadow_regs[sr])
    			continue;
    
    		ret = rw_mgr_mem_calibrate_vfifo_center(rank_bgn, rw_group,
    							rw_group, test_bgn,
    							use_read_test,
    							update_fom);
    		if (ret)
    			continue;
    
    		grp_calibrated = 0;
    	}
    
    	if (!grp_calibrated)
    		return -EIO;
    
    	return 0;
    }
    
    
    /**
     * rw_mgr_mem_calibrate_vfifo() - Calibrate the read valid prediction FIFO
     * @rw_group:		Read/Write Group
     * @test_bgn:		Rank at which the test begins
     *
     * Stage 1: Calibrate the read valid prediction FIFO.
     *
     * This function implements UniPHY calibration Stage 1, as explained in
     * detail in Altera EMI_RM 2015.05.04 , "UniPHY Calibration Stages".
    
     * - read valid prediction will consist of finding:
     *   - DQS enable phase and DQS enable delay (DQS Enable Calibration)
     *   - DQS input phase  and DQS input delay (DQ/DQS Centering)
    
     *  - we also do a per-bit deskew on the DQ lines.
     */
    
    static int rw_mgr_mem_calibrate_vfifo(const u32 rw_group, const u32 test_bgn)
    
    	uint32_t dtaps_per_ptap;
    	uint32_t failed_substage;
    
    
    	debug("%s:%d: %u %u\n", __func__, __LINE__, rw_group, test_bgn);
    
    	/* Update info for sims */
    	reg_file_set_group(rw_group);
    
    	reg_file_set_stage(CAL_STAGE_VFIFO);
    
    	reg_file_set_sub_stage(CAL_SUBSTAGE_GUARANTEED_READ);
    
    	failed_substage = CAL_SUBSTAGE_GUARANTEED_READ;
    
    	/* USER Determine number of delay taps for each phase tap. */
    
    	dtaps_per_ptap = DIV_ROUND_UP(IO_DELAY_PER_OPA_TAP,
    				      IO_DELAY_PER_DQS_EN_DCHAIN_TAP) - 1;
    
    	for (d = 0; d <= dtaps_per_ptap; d += 2) {
    
    		/*
    		 * In RLDRAMX we may be messing the delay of pins in
    
    		 * the same write rw_group but outside of the current read
    		 * the rw_group, but that's ok because we haven't calibrated
    
    			scc_mgr_apply_group_all_out_delay_add_all_ranks(
    
    		for (p = 0; p <= IO_DQDQS_OUT_PHASE_MAX; p++) {
    
    			/* 1) Guaranteed Write */
    			ret = rw_mgr_mem_calibrate_guaranteed_write(rw_group, p);
    			if (ret)
    				break;
    
    			/* 2) DQS Enable Calibration */
    			ret = rw_mgr_mem_calibrate_dqs_enable_calibration(rw_group,
    									  test_bgn);
    			if (ret) {
    
    				failed_substage = CAL_SUBSTAGE_DQS_EN_PHASE;
    
    			 * If doing read after write calibration, do not update
    			 * FOM now. Do it then.
    
    			ret = rw_mgr_mem_calibrate_dq_dqs_centering(rw_group,
    								test_bgn, 1, 0);
    			if (ret) {
    
    				failed_substage = CAL_SUBSTAGE_VFIFO_CENTER;
    
    	/* Calibration Stage 1 failed. */
    
    	set_failing_group_stage(rw_group, CAL_STAGE_VFIFO, failed_substage);
    
    	/* Calibration Stage 1 completed OK. */
    cal_done_ok:
    
    	/*
    	 * Reset the delay chains back to zero if they have moved > 1
    	 * (check for > 1 because loop will increase d even when pass in
    	 * first case).
    	 */
    	if (d > 2)
    
    		scc_mgr_zero_group(rw_group, 1);
    
    
    	return 1;
    }
    
    /* VFIFO Calibration -- Read Deskew Calibration after write deskew */
    static uint32_t rw_mgr_mem_calibrate_vfifo_end(uint32_t read_group,
    					       uint32_t test_bgn)
    {
    	uint32_t rank_bgn, sr;
    	uint32_t grp_calibrated;
    	uint32_t write_group;
    
    	debug("%s:%d %u %u", __func__, __LINE__, read_group, test_bgn);
    
    	/* update info for sims */
    
    	reg_file_set_stage(CAL_STAGE_VFIFO_AFTER_WRITES);
    	reg_file_set_sub_stage(CAL_SUBSTAGE_VFIFO_CENTER);
    
    	write_group = read_group;
    
    	/* update info for sims */
    	reg_file_set_group(read_group);
    
    	grp_calibrated = 1;
    	/* Read per-bit deskew can be done on a per shadow register basis */
    	for (rank_bgn = 0, sr = 0; rank_bgn < RW_MGR_MEM_NUMBER_OF_RANKS;
    		rank_bgn += NUM_RANKS_PER_SHADOW_REG, ++sr) {
    		/* Determine if this set of ranks should be skipped entirely */
    		if (!param->skip_shadow_regs[sr]) {
    		/* This is the last calibration round, update FOM here */
    			if (!rw_mgr_mem_calibrate_vfifo_center(rank_bgn,
    								write_group,
    								read_group,
    								test_bgn, 0,
    								1)) {
    				grp_calibrated = 0;
    			}
    		}
    	}
    
    
    	if (grp_calibrated == 0) {
    		set_failing_group_stage(write_group,
    					CAL_STAGE_VFIFO_AFTER_WRITES,
    					CAL_SUBSTAGE_VFIFO_CENTER);
    		return 0;
    	}
    
    	return 1;
    }
    
    /* Calibrate LFIFO to find smallest read latency */
    static uint32_t rw_mgr_mem_calibrate_lfifo(void)
    {
    	uint32_t found_one;
    	uint32_t bit_chk;
    
    	debug("%s:%d\n", __func__, __LINE__);
    
    	/* update info for sims */
    	reg_file_set_stage(CAL_STAGE_LFIFO);
    	reg_file_set_sub_stage(CAL_SUBSTAGE_READ_LATENCY);
    
    	/* Load up the patterns used by read calibration for all ranks */
    	rw_mgr_mem_calibrate_read_load_patterns(0, 1);
    	found_one = 0;
    
    	do {
    
    		writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
    
    		debug_cond(DLEVEL == 2, "%s:%d lfifo: read_lat=%u",
    			   __func__, __LINE__, gbl->curr_read_lat);
    
    		if (!rw_mgr_mem_calibrate_read_test_all_ranks(0,
    							      NUM_READ_TESTS,
    							      PASS_ALL_BITS,
    							      &bit_chk, 1)) {
    			break;
    		}
    
    		found_one = 1;
    		/* reduce read latency and see if things are working */
    		/* correctly */
    		gbl->curr_read_lat--;
    	} while (gbl->curr_read_lat > 0);
    
    	/* reset the fifos to get pointers to known state */
    
    
    	writel(0, &phy_mgr_cmd->fifo_reset);
    
    
    	if (found_one) {
    		/* add a fudge factor to the read latency that was determined */
    		gbl->curr_read_lat += 2;
    
    		writel(gbl->curr_read_lat, &phy_mgr_cfg->phy_rlat);
    
    		debug_cond(DLEVEL == 2, "%s:%d lfifo: success: using \
    			   read_lat=%u\n", __func__, __LINE__,
    			   gbl->curr_read_lat);
    		return 1;
    	} else {
    		set_failing_group_stage(0xff, CAL_STAGE_LFIFO,
    					CAL_SUBSTAGE_READ_LATENCY);
    
    		debug_cond(DLEVEL == 2, "%s:%d lfifo: failed at initial \
    			   read_lat=%u\n", __func__, __LINE__,
    			   gbl->curr_read_lat);
    		return 0;
    	}
    }
    
    /*
     * issue write test command.
     * two variants are provided. one that just tests a write pattern and
     * another that tests datamask functionality.
     */
    static void rw_mgr_mem_calibrate_write_test_issue(uint32_t group,
    						  uint32_t test_dm)
    {
    	uint32_t mcc_instruction;
    	uint32_t quick_write_mode = (((STATIC_CALIB_STEPS) & CALIB_SKIP_WRITES) &&
    		ENABLE_SUPER_QUICK_CALIBRATION);
    	uint32_t rw_wl_nop_cycles;
    	uint32_t addr;
    
    	/*
    	 * Set counter and jump addresses for the right
    	 * number of NOP cycles.
    	 * The number of supported NOP cycles can range from -1 to infinity
    	 * Three different cases are handled:
    	 *
    	 * 1. For a number of NOP cycles greater than 0, the RW Mgr looping
    	 *    mechanism will be used to insert the right number of NOPs
    	 *
    	 * 2. For a number of NOP cycles equals to 0, the micro-instruction
    	 *    issuing the write command will jump straight to the
    	 *    micro-instruction that turns on DQS (for DDRx), or outputs write
    	 *    data (for RLD), skipping
    	 *    the NOP micro-instruction all together
    	 *
    	 * 3. A number of NOP cycles equal to -1 indicates that DQS must be
    	 *    turned on in the same micro-instruction that issues the write
    	 *    command. Then we need
    	 *    to directly jump to the micro-instruction that sends out the data
    	 *
    	 * NOTE: Implementing this mechanism uses 2 RW Mgr jump-counters
    	 *       (2 and 3). One jump-counter (0) is used to perform multiple
    	 *       write-read operations.
    	 *       one counter left to issue this command in "multiple-group" mode
    	 */
    
    	rw_wl_nop_cycles = gbl->rw_wl_nop_cycles;
    
    	if (rw_wl_nop_cycles == -1) {
    		/*
    		 * CNTR 2 - We want to execute the special write operation that
    		 * turns on DQS right away and then skip directly to the
    		 * instruction that sends out the data. We set the counter to a
    		 * large number so that the jump is always taken.
    		 */
    
    		writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
    
    
    		/* CNTR 3 - Not used */
    		if (test_dm) {
    			mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0_WL_1;
    			writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DATA,
    
    			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
    
    			writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
    
    			       &sdr_rw_load_jump_mgr_regs->load_jump_add3);
    
    		} else {
    			mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0_WL_1;
    
    			writel(RW_MGR_LFSR_WR_RD_BANK_0_DATA,
    				&sdr_rw_load_jump_mgr_regs->load_jump_add2);
    			writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
    				&sdr_rw_load_jump_mgr_regs->load_jump_add3);
    
    		}
    	} else if (rw_wl_nop_cycles == 0) {
    		/*
    		 * CNTR 2 - We want to skip the NOP operation and go straight
    		 * to the DQS enable instruction. We set the counter to a large
    		 * number so that the jump is always taken.
    		 */
    
    		writel(0xFF, &sdr_rw_load_mgr_regs->load_cntr2);
    
    
    		/* CNTR 3 - Not used */
    		if (test_dm) {
    			mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
    			writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_DQS,
    
    			       &sdr_rw_load_jump_mgr_regs->load_jump_add2);
    
    		} else {
    			mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
    
    			writel(RW_MGR_LFSR_WR_RD_BANK_0_DQS,
    				&sdr_rw_load_jump_mgr_regs->load_jump_add2);
    
    		}
    	} else {
    		/*
    		 * CNTR 2 - In this case we want to execute the next instruction
    		 * and NOT take the jump. So we set the counter to 0. The jump
    		 * address doesn't count.
    		 */
    
    		writel(0x0, &sdr_rw_load_mgr_regs->load_cntr2);
    		writel(0x0, &sdr_rw_load_jump_mgr_regs->load_jump_add2);
    
    
    		/*
    		 * CNTR 3 - Set the nop counter to the number of cycles we
    		 * need to loop for, minus 1.
    		 */
    
    		writel(rw_wl_nop_cycles - 1, &sdr_rw_load_mgr_regs->load_cntr3);
    
    		if (test_dm) {
    			mcc_instruction = RW_MGR_LFSR_WR_RD_DM_BANK_0;
    
    			writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_NOP,
    				&sdr_rw_load_jump_mgr_regs->load_jump_add3);
    
    		} else {
    			mcc_instruction = RW_MGR_LFSR_WR_RD_BANK_0;
    
    			writel(RW_MGR_LFSR_WR_RD_BANK_0_NOP,
    				&sdr_rw_load_jump_mgr_regs->load_jump_add3);
    
    	writel(0, SDR_PHYGRP_RWMGRGRP_ADDRESS |
    		  RW_MGR_RESET_READ_DATAPATH_OFFSET);
    
    		writel(0x08, &sdr_rw_load_mgr_regs->load_cntr0);
    
    		writel(0x40, &sdr_rw_load_mgr_regs->load_cntr0);
    
    	writel(mcc_instruction, &sdr_rw_load_jump_mgr_regs->load_jump_add0);
    
    
    	/*
    	 * CNTR 1 - This is used to ensure enough time elapses
    	 * for read data to come back.
    	 */
    
    	writel(0x30, &sdr_rw_load_mgr_regs->load_cntr1);
    
    		writel(RW_MGR_LFSR_WR_RD_DM_BANK_0_WAIT,
    			&sdr_rw_load_jump_mgr_regs->load_jump_add1);
    
    		writel(RW_MGR_LFSR_WR_RD_BANK_0_WAIT,
    			&sdr_rw_load_jump_mgr_regs->load_jump_add1);
    
    	addr = SDR_PHYGRP_RWMGRGRP_ADDRESS | RW_MGR_RUN_SINGLE_GROUP_OFFSET;
    
    	writel(mcc_instruction, addr + (group << 2));
    
    }
    
    /* Test writes, can check for a single bit pass or multiple bit pass */
    static uint32_t rw_mgr_mem_calibrate_write_test(uint32_t rank_bgn,
    	uint32_t write_group, uint32_t use_dm, uint32_t all_correct,
    	uint32_t *bit_chk, uint32_t all_ranks)
    {
    	uint32_t r;
    	uint32_t correct_mask_vg;
    	uint32_t tmp_bit_chk;
    	uint32_t vg;
    	uint32_t rank_end = all_ranks ? RW_MGR_MEM_NUMBER_OF_RANKS :
    		(rank_bgn + NUM_RANKS_PER_SHADOW_REG);
    	uint32_t addr_rw_mgr;
    	uint32_t base_rw_mgr;
    
    	*bit_chk = param->write_correct_mask;
    	correct_mask_vg = param->write_correct_mask_vg;
    
    	for (r = rank_bgn; r < rank_end; r++) {
    		if (param->skip_ranks[r]) {
    			/* request to skip the rank */
    			continue;
    		}
    
    		/* set rank */
    		set_rank_and_odt_mask(r, RW_MGR_ODT_MODE_READ_WRITE);
    
    		tmp_bit_chk = 0;
    
    		addr_rw_mgr = SDR_PHYGRP_RWMGRGRP_ADDRESS;
    
    		for (vg = RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS-1; ; vg--) {
    			/* reset the fifos to get pointers to known state */
    
    			writel(0, &phy_mgr_cmd->fifo_reset);
    
    
    			tmp_bit_chk = tmp_bit_chk <<
    				(RW_MGR_MEM_DQ_PER_WRITE_DQS /
    				RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS);
    			rw_mgr_mem_calibrate_write_test_issue(write_group *
    				RW_MGR_MEM_VIRTUAL_GROUPS_PER_WRITE_DQS+vg,
    				use_dm);
    
    
    			base_rw_mgr = readl(addr_rw_mgr);
    
    			tmp_bit_chk = tmp_bit_chk | (correct_mask_vg & ~(base_rw_mgr));
    			if (vg == 0)
    				break;
    		}
    		*bit_chk &= tmp_bit_chk;
    	}
    
    	if (all_correct) {
    		set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
    		debug_cond(DLEVEL == 2, "write_test(%u,%u,ALL) : %u == \
    			   %u => %lu", write_group, use_dm,
    			   *bit_chk, param->write_correct_mask,
    			   (long unsigned int)(*bit_chk ==
    			   param->write_correct_mask));
    		return *bit_chk == param->write_correct_mask;
    	} else {
    		set_rank_and_odt_mask(0, RW_MGR_ODT_MODE_OFF);
    		debug_cond(DLEVEL == 2, "write_test(%u,%u,ONE) : %u != ",
    		       write_group, use_dm, *bit_chk);
    		debug_cond(DLEVEL == 2, "%lu" " => %lu", (long unsigned int)0,
    			(long unsigned int)(*bit_chk != 0));
    		return *bit_chk != 0x00;
    	}
    }
    
    /*
     * center all windows. do per-bit-deskew to possibly increase size of
     * certain windows.
     */
    static uint32_t rw_mgr_mem_calibrate_writes_center(uint32_t rank_bgn,
    	uint32_t write_group, uint32_t test_bgn)
    {
    	uint32_t i, p, min_index;
    	int32_t d;
    	/*
    	 * Store these as signed since there are comparisons with
    	 * signed numbers.
    	 */
    	uint32_t bit_chk;
    	uint32_t sticky_bit_chk;
    	int32_t left_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
    	int32_t right_edge[RW_MGR_MEM_DQ_PER_WRITE_DQS];
    	int32_t mid;
    	int32_t mid_min, orig_mid_min;
    	int32_t new_dqs, start_dqs, shift_dq;
    	int32_t dq_margin, dqs_margin, dm_margin;
    	uint32_t stop;
    	uint32_t temp_dq_out1_delay;
    	uint32_t addr;
    
    	debug("%s:%d %u %u", __func__, __LINE__, write_group, test_bgn);
    
    	dm_margin = 0;
    
    
    	addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
    
    			  (RW_MGR_MEM_DQ_PER_WRITE_DQS << 2));
    
    	/* per-bit deskew */
    
    	/*
    	 * set the left and right edge of each bit to an illegal value
    	 * use (IO_IO_OUT1_DELAY_MAX + 1) as an illegal value.
    	 */
    	sticky_bit_chk = 0;
    	for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
    		left_edge[i]  = IO_IO_OUT1_DELAY_MAX + 1;
    		right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
    	}
    
    	/* Search for the left edge of the window for each bit */
    	for (d = 0; d <= IO_IO_OUT1_DELAY_MAX; d++) {
    
    		scc_mgr_apply_group_dq_out1_delay(write_group, d);
    
    		writel(0, &sdr_scc_mgr->update);
    
    
    		/*
    		 * Stop searching when the read test doesn't pass AND when
    		 * we've seen a passing read on every bit.
    		 */
    		stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
    			0, PASS_ONE_BIT, &bit_chk, 0);
    		sticky_bit_chk = sticky_bit_chk | bit_chk;
    		stop = stop && (sticky_bit_chk == param->write_correct_mask);
    		debug_cond(DLEVEL == 2, "write_center(left): dtap=%d => %u \
    			   == %u && %u [bit_chk= %u ]\n",
    			d, sticky_bit_chk, param->write_correct_mask,
    			stop, bit_chk);
    
    		if (stop == 1) {
    			break;
    		} else {
    			for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
    				if (bit_chk & 1) {
    					/*
    					 * Remember a passing test as the
    					 * left_edge.
    					 */
    					left_edge[i] = d;
    				} else {
    					/*
    					 * If a left edge has not been seen
    					 * yet, then a future passing test will
    					 * mark this edge as the right edge.
    					 */
    					if (left_edge[i] ==
    						IO_IO_OUT1_DELAY_MAX + 1) {
    						right_edge[i] = -(d + 1);
    					}
    				}
    				debug_cond(DLEVEL == 2, "write_center[l,d=%d):", d);
    				debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
    					   (int)(bit_chk & 1), i, left_edge[i]);
    				debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
    				       right_edge[i]);
    				bit_chk = bit_chk >> 1;
    			}
    		}
    	}
    
    	/* Reset DQ delay chains to 0 */
    
    	scc_mgr_apply_group_dq_out1_delay(0);
    
    	sticky_bit_chk = 0;
    	for (i = RW_MGR_MEM_DQ_PER_WRITE_DQS - 1;; i--) {
    		debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
    			   %d right_edge[%u]: %d\n", __func__, __LINE__,
    			   i, left_edge[i], i, right_edge[i]);
    
    		/*
    		 * Check for cases where we haven't found the left edge,
    		 * which makes our assignment of the the right edge invalid.
    		 * Reset it to the illegal value.
    		 */
    		if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) &&
    		    (right_edge[i] != IO_IO_OUT1_DELAY_MAX + 1)) {
    			right_edge[i] = IO_IO_OUT1_DELAY_MAX + 1;
    			debug_cond(DLEVEL == 2, "%s:%d write_center: reset \
    				   right_edge[%u]: %d\n", __func__, __LINE__,
    				   i, right_edge[i]);
    		}
    
    		/*
    		 * Reset sticky bit (except for bits where we have
    		 * seen the left edge).
    		 */
    		sticky_bit_chk = sticky_bit_chk << 1;
    		if ((left_edge[i] != IO_IO_OUT1_DELAY_MAX + 1))
    			sticky_bit_chk = sticky_bit_chk | 1;
    
    		if (i == 0)
    			break;
    	}
    
    	/* Search for the right edge of the window for each bit */
    	for (d = 0; d <= IO_IO_OUT1_DELAY_MAX - start_dqs; d++) {
    		scc_mgr_apply_group_dqs_io_and_oct_out1(write_group,
    							d + start_dqs);
    
    
    		writel(0, &sdr_scc_mgr->update);
    
    
    		/*
    		 * Stop searching when the read test doesn't pass AND when
    		 * we've seen a passing read on every bit.
    		 */
    		stop = !rw_mgr_mem_calibrate_write_test(rank_bgn, write_group,
    			0, PASS_ONE_BIT, &bit_chk, 0);
    
    		sticky_bit_chk = sticky_bit_chk | bit_chk;
    		stop = stop && (sticky_bit_chk == param->write_correct_mask);
    
    		debug_cond(DLEVEL == 2, "write_center (right): dtap=%u => %u == \
    			   %u && %u\n", d, sticky_bit_chk,
    			   param->write_correct_mask, stop);
    
    		if (stop == 1) {
    			if (d == 0) {
    				for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS;
    					i++) {
    					/* d = 0 failed, but it passed when
    					testing the left edge, so it must be
    					marginal, set it to -1 */
    					if (right_edge[i] ==
    						IO_IO_OUT1_DELAY_MAX + 1 &&
    						left_edge[i] !=
    						IO_IO_OUT1_DELAY_MAX + 1) {
    						right_edge[i] = -1;
    					}
    				}
    			}
    			break;
    		} else {
    			for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
    				if (bit_chk & 1) {
    					/*
    					 * Remember a passing test as
    					 * the right_edge.
    					 */
    					right_edge[i] = d;
    				} else {
    					if (d != 0) {
    						/*
    						 * If a right edge has not
    						 * been seen yet, then a future
    						 * passing test will mark this
    						 * edge as the left edge.
    						 */
    						if (right_edge[i] ==
    						    IO_IO_OUT1_DELAY_MAX + 1)
    							left_edge[i] = -(d + 1);
    					} else {
    						/*
    						 * d = 0 failed, but it passed
    						 * when testing the left edge,
    						 * so it must be marginal, set
    						 * it to -1.
    						 */
    						if (right_edge[i] ==
    						    IO_IO_OUT1_DELAY_MAX + 1 &&
    						    left_edge[i] !=
    						    IO_IO_OUT1_DELAY_MAX + 1)
    							right_edge[i] = -1;
    						/*
    						 * If a right edge has not been
    						 * seen yet, then a future
    						 * passing test will mark this
    						 * edge as the left edge.
    						 */
    						else if (right_edge[i] ==
    							IO_IO_OUT1_DELAY_MAX +
    							1)
    							left_edge[i] = -(d + 1);
    					}
    				}
    				debug_cond(DLEVEL == 2, "write_center[r,d=%d):", d);
    				debug_cond(DLEVEL == 2, "bit_chk_test=%d left_edge[%u]: %d",
    					   (int)(bit_chk & 1), i, left_edge[i]);
    				debug_cond(DLEVEL == 2, "right_edge[%u]: %d\n", i,
    					   right_edge[i]);
    				bit_chk = bit_chk >> 1;
    			}
    		}
    	}
    
    	/* Check that all bits have a window */
    	for (i = 0; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
    		debug_cond(DLEVEL == 2, "%s:%d write_center: left_edge[%u]: \
    			   %d right_edge[%u]: %d", __func__, __LINE__,
    			   i, left_edge[i], i, right_edge[i]);
    		if ((left_edge[i] == IO_IO_OUT1_DELAY_MAX + 1) ||
    		    (right_edge[i] == IO_IO_OUT1_DELAY_MAX + 1)) {
    			set_failing_group_stage(test_bgn + i,
    						CAL_STAGE_WRITES,
    						CAL_SUBSTAGE_WRITES_CENTER);
    			return 0;
    		}
    	}
    
    	/* Find middle of window for each DQ bit */
    	mid_min = left_edge[0] - right_edge[0];
    	min_index = 0;
    	for (i = 1; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++) {
    		mid = left_edge[i] - right_edge[i];
    		if (mid < mid_min) {
    			mid_min = mid;
    			min_index = i;
    		}
    	}
    
    	/*
    	 * -mid_min/2 represents the amount that we need to move DQS.
    	 * If mid_min is odd and positive we'll need to add one to
    	 * make sure the rounding in further calculations is correct
    	 * (always bias to the right), so just add 1 for all positive values.
    	 */
    	if (mid_min > 0)
    		mid_min++;
    	mid_min = mid_min / 2;
    	debug_cond(DLEVEL == 1, "%s:%d write_center: mid_min=%d\n", __func__,
    		   __LINE__, mid_min);
    
    	/* Determine the amount we can change DQS (which is -mid_min) */
    	orig_mid_min = mid_min;
    	new_dqs = start_dqs;
    	mid_min = 0;
    	debug_cond(DLEVEL == 1, "%s:%d write_center: start_dqs=%d new_dqs=%d \
    		   mid_min=%d\n", __func__, __LINE__, start_dqs, new_dqs, mid_min);
    	/* Initialize data for export structures */
    	dqs_margin = IO_IO_OUT1_DELAY_MAX + 1;
    	dq_margin  = IO_IO_OUT1_DELAY_MAX + 1;
    
    	/* add delay to bring centre of all DQ windows to the same "level" */
    	for (i = 0, p = test_bgn; i < RW_MGR_MEM_DQ_PER_WRITE_DQS; i++, p++) {
    		/* Use values before divide by 2 to reduce round off error */
    		shift_dq = (left_edge[i] - right_edge[i] -
    			(left_edge[min_index] - right_edge[min_index]))/2  +
    		(orig_mid_min - mid_min);
    
    		debug_cond(DLEVEL == 2, "%s:%d write_center: before: shift_dq \
    			   [%u]=%d\n", __func__, __LINE__, i, shift_dq);
    
    
    		addr = SDR_PHYGRP_SCCGRP_ADDRESS | SCC_MGR_IO_OUT1_DELAY_OFFSET;
    
    		temp_dq_out1_delay = readl(addr + (i << 2));
    
    		if (shift_dq + (int32_t)temp_dq_out1_delay >
    			(int32_t)IO_IO_OUT1_DELAY_MAX) {
    			shift_dq = (int32_t)IO_IO_OUT1_DELAY_MAX - temp_dq_out1_delay;
    		} else if (shift_dq + (int32_t)temp_dq_out1_delay < 0) {
    			shift_dq = -(int32_t)temp_dq_out1_delay;
    		}
    		debug_cond(DLEVEL == 2, "write_center: after: shift_dq[%u]=%d\n",
    			   i, shift_dq);
    
    		scc_mgr_set_dq_out1_delay(i, temp_dq_out1_delay + shift_dq);
    
    		scc_mgr_load_dq(i);
    
    		debug_cond(DLEVEL == 2, "write_center: margin[%u]=[%d,%d]\n", i,
    			   left_edge[i] - shift_dq + (-mid_min),
    			   right_edge[i] + shift_dq - (-mid_min));
    		/* To determine values for export structures */
    		if (left_edge[i] - shift_dq + (-mid_min) < dq_margin)
    			dq_margin = left_edge[i] - shift_dq + (-mid_min);