Hi Simon, On Thu, Feb 5, 2015 at 12:24 AM, Simon Glass <s...@chromium.org> wrote: > Hi Bin, > > On 3 February 2015 at 04:45, Bin Meng <bmeng...@gmail.com> wrote: >> Add various utility codes needed for Quark MRC. >> >> Signed-off-by: Bin Meng <bmeng...@gmail.com> >> >> --- >> There are 12 checkpatch warnings in this patch, which are: >> >> warning: arch/x86/cpu/quark/mrc_util.c,1446: Too many leading tabs - >> consider code refactoring >> warning: arch/x86/cpu/quark/mrc_util.c,1450: line over 80 characters >> ... >> >> Fixing 'Too many leading tabs ...' will be very dangerous, as I don't have >> all the details on how Intel's MRC codes are actually written to play with >> the hardware. Trying to refactor them may lead to a non-working MRC codes. >> For the 'line over 80 characters' issue, we have to leave them as is now >> due to the 'Too many leading tabs ...', sigh. > > The code looks fine for the most part - I only have nits. > > I'm not keen on BIT though. See my comments and what improvements you > can make. It would be great to drop BIT.
I found it is hard to replace BIT to something meaningful, as lots of registers are undocumented. > Re the debug macros, I suppose they are OK to keep. U-Boot doesn't > have the concept of debug() for different categories or levels of > verbosity. Yep, maybe we can enhance U-Boot's debug() in the future. >> >> arch/x86/cpu/quark/hte.c | 398 +++++++++++ >> arch/x86/cpu/quark/hte.h | 44 ++ >> arch/x86/cpu/quark/mrc_util.c | 1499 >> +++++++++++++++++++++++++++++++++++++++++ >> arch/x86/cpu/quark/mrc_util.h | 153 +++++ >> 4 files changed, 2094 insertions(+) >> create mode 100644 arch/x86/cpu/quark/hte.c >> create mode 100644 arch/x86/cpu/quark/hte.h >> create mode 100644 arch/x86/cpu/quark/mrc_util.c >> create mode 100644 arch/x86/cpu/quark/mrc_util.h >> >> diff --git a/arch/x86/cpu/quark/hte.c b/arch/x86/cpu/quark/hte.c >> new file mode 100644 >> index 0000000..d813c9c >> --- /dev/null >> +++ b/arch/x86/cpu/quark/hte.c >> @@ -0,0 +1,398 @@ >> +/* >> + * Copyright (C) 2013, Intel Corporation >> + * Copyright (C) 2015, Bin Meng <bmeng...@gmail.com> >> + * >> + * Ported from Intel released Quark UEFI BIOS >> + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ > > Remove training slash? Fixed globally. >> + * >> + * SPDX-License-Identifier: Intel >> + */ >> + >> +#include <common.h> >> +#include <asm/arch/mrc.h> >> +#include <asm/arch/msg_port.h> >> +#include "mrc_util.h" >> +#include "hte.h" >> + >> +/** >> + * This function enables HTE to detect all possible errors for > > s/This function// globally > > I'd suggest present tense, like "enable HTE to detect all possible errors > for... Fixed globally. >> + * the given training parameters (per-bit or full byte lane). >> + */ >> +static void hte_enable_all_errors(void) >> +{ >> + msg_port_write(HTE, 0x000200A2, 0xFFFFFFFF); >> + msg_port_write(HTE, 0x000200A3, 0x000000FF); >> + msg_port_write(HTE, 0x000200A4, 0x00000000); > > Lower case hex again. All of Intel's MRC codes are using upper case, to keep it consistent (or maybe I don't want to replace every place to lower case ..) I chose not to fix these lower case, and just leave them as is now. >> +} >> + >> +/** >> + * This function goes and reads the HTE register in order to find any error >> + * >> + * @return: The errors detected in the HTE status register >> + */ >> +static u32 hte_check_errors(void) >> +{ >> + return msg_port_read(HTE, 0x000200A7); >> +} >> + >> +/** >> + * This function waits until HTE finishes >> + */ >> +static void hte_wait_for_complete(void) >> +{ >> + u32 tmp; >> + >> + ENTERFN(); >> + >> + do {} while ((msg_port_read(HTE, 0x00020012) & BIT30) != 0); >> + >> + tmp = msg_port_read(HTE, 0x00020011); >> + tmp |= BIT9; >> + tmp &= ~(BIT12 | BIT13); >> + msg_port_write(HTE, 0x00020011, tmp); >> + >> + LEAVEFN(); >> +} >> + >> +/** >> + * This function clears registers related with errors in the HTE >> + */ >> +static void hte_clear_error_regs(void) >> +{ >> + u32 tmp; >> + >> + /* >> + * Clear all HTE errors and enable error checking >> + * for burst and chunk. >> + */ >> + tmp = msg_port_read(HTE, 0x000200A1); >> + tmp |= BIT8; >> + msg_port_write(HTE, 0x000200A1, tmp); >> +} >> + >> +/** >> + * This function executes basic single cache line memory write/read/verify >> + * test using simple constant pattern, different for READ_RAIN and > > REAS_TRAIN? Fixed globally. >> + * WRITE_TRAIN modes. >> + * >> + * See hte_basic_write_read() which is external visible wrapper. > > the external (fix below also) > >> + * >> + * @mrc_params: host struture for all MRC global data >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @first_run: if set then hte registers are configured, otherwise it is > > the hte? Changed to 'the HTE' >> + * assumed configuration is done and just re-run the test > > assumed configuration is done the we just re-run the test > , > (fix below also) Fixed. >> + * @mode: READ_TRAIN or WRITE_TRAIN (the difference is in the pattern) >> + * >> + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) >> + */ >> +static u16 hte_basic_data_cmp(struct mrc_params *mrc_params, u32 addr, >> + u8 first_run, u8 mode) >> +{ >> + u32 pattern; >> + u32 offset; >> + >> + if (first_run) { >> + msg_port_write(HTE, 0x00020020, 0x01B10021); >> + msg_port_write(HTE, 0x00020021, 0x06000000); >> + msg_port_write(HTE, 0x00020022, addr >> 6); >> + msg_port_write(HTE, 0x00020062, 0x00800015); >> + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); >> + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); >> + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); >> + msg_port_write(HTE, 0x00020061, 0x00030008); >> + >> + if (mode == WRITE_TRAIN) >> + pattern = 0xC33C0000; >> + else /* READ_TRAIN */ >> + pattern = 0xAA5555AA; >> + >> + for (offset = 0x80; offset <= 0x8F; offset++) >> + msg_port_write(HTE, offset, pattern); >> + } >> + >> + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); >> + msg_port_write(HTE, 0x00020011, 0x00011000); >> + msg_port_write(HTE, 0x00020011, 0x00011100); >> + >> + hte_wait_for_complete(); >> + >> + /* >> + * Return bits 15:8 of HTE_CH0_ERR_XSTAT to check for >> + * any bytelane errors. >> + */ >> + return (hte_check_errors() >> 8) & 0xFF; >> +} >> + >> +/** >> + * This function examines single cache line memory with write/read/verify >> + * test using multiple data patterns (victim-aggressor algorithm). >> + * >> + * See hte_write_stress_bit_lanes() which is external visible wrapper. >> + * >> + * @mrc_params: host struture for all MRC global data > > structure Fixed globally. >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @loop_cnt: number of test iterations >> + * @seed_victim: victim data pattern seed >> + * @seed_aggressor: aggressor data pattern seed >> + * @victim_bit: should be 0 as auto rotate feature is in use > > auto-rotate Fixed. >> + * @first_run: if set then hte registers are configured, otherwise it is > > Actually I wonder if HTE would be better than hte, which looks like a > 'the' typo, particularly if you leave out 'the'. Also can you please > comment at the top of the file (first function) what HTE stands for)? Changed to 'the HTE'. Intel doc only mentions MTE (Memory Training Engine), and its registers are undocumented. I am not sure if this HTE means MTE in Intel's doc. So I don't add any comment for HTE. >> + * assumed configuration is done and just re-run the test >> + * >> + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) >> + */ >> +static u16 hte_rw_data_cmp(struct mrc_params *mrc_params, u32 addr, >> + u8 loop_cnt, u32 seed_victim, u32 seed_aggressor, >> + u8 victim_bit, u8 first_run) >> +{ >> + u32 offset; >> + u32 tmp; >> + >> + if (first_run) { >> + msg_port_write(HTE, 0x00020020, 0x00910024); >> + msg_port_write(HTE, 0x00020023, 0x00810024); >> + msg_port_write(HTE, 0x00020021, 0x06070000); >> + msg_port_write(HTE, 0x00020024, 0x06070000); >> + msg_port_write(HTE, 0x00020022, addr >> 6); >> + msg_port_write(HTE, 0x00020025, addr >> 6); >> + msg_port_write(HTE, 0x00020062, 0x0000002A); >> + msg_port_write(HTE, 0x00020063, seed_victim); >> + msg_port_write(HTE, 0x00020064, seed_aggressor); >> + msg_port_write(HTE, 0x00020065, seed_victim); >> + >> + /* >> + * Write the pattern buffers to select the victim bit >> + * >> + * Start with bit0 >> + */ >> + for (offset = 0x80; offset <= 0x8F; offset++) { >> + if ((offset % 8) == victim_bit) >> + msg_port_write(HTE, offset, 0x55555555); >> + else >> + msg_port_write(HTE, offset, 0xCCCCCCCC); >> + } >> + >> + msg_port_write(HTE, 0x00020061, 0x00000000); >> + msg_port_write(HTE, 0x00020066, 0x03440000); >> + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); >> + } >> + >> + tmp = 0x10001000 | (loop_cnt << 16); >> + msg_port_write(HTE, 0x00020011, tmp); >> + msg_port_write(HTE, 0x00020011, tmp | BIT8); >> + >> + hte_wait_for_complete(); >> + >> + /* >> + * Return bits 15:8 of HTE_CH0_ERR_XSTAT to check for >> + * any bytelane errors. >> + */ >> + return (hte_check_errors() >> 8) & 0xFF; >> +} >> + >> +/** >> + * This function uses HW HTE engine to initialize or test all memory >> attached >> + * to a given DUNIT. If flag is MRC_MEM_INIT, this routine writes 0s to all >> + * memory locations to initialize ECC. If flag is MRC_MEM_TEST, this routine >> + * will send an 5AA55AA5 pattern to all memory locations on the RankMask and >> + * then read it back. Then it sends an A55AA55A pattern to all memory >> locations >> + * on the RankMask and reads it back. >> + * >> + * @mrc_params: host struture for all MRC global data >> + * @flag: MRC_MEM_INIT or MRC_MEM_TEST >> + * >> + * @return: errors register showing HTE failures. Also prints out which rank >> + * failed the HTE test if failure occurs. For rank detection to >> work, >> + * the address map must be left in its default state. If MRC >> changes >> + * the address map, this function must be modified to change it >> back >> + * to default at the beginning, then restore it at the end. >> + */ >> +u32 hte_mem_init(struct mrc_params *mrc_params, u8 flag) >> +{ >> + u32 offset; >> + int test_num; >> + int i; >> + >> + /* >> + * Clear out the error registers at the start of each memory >> + * init or memory test run. >> + */ >> + hte_clear_error_regs(); >> + >> + msg_port_write(HTE, 0x00020062, 0x00000015); >> + >> + for (offset = 0x80; offset <= 0x8F; offset++) >> + msg_port_write(HTE, offset, ((offset & 1) ? 0xA55A : >> 0x5AA5)); >> + >> + msg_port_write(HTE, 0x00020021, 0x00000000); >> + msg_port_write(HTE, 0x00020022, (mrc_params->mem_size >> 6) - 1); >> + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); >> + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); >> + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); >> + msg_port_write(HTE, 0x00020066, 0x03000000); >> + >> + switch (flag) { >> + case MRC_MEM_INIT: >> + /* >> + * Only 1 write pass through memory is needed >> + * to initialize ECC >> + */ >> + test_num = 1; >> + break; >> + case MRC_MEM_TEST: >> + /* Write/read then write/read with inverted pattern */ >> + test_num = 4; >> + break; >> + default: >> + DPF(D_INFO, "Unknown parameter for flag: %d\n", flag); >> + return 0xFFFFFFFF; >> + } >> + >> + DPF(D_INFO, "hte_mem_init"); > > debug() Keep to use DPF(). >> + >> + for (i = 0; i < test_num; i++) { >> + DPF(D_INFO, "."); >> + >> + if (i == 0) { >> + msg_port_write(HTE, 0x00020061, 0x00000000); >> + msg_port_write(HTE, 0x00020020, 0x00110010); >> + } else if (i == 1) { >> + msg_port_write(HTE, 0x00020061, 0x00000000); >> + msg_port_write(HTE, 0x00020020, 0x00010010); >> + } else if (i == 2) { >> + msg_port_write(HTE, 0x00020061, 0x00010100); >> + msg_port_write(HTE, 0x00020020, 0x00110010); >> + } else { >> + msg_port_write(HTE, 0x00020061, 0x00010100); >> + msg_port_write(HTE, 0x00020020, 0x00010010); >> + } >> + >> + msg_port_write(HTE, 0x00020011, 0x00111000); >> + msg_port_write(HTE, 0x00020011, 0x00111100); >> + >> + hte_wait_for_complete(); >> + >> + /* If this is a READ pass, check for errors at the end */ >> + if ((i % 2) == 1) { >> + /* Return immediately if error */ >> + if (hte_check_errors()) >> + break; >> + } >> + } >> + >> + DPF(D_INFO, "done\n"); >> + >> + return hte_check_errors(); >> +} >> + >> +/** >> + * This function executes basic single cache line memory write/read/verify > > 'executes a basic' Fixed. >> + * test using simple constant pattern, different for READ_RAIN and >> + * WRITE_TRAIN modes. >> + * >> + * @mrc_params: host struture for all MRC global data > > structure, please fix globally Fixed globally. >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @first_run: if set then hte registers are configured, otherwise it is >> + * assumed configuration is done and just re-run the test >> + * @mode: READ_TRAIN or WRITE_TRAIN (the difference is in the pattern) >> + * >> + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) >> + */ >> +u16 hte_basic_write_read(struct mrc_params *mrc_params, u32 addr, >> + u8 first_run, u8 mode) > > Why do we use u8 for these? Would uint be good enough? Just a suggestion. > >> +{ >> + u16 errors; >> + >> + ENTERFN(); >> + >> + /* Enable all error reporting in preparation for HTE test */ >> + hte_enable_all_errors(); >> + hte_clear_error_regs(); >> + >> + errors = hte_basic_data_cmp(mrc_params, addr, first_run, mode); >> + >> + LEAVEFN(); >> + >> + return errors; >> +} >> + >> +/** >> + * This function examines single cache line memory with write/read/verify > > examines a single-cache-line memory > > (at least I think this is what it is saying) Fixed globally. >> + * test using multiple data patterns (victim-aggressor algorithm). >> + * >> + * @mrc_params: host struture for all MRC global data >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @first_run: if set then hte registers are configured, otherwise it is >> + * assumed configuration is done and just re-run the test >> + * >> + * @return: byte lane failure on each bit (for Quark only bit0 and bit1) >> + */ >> +u16 hte_write_stress_bit_lanes(struct mrc_params *mrc_params, >> + u32 addr, u8 first_run) >> +{ >> + u16 errors; >> + u8 victim_bit = 0; >> + >> + ENTERFN(); >> + >> + /* Enable all error reporting in preparation for HTE test */ >> + hte_enable_all_errors(); >> + hte_clear_error_regs(); >> + >> + /* >> + * Loop through each bit in the bytelane. >> + * >> + * Each pass creates a victim bit while keeping all other bits the >> same >> + * as aggressors. AVN HTE adds an auto-rotate feature which allows us >> + * to program the entire victim/aggressor sequence in 1 step. > > What is AVN? I don't know. I guess it might be Intel's Avoton core in some Atom processors. So does not change the comment .. >> + * >> + * The victim bit rotates on each pass so no need to have software >> + * implement a victim bit loop like on VLV. > > VLV? I think it is sometimes better to write these out and put the > abbreviation in brackets after it, at least once in the file. Like above, I guess it is Valleyview (VLV). Don't know if it is correct. So keep it unchanged. > >> + */ >> + errors = hte_rw_data_cmp(mrc_params, addr, HTE_LOOP_CNT, >> + HTE_LFSR_VICTIM_SEED, >> HTE_LFSR_AGRESSOR_SEED, >> + victim_bit, first_run); >> + >> + LEAVEFN(); >> + >> + return errors; >> +} >> + >> +/** >> + * This function execute basic single cache line memory write or read. > > as above > >> + * This is just for receive enable / fine write levelling purpose. > > write-levelling (I think that's what you mean) Fixed >> + * >> + * @addr: memory adress being tested (must hit specific channel/rank) >> + * @first_run: if set then hte registers are configured, otherwise it is >> + * assumed configuration is done and just re-run the test >> + * @is_write: when non-zero memory write operation executed, otherwise read >> + */ >> +void hte_mem_op(u32 addr, u8 first_run, u8 is_write) >> +{ >> + u32 offset; >> + u32 tmp; >> + >> + hte_enable_all_errors(); >> + hte_clear_error_regs(); >> + >> + if (first_run) { >> + tmp = is_write ? 0x01110021 : 0x01010021; >> + msg_port_write(HTE, 0x00020020, tmp); >> + >> + msg_port_write(HTE, 0x00020021, 0x06000000); >> + msg_port_write(HTE, 0x00020022, addr >> 6); >> + msg_port_write(HTE, 0x00020062, 0x00800015); >> + msg_port_write(HTE, 0x00020063, 0xAAAAAAAA); >> + msg_port_write(HTE, 0x00020064, 0xCCCCCCCC); >> + msg_port_write(HTE, 0x00020065, 0xF0F0F0F0); >> + msg_port_write(HTE, 0x00020061, 0x00030008); >> + >> + for (offset = 0x80; offset <= 0x8F; offset++) >> + msg_port_write(HTE, offset, 0xC33C0000); >> + } >> + >> + msg_port_write(HTE, 0x000200A1, 0xFFFF1000); >> + msg_port_write(HTE, 0x00020011, 0x00011000); >> + msg_port_write(HTE, 0x00020011, 0x00011100); >> + >> + hte_wait_for_complete(); >> +} >> diff --git a/arch/x86/cpu/quark/hte.h b/arch/x86/cpu/quark/hte.h >> new file mode 100644 >> index 0000000..3a173ea >> --- /dev/null >> +++ b/arch/x86/cpu/quark/hte.h >> @@ -0,0 +1,44 @@ >> +/* >> + * Copyright (C) 2013, Intel Corporation >> + * Copyright (C) 2015, Bin Meng <bmeng...@gmail.com> >> + * >> + * Ported from Intel released Quark UEFI BIOS >> + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ >> + * >> + * SPDX-License-Identifier: Intel >> + */ >> + >> +#ifndef _HTE_H_ >> +#define _HTE_H_ >> + >> +enum { >> + MRC_MEM_INIT, >> + MRC_MEM_TEST >> +}; >> + >> +enum { >> + READ_TRAIN, >> + WRITE_TRAIN >> +}; >> + >> +/* >> + * EXP_LOOP_CNT field of HTE_CMD_CTL >> + * >> + * This CANNOT be less than 4! >> + */ >> +#define HTE_LOOP_CNT 5 >> + >> +/* random seed for victim */ >> +#define HTE_LFSR_VICTIM_SEED 0xF294BA21 >> + >> +/* random seed for aggressor */ >> +#define HTE_LFSR_AGRESSOR_SEED 0xEBA7492D >> + >> +u32 hte_mem_init(struct mrc_params *mrc_params, u8 flag); >> +u16 hte_basic_write_read(struct mrc_params *mrc_params, u32 addr, >> + u8 first_run, u8 mode); >> +u16 hte_write_stress_bit_lanes(struct mrc_params *mrc_params, >> + u32 addr, u8 first_run); >> +void hte_mem_op(u32 addr, u8 first_run, u8 is_write); > > Can you move the comments from the .c to the .h for these exported functions? > These routines are only used by MRC internally, and not public APIs. Thus I don't move the comments to header files. >> + >> +#endif /* _HTE_H_ */ >> diff --git a/arch/x86/cpu/quark/mrc_util.c b/arch/x86/cpu/quark/mrc_util.c >> new file mode 100644 >> index 0000000..1ae42d6 >> --- /dev/null >> +++ b/arch/x86/cpu/quark/mrc_util.c >> @@ -0,0 +1,1499 @@ >> +/* >> + * Copyright (C) 2013, Intel Corporation >> + * Copyright (C) 2015, Bin Meng <bmeng...@gmail.com> >> + * >> + * Ported from Intel released Quark UEFI BIOS >> + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ >> + * >> + * SPDX-License-Identifier: Intel >> + */ >> + >> +#include <common.h> >> +#include <asm/arch/device.h> >> +#include <asm/arch/mrc.h> >> +#include <asm/arch/msg_port.h> >> +#include "mrc_util.h" >> +#include "hte.h" >> +#include "smc.h" >> + >> +static const uint8_t vref_codes[64] = { >> + /* lowest to highest */ >> + 0x3F, 0x3E, 0x3D, 0x3C, 0x3B, 0x3A, 0x39, 0x38, >> + 0x37, 0x36, 0x35, 0x34, 0x33, 0x32, 0x31, 0x30, >> + 0x2F, 0x2E, 0x2D, 0x2C, 0x2B, 0x2A, 0x29, 0x28, >> + 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21, 0x20, >> + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, >> + 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, >> + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, >> + 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F >> +}; >> + >> +void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask) >> +{ >> + msg_port_write(unit, addr, >> + (msg_port_read(unit, addr) & ~(mask)) | >> + ((data) & (mask))); >> +} >> + >> +void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask) >> +{ >> + msg_port_alt_write(unit, addr, >> + (msg_port_alt_read(unit, addr) & ~(mask)) | >> + ((data) & (mask))); >> +} >> + >> +void mrc_post_code(uint8_t major, uint8_t minor) >> +{ >> + /* send message to UART */ >> + DPF(D_INFO, "POST: 0x%01x%02x\n", major, minor); >> + >> + /* error check */ >> + if (major == 0xEE) >> + hang(); >> +} >> + >> +/* Delay number of nanoseconds */ >> +void delay_n(uint32_t ns) >> +{ >> + /* 1000 MHz clock has 1ns period --> no conversion required */ >> + uint64_t final_tsc = rdtsc(); > > blank line here after declarations end > Fixed. >> + final_tsc += ((get_tbclk_mhz() * ns) / 1000); >> + >> + while (rdtsc() < final_tsc) >> + ; >> +} >> + >> +/* Delay number of microseconds */ >> +void delay_u(uint32_t ms) >> +{ >> + /* 64-bit math is not an option, just use loops */ >> + while (ms--) >> + delay_n(1000); >> +} > > Some day I suspect these could be pulled out into general x86 > functions. Let's see if anything else needs them first. Agreed. >> + >> +/* Select Memory Manager as the source for PRI interface */ >> +void select_mem_mgr(void) >> +{ >> + u32 dco; >> + >> + ENTERFN(); >> + >> + dco = msg_port_read(MEM_CTLR, DCO); >> + dco &= ~BIT28; > > ~(1 << 28) > > Ah but I see you are using this everywhere. > > U-Boot tries to avoid defining this sort of thing. See some comments > below about this. Did not fix the BIT stuff. >> + msg_port_write(MEM_CTLR, DCO, dco); >> + >> + LEAVEFN(); >> +} >> + >> +/* Select HTE as the source for PRI interface */ >> +void select_hte(void) >> +{ >> + u32 dco; >> + >> + ENTERFN(); >> + >> + dco = msg_port_read(MEM_CTLR, DCO); >> + dco |= BIT28; >> + msg_port_write(MEM_CTLR, DCO, dco); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * Send DRAM command >> + * data should be formated using DCMD_Xxxx macro or emrsXCommand structure >> + */ >> +void dram_init_command(uint32_t data) >> +{ >> + pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_DATA_REG, data); >> + pci_write_config_dword(QUARK_HOST_BRIDGE, MSG_CTRL_EXT_REG, 0); >> + msg_port_setup(MSG_OP_DRAM_INIT, MEM_CTLR, 0); >> + >> + DPF(D_REGWR, "WR32 %03X %08X %08X\n", MEM_CTLR, 0, data); >> +} >> + >> +/* Send DRAM wake command using special MCU side-band WAKE opcode */ >> +void dram_wake_command(void) >> +{ >> + ENTERFN(); >> + >> + msg_port_setup(MSG_OP_DRAM_WAKE, MEM_CTLR, 0); >> + >> + LEAVEFN(); >> +} >> + >> +void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + /* send message to UART */ >> + DPF(D_INFO, "CH%01X RK%01X BL%01X\n", channel, rank, byte_lane); >> +} >> + >> +/* >> + * This function will program the RCVEN delays >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_rcvn(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count) > > reformat to 80cols. Should this or any other function in this file be static? These functions are used by MRC internally. Also I did not fix this 'reformat to 80cols) as I think it is fine. >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + DPF(D_TRN, "Rcvn ch%d rnk%d ln%d : pi=%03X\n", >> + channel, rank, byte_lane, pi_count); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[11:08] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[23:20] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = (byte_lane & BIT0) ? (BIT23 | BIT22 | BIT21 | BIT20) : >> + (BIT11 | BIT10 | BIT9 | BIT8); > > Would this be better as: > > (0xf << 20) | (0xf << 8) > > It might be more meaningful also. > > I really don't think these long strings of | are nice. I agree, but I did not fix this. Changing those globally is really error prone and may break the whole MRC (my brain could be bad converting these bits to hex numbers). >> + temp = (byte_lane & BIT0) ? ((pi_count / HALF_CLK) << 20) : >> + ((pi_count / HALF_CLK) << 8); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) > > lower case hex again Did not fix these for consistency. >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); >> + temp = pi_count << 24; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * BL0/1 -> B01DBCTL1[08/11] (+1 select) >> + * BL0/1 -> B01DBCTL1[02/05] (enable) >> + */ >> + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (byte_lane & BIT0) ? (BIT5) : (BIT2); > > Remove () around BIT5 Fixed globally. >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (byte_lane & BIT0) ? (BIT11) : (BIT8); >> + if (pi_count < EARLY_DB) >> + temp |= msk; > > These uses of BIT seem more useful to me. > > Still it would be better to have #defines for the bits which actually > describe their meaning. > > Maybe you don't know the meaning though... Yes!!! >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) { >> + training_message(channel, rank, byte_lane); >> + mrc_post_code(0xEE, 0xE0); >> + } >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the current RCVEN delay on the given >> + * channel, rank, byte_lane as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[11:08] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[23:20] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (byte_lane & BIT0) ? (20) : (8); >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[29:24] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[29:24] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); > > Please avoid () around simple constants. Put them in the #define/enum if > needed. Fixed globally by removing (). >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 24; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the RDQS delays based on an absolute >> + * amount of PIs. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_rdqs(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + DPF(D_TRN, "Rdqs ch%d rnk%d ln%d : pi=%03X\n", >> + channel, rank, byte_lane, pi_count); >> + >> + /* >> + * PI (1/128 MCLK) >> + * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) >> + * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) >> + */ >> + reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + msk = (BIT6 | BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); >> + temp = pi_count << 0; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check (shouldn't go above 0x3F) */ >> + if (pi_count > 0x47) { >> + training_message(channel, rank, byte_lane); >> + mrc_post_code(0xEE, 0xE1); >> + } >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the current RDQS delay on the given >> + * channel, rank, byte_lane as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * PI (1/128 MCLK) >> + * BL0 -> B0RXDQSPICODE[06:00] (0x00-0x47) >> + * BL1 -> B1RXDQSPICODE[06:00] (0x00-0x47) >> + */ >> + reg = (byte_lane & BIT0) ? (B1RXDQSPICODE) : (B0RXDQSPICODE); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp & 0x7F; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WDQS delays based on an absolute >> + * amount of PIs. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_wdqs(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + DPF(D_TRN, "Wdqs ch%d rnk%d ln%d : pi=%03X\n", >> + channel, rank, byte_lane, pi_count); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[07:04] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[19:16] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = (byte_lane & BIT0) ? (BIT19 | BIT18 | BIT17 | BIT16) : >> + (BIT7 | BIT6 | BIT5 | BIT4); >> + temp = pi_count / HALF_CLK; >> + temp <<= (byte_lane & BIT0) ? (16) : (4); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16); >> + temp = pi_count << 16; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * BL0/1 -> B01DBCTL1[07/10] (+1 select) >> + * BL0/1 -> B01DBCTL1[01/04] (enable) >> + */ >> + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (byte_lane & BIT0) ? (BIT4) : (BIT1); >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (byte_lane & BIT0) ? (BIT10) : (BIT7); >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) { >> + training_message(channel, rank, byte_lane); >> + mrc_post_code(0xEE, 0xE2); >> + } >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amount of WDQS delay on the given >> + * channel, rank, byte_lane as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[07:04] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[19:16] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (byte_lane & BIT0) ? (16) : (4); >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = (temp * HALF_CLK); >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[21:16] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[21:16] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 16; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WDQ delays based on an absolute >> + * number of PIs. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_wdq(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + DPF(D_TRN, "Wdq ch%d rnk%d ln%d : pi=%03X\n", >> + channel, rank, byte_lane, pi_count); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[03:00] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[15:12] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = (byte_lane & BIT0) ? (BIT15 | BIT14 | BIT13 | BIT12) : >> + (BIT3 | BIT2 | BIT1 | BIT0); >> + temp = pi_count / HALF_CLK; >> + temp <<= (byte_lane & BIT0) ? (12) : (0); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + msk = (BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); >> + temp = pi_count << 8; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * BL0/1 -> B01DBCTL1[06/09] (+1 select) >> + * BL0/1 -> B01DBCTL1[00/03] (enable) >> + */ >> + reg = B01DBCTL1 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (byte_lane & BIT0) ? (BIT3) : (BIT0); >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (byte_lane & BIT0) ? (BIT9) : (BIT6); >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) { >> + training_message(channel, rank, byte_lane); >> + mrc_post_code(0xEE, 0xE3); >> + } >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amount of WDQ delay on the given >> + * channel, rank, byte_lane as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * BL0 -> B01PTRCTL0[03:00] (0x0-0xF) >> + * BL1 -> B01PTRCTL0[15:12] (0x0-0xF) >> + */ >> + reg = B01PTRCTL0 + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (byte_lane & BIT0) ? (12) : (0); >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = (temp * HALF_CLK); >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * BL0 -> B0DLLPICODER0[13:08] (0x00-0x3F) >> + * BL1 -> B1DLLPICODER0[13:08] (0x00-0x3F) >> + */ >> + reg = (byte_lane & BIT0) ? (B1DLLPICODER0) : (B0DLLPICODER0); >> + reg += (((byte_lane >> 1) * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET)); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 8; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WCMD delays based on an absolute >> + * number of PIs. >> + */ >> +void set_wcmd(uint8_t channel, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CMDPTRREG[11:08] (0x0-0xF) >> + */ >> + reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT11 | BIT10 | BIT9 | BIT8); >> + temp = pi_count / HALF_CLK; >> + temp <<= 8; >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) >> + * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) >> + * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) >> + * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) >> + * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) >> + * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) >> + * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) >> + * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) >> + */ >> + reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); >> + >> + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24 | >> + BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 | >> + BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8 | >> + BIT5 | BIT4 | BIT3 | BIT2 | BIT1 | BIT0); >> + >> + temp = (pi_count << 24) | (pi_count << 16) | >> + (pi_count << 8) | (pi_count << 0); >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = CMDDLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); /* PO */ >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * CMDCFGREG0[17] (+1 select) >> + * CMDCFGREG0[16] (enable) >> + */ >> + reg = CMDCFGREG0 + (channel * DDRIOCCC_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= BIT16; >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= BIT17; >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) >> + mrc_post_code(0xEE, 0xE4); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amount of WCMD delay on the given >> + * channel as an absolute PI count. >> + */ >> +uint32_t get_wcmd(uint8_t channel) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CMDPTRREG[11:08] (0x0-0xF) >> + */ >> + reg = CMDPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 8; >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * CMDDLLPICODER0[29:24] -> CMDSLICE R3 (unused) >> + * CMDDLLPICODER0[21:16] -> CMDSLICE L3 (unused) >> + * CMDDLLPICODER0[13:08] -> CMDSLICE R2 (unused) >> + * CMDDLLPICODER0[05:00] -> CMDSLICE L2 (unused) >> + * CMDDLLPICODER1[29:24] -> CMDSLICE R1 (unused) >> + * CMDDLLPICODER1[21:16] -> CMDSLICE L1 (0x00-0x3F) >> + * CMDDLLPICODER1[13:08] -> CMDSLICE R0 (unused) >> + * CMDDLLPICODER1[05:00] -> CMDSLICE L0 (unused) >> + */ >> + reg = CMDDLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 16; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WCLK delays based on an absolute >> + * number of PIs. >> + */ >> +void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CCPTRREG[15:12] -> CLK1 (0x0-0xF) >> + * CCPTRREG[11:08] -> CLK0 (0x0-0xF) >> + */ >> + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT15 | BIT14 | BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); > > mask = 0xff00 is much better, isn't it? Agreed, but did not fix it. >> + temp = ((pi_count / HALF_CLK) << 12) | ((pi_count / HALF_CLK) << 8); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) >> + * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) >> + */ >> + reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT21 | BIT20 | BIT19 | BIT18 | BIT17 | BIT16 | >> + BIT13 | BIT12 | BIT11 | BIT10 | BIT9 | BIT8); > > Ick! > Echo!! >> + temp = (pi_count << 16) | (pi_count << 8); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = (rank) ? (ECCB1DLLPICODER1) : (ECCB1DLLPICODER1); > > Remove all (), and below. Please fix globally. Fixed globally. >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = (rank) ? (ECCB1DLLPICODER2) : (ECCB1DLLPICODER2); >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = (rank) ? (ECCB1DLLPICODER3) : (ECCB1DLLPICODER3); >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * CCCFGREG1[11:08] (+1 select) >> + * CCCFGREG1[03:00] (enable) >> + */ >> + reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (BIT3 | BIT2 | BIT1 | BIT0); >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (BIT11 | BIT10 | BIT9 | BIT8); >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) >> + mrc_post_code(0xEE, 0xE5); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amout of WCLK delay on the given >> + * channel, rank as an absolute PI count. >> + */ >> +uint32_t get_wclk(uint8_t channel, uint8_t rank) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CCPTRREG[15:12] -> CLK1 (0x0-0xF) >> + * CCPTRREG[11:08] -> CLK0 (0x0-0xF) >> + */ >> + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (rank) ? (12) : (8); >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * ECCB1DLLPICODER0[13:08] -> CLK0 (0x00-0x3F) >> + * ECCB1DLLPICODER0[21:16] -> CLK1 (0x00-0x3F) >> + */ >> + reg = (rank) ? (ECCB1DLLPICODER0) : (ECCB1DLLPICODER0); >> + reg += (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= (rank) ? (16) : (8); >> + temp &= 0x3F; >> + >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the WCTL delays based on an absolute >> + * number of PIs. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count) >> +{ >> + uint32_t reg; >> + uint32_t msk; >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CCPTRREG[31:28] (0x0-0xF) >> + * CCPTRREG[27:24] (0x0-0xF) >> + */ >> + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT31 | BIT30 | BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | >> BIT24); >> + temp = ((pi_count / HALF_CLK) << 28) | ((pi_count / HALF_CLK) << 24); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* Adjust PI_COUNT */ >> + pi_count -= ((pi_count / HALF_CLK) & 0xF) * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) >> + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) >> + */ >> + reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); >> + msk = (BIT29 | BIT28 | BIT27 | BIT26 | BIT25 | BIT24); >> + temp = (pi_count << 24); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = ECCB1DLLPICODER1 + (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = ECCB1DLLPICODER2 + (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + reg = ECCB1DLLPICODER3 + (channel * DDRIOCCC_CH_OFFSET); >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* >> + * DEADBAND >> + * CCCFGREG1[13:12] (+1 select) >> + * CCCFGREG1[05:04] (enable) >> + */ >> + reg = CCCFGREG1 + (channel * DDRIOCCC_CH_OFFSET); >> + msk = 0x00; >> + temp = 0x00; >> + >> + /* enable */ >> + msk |= (BIT5 | BIT4); >> + if ((pi_count < EARLY_DB) || (pi_count > LATE_DB)) >> + temp |= msk; >> + >> + /* select */ >> + msk |= (BIT13 | BIT12); >> + if (pi_count < EARLY_DB) >> + temp |= msk; >> + >> + mrc_alt_write_mask(DDRPHY, reg, temp, msk); >> + >> + /* error check */ >> + if (pi_count > 0x3F) >> + mrc_post_code(0xEE, 0xE6); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the amount of WCTL delay on the given >> + * channel, rank as an absolute PI count. >> + * >> + * (currently doesn't comprehend rank) >> + */ >> +uint32_t get_wctl(uint8_t channel, uint8_t rank) >> +{ >> + uint32_t reg; >> + uint32_t temp; >> + uint32_t pi_count; >> + >> + ENTERFN(); >> + >> + /* >> + * RDPTR (1/2 MCLK, 64 PIs) >> + * CCPTRREG[31:28] (0x0-0xF) >> + * CCPTRREG[27:24] (0x0-0xF) >> + */ >> + reg = CCPTRREG + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 24; >> + temp &= 0xF; >> + >> + /* Adjust PI_COUNT */ >> + pi_count = temp * HALF_CLK; >> + >> + /* >> + * PI (1/64 MCLK, 1 PIs) >> + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) >> + * ECCB1DLLPICODER?[29:24] (0x00-0x3F) >> + */ >> + reg = ECCB1DLLPICODER0 + (channel * DDRIOCCC_CH_OFFSET); >> + temp = msg_port_alt_read(DDRPHY, reg); >> + temp >>= 24; >> + temp &= 0x3F; >> + >> + /* Adjust PI_COUNT */ >> + pi_count += temp; >> + >> + LEAVEFN(); >> + >> + return pi_count; >> +} >> + >> +/* >> + * This function will program the internal Vref setting in a given >> + * byte lane in a given channel. >> + */ >> +void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting) >> +{ >> + uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); >> + >> + ENTERFN(); >> + >> + DPF(D_TRN, "Vref ch%d ln%d : val=%03X\n", >> + channel, byte_lane, setting); >> + >> + mrc_alt_write_mask(DDRPHY, (reg + (channel * DDRIODQ_CH_OFFSET) + >> + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET)), >> + (vref_codes[setting] << 2), >> + (BIT7 | BIT6 | BIT5 | BIT4 | BIT3 | BIT2)); >> + >> + /* >> + * need to wait ~300ns for Vref to settle >> + * (check that this is necessary) >> + */ >> + delay_n(300); >> + >> + /* ??? may need to clear pointers ??? */ >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return the internal Vref setting for the given >> + * channel, byte_lane. >> + */ >> +uint32_t get_vref(uint8_t channel, uint8_t byte_lane) >> +{ >> + uint8_t j; >> + uint32_t ret_val = sizeof(vref_codes) / 2; >> + uint32_t reg = (byte_lane & 0x1) ? (B1VREFCTL) : (B0VREFCTL); >> + uint32_t temp; >> + >> + ENTERFN(); >> + >> + temp = msg_port_alt_read(DDRPHY, (reg + (channel * >> DDRIODQ_CH_OFFSET) + >> + ((byte_lane >> 1) * DDRIODQ_BL_OFFSET))); >> + temp >>= 2; >> + temp &= 0x3F; >> + >> + for (j = 0; j < sizeof(vref_codes); j++) { >> + if (vref_codes[j] == temp) { >> + ret_val = j; >> + break; >> + } >> + } >> + >> + LEAVEFN(); >> + >> + return ret_val; >> +} >> + >> +/* >> + * This function will return a 32 bit address in the desired > > 32-bit Fixed >> + * channel and rank. >> + */ >> +uint32_t get_addr(uint8_t channel, uint8_t rank) >> +{ >> + uint32_t offset = 0x02000000; /* 32MB */ >> + >> + /* Begin product specific code */ >> + if (channel > 0) { >> + DPF(D_ERROR, "ILLEGAL CHANNEL\n"); >> + DEAD_LOOP(); >> + } >> + >> + if (rank > 1) { >> + DPF(D_ERROR, "ILLEGAL RANK\n"); >> + DEAD_LOOP(); >> + } >> + >> + /* use 256MB lowest density as per DRP == 0x0003 */ >> + offset += rank * (256 * 1024 * 1024); >> + >> + return offset; >> +} >> + >> +/* >> + * This function will sample the DQTRAINSTS registers in the given >> + * channel/rank SAMPLE_SIZE times looking for a valid '0' or '1'. >> + * >> + * It will return an encoded DWORD in which each bit corresponds to > > DWORD? Changed to 32-bit data >> + * the sampled value on the byte lane. >> + */ >> +uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel, >> + uint8_t rank, bool rcvn) >> +{ >> + uint8_t j; /* just a counter */ >> + uint8_t bl; /* which BL in the module (always 2 per module) */ >> + uint8_t bl_grp; /* which BL module */ >> + /* byte lane divisor */ > > Maybe rename the variable so you can drop the comment? That does not help much. Unchanged. >> + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; >> + uint32_t msk[2]; /* BLx in module */ >> + /* DQTRAINSTS register contents for each sample */ >> + uint32_t sampled_val[SAMPLE_SIZE]; >> + uint32_t num_0s; /* tracks the number of '0' samples */ >> + uint32_t num_1s; /* tracks the number of '1' samples */ >> + uint32_t ret_val = 0x00; /* assume all '0' samples */ >> + uint32_t address = get_addr(channel, rank); >> + >> + /* initialise msk[] */ >> + msk[0] = (rcvn) ? (BIT1) : (BIT9); /* BL0 */ >> + msk[1] = (rcvn) ? (BIT0) : (BIT8); /* BL1 */ >> + >> + /* cycle through each byte lane group */ >> + for (bl_grp = 0; bl_grp < (NUM_BYTE_LANES / bl_divisor) / 2; >> bl_grp++) { >> + /* take SAMPLE_SIZE samples */ >> + for (j = 0; j < SAMPLE_SIZE; j++) { >> + hte_mem_op(address, mrc_params->first_run, >> + rcvn ? 0 : 1); >> + mrc_params->first_run = 0; >> + >> + /* >> + * record the contents of the proper >> + * DQTRAINSTS register >> + */ >> + sampled_val[j] = msg_port_alt_read(DDRPHY, >> + (DQTRAINSTS + >> + (bl_grp * DDRIODQ_BL_OFFSET) + >> + (channel * DDRIODQ_CH_OFFSET))); >> + } >> + >> + /* >> + * look for a majority value (SAMPLE_SIZE / 2) + 1 >> + * on the byte lane and set that value in the corresponding >> + * ret_val bit >> + */ >> + for (bl = 0; bl < 2; bl++) { >> + num_0s = 0x00; /* reset '0' tracker for byte lane */ >> + num_1s = 0x00; /* reset '1' tracker for byte lane */ >> + for (j = 0; j < SAMPLE_SIZE; j++) { >> + if (sampled_val[j] & msk[bl]) >> + num_1s++; >> + else >> + num_0s++; >> + } >> + if (num_1s > num_0s) >> + ret_val |= (1 << (bl + (bl_grp * 2))); >> + } >> + } >> + >> + /* >> + * "ret_val.0" contains the status of BL0 >> + * "ret_val.1" contains the status of BL1 >> + * "ret_val.2" contains the status of BL2 >> + * etc. > > This comment should go in @return in the function comment. Actually I failed to understand what it really means, so leave it unchanged :( >> + */ >> + return ret_val; >> +} >> + >> +/* This function will find the rising edge transition on RCVN or WDQS */ >> +void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[], >> + uint8_t channel, uint8_t rank, bool rcvn) >> +{ >> + bool all_edges_found; /* determines stop condition */ >> + bool direction[NUM_BYTE_LANES]; /* direction indicator */ >> + uint8_t sample; /* sample counter */ >> + uint8_t bl; /* byte lane counter */ >> + /* byte lane divisor */ >> + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; >> + uint32_t sample_result[SAMPLE_CNT]; /* results of sample_dqs() */ >> + uint32_t temp; >> + uint32_t transition_pattern; >> + >> + ENTERFN(); >> + >> + /* select hte and request initial configuration */ >> + select_hte(); >> + mrc_params->first_run = 1; >> + >> + /* Take 3 sample points (T1,T2,T3) to obtain a transition pattern */ >> + for (sample = 0; sample < SAMPLE_CNT; sample++) { >> + /* program the desired delays for sample */ >> + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { >> + /* increase sample delay by 26 PI (0.2 CLK) */ >> + if (rcvn) { >> + set_rcvn(channel, rank, bl, >> + delay[bl] + (sample * SAMPLE_DLY)); >> + } else { >> + set_wdqs(channel, rank, bl, >> + delay[bl] + (sample * SAMPLE_DLY)); >> + } >> + } >> + >> + /* take samples (Tsample_i) */ >> + sample_result[sample] = sample_dqs(mrc_params, >> + channel, rank, rcvn); >> + >> + DPF(D_TRN, >> + "Find rising edge %s ch%d rnk%d: #%d dly=%d dqs=%02X\n", >> + (rcvn ? "RCVN" : "WDQS"), channel, rank, sample, >> + sample * SAMPLE_DLY, sample_result[sample]); >> + } >> + >> + /* >> + * This pattern will help determine where we landed and ultimately >> + * how to place RCVEN/WDQS. >> + */ >> + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { >> + /* build transition_pattern (MSB is 1st sample) */ >> + transition_pattern = 0; >> + for (sample = 0; sample < SAMPLE_CNT; sample++) { >> + transition_pattern |= >> + ((sample_result[sample] & (1 << bl)) >> bl) >> << >> + (SAMPLE_CNT - 1 - sample); >> + } >> + >> + DPF(D_TRN, "=== transition pattern %d\n", >> transition_pattern); >> + >> + /* >> + * set up to look for rising edge based on >> + * transition_pattern >> + */ >> + switch (transition_pattern) { >> + case 0: /* sampled 0->0->0 */ >> + /* move forward from T3 looking for 0->1 */ >> + delay[bl] += 2 * SAMPLE_DLY; >> + direction[bl] = FORWARD; >> + break; >> + case 1: /* sampled 0->0->1 */ >> + case 5: /* sampled 1->0->1 (bad duty cycle) *HSD#237503* */ >> + /* move forward from T2 looking for 0->1 */ >> + delay[bl] += 1 * SAMPLE_DLY; >> + direction[bl] = FORWARD; >> + break; >> + case 2: /* sampled 0->1->0 (bad duty cycle) *HSD#237503* */ >> + case 3: /* sampled 0->1->1 */ >> + /* move forward from T1 looking for 0->1 */ >> + delay[bl] += 0 * SAMPLE_DLY; >> + direction[bl] = FORWARD; >> + break; >> + case 4: /* sampled 1->0->0 (assumes BL8, HSD#234975) */ >> + /* move forward from T3 looking for 0->1 */ >> + delay[bl] += 2 * SAMPLE_DLY; >> + direction[bl] = FORWARD; >> + break; >> + case 6: /* sampled 1->1->0 */ >> + case 7: /* sampled 1->1->1 */ >> + /* move backward from T1 looking for 1->0 */ >> + delay[bl] += 0 * SAMPLE_DLY; >> + direction[bl] = BACKWARD; >> + break; >> + default: >> + mrc_post_code(0xEE, 0xEE); >> + break; >> + } >> + >> + /* program delays */ >> + if (rcvn) >> + set_rcvn(channel, rank, bl, delay[bl]); >> + else >> + set_wdqs(channel, rank, bl, delay[bl]); >> + } >> + >> + /* >> + * Based on the observed transition pattern on the byte lane, >> + * begin looking for a rising edge with single PI granularity. >> + */ >> + do { >> + all_edges_found = true; /* assume all byte lanes passed */ >> + /* take a sample */ >> + temp = sample_dqs(mrc_params, channel, rank, rcvn); >> + /* check all each byte lane for proper edge */ >> + for (bl = 0; bl < (NUM_BYTE_LANES / bl_divisor); bl++) { >> + if (temp & (1 << bl)) { >> + /* sampled "1" */ >> + if (direction[bl] == BACKWARD) { >> + /* >> + * keep looking for edge >> + * on this byte lane >> + */ >> + all_edges_found = false; >> + delay[bl] -= 1; >> + if (rcvn) { >> + set_rcvn(channel, rank, >> + bl, delay[bl]); >> + } else { >> + set_wdqs(channel, rank, >> + bl, delay[bl]); >> + } >> + } >> + } else { >> + /* sampled "0" */ >> + if (direction[bl] == FORWARD) { >> + /* >> + * keep looking for edge >> + * on this byte lane >> + */ >> + all_edges_found = false; >> + delay[bl] += 1; >> + if (rcvn) { >> + set_rcvn(channel, rank, >> + bl, delay[bl]); >> + } else { >> + set_wdqs(channel, rank, >> + bl, delay[bl]); >> + } >> + } >> + } >> + } >> + } while (!all_edges_found); >> + >> + /* restore DDR idle state */ >> + dram_init_command(DCMD_PREA(rank)); >> + >> + DPF(D_TRN, "Delay %03X %03X %03X %03X\n", >> + delay[0], delay[1], delay[2], delay[3]); >> + >> + LEAVEFN(); >> +} >> + >> +/* >> + * This function will return a 32 bit mask that will be used to >> + * check for byte lane failures. >> + */ >> +uint32_t byte_lane_mask(struct mrc_params *mrc_params) >> +{ >> + uint32_t j; >> + uint32_t ret_val = 0x00; >> + >> + /* >> + * set ret_val based on NUM_BYTE_LANES such that you will check >> + * only BL0 in result >> + * >> + * (each bit in result represents a byte lane) >> + */ >> + for (j = 0; j < MAX_BYTE_LANES; j += NUM_BYTE_LANES) >> + ret_val |= (1 << ((j / NUM_BYTE_LANES) * NUM_BYTE_LANES)); >> + >> + /* >> + * HSD#235037 >> + * need to adjust the mask for 16-bit mode >> + */ >> + if (mrc_params->channel_width == X16) >> + ret_val |= (ret_val << 2); >> + >> + return ret_val; >> +} >> + >> +/* >> + * Check memory executing simple write/read/verify at the specified address. >> + * >> + * Bits in the result indicate failure on specific byte lane. >> + */ >> +uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address) >> +{ >> + uint32_t result = 0; >> + uint8_t first_run = 0; >> + >> + if (mrc_params->hte_setup) { >> + mrc_params->hte_setup = 0; >> + first_run = 1; >> + select_hte(); >> + } >> + >> + result = hte_basic_write_read(mrc_params, address, >> + first_run, WRITE_TRAIN); > > reformat to 80cols Fixed. >> + >> + DPF(D_TRN, "check_rw_coarse result is %x\n", result); >> + >> + return result; >> +} >> + >> +/* >> + * Check memory executing write/read/verify of many data patterns >> + * at the specified address. Bits in the result indicate failure >> + * on specific byte lane. >> + */ >> +uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address) >> +{ >> + uint32_t result; >> + uint8_t first_run = 0; >> + >> + if (mrc_params->hte_setup) { >> + mrc_params->hte_setup = 0; >> + first_run = 1; >> + select_hte(); >> + } >> + >> + result = hte_write_stress_bit_lanes(mrc_params, address, first_run); >> + >> + DPF(D_TRN, "check_bls_ex result is %x\n", result); >> + >> + return result; >> +} >> + >> +/* >> + * 32-bit LFSR with characteristic polynomial: X^32 + X^22 +X^2 + X^1 >> + * >> + * The function takes pointer to previous 32 bit value and >> + * modifies it to next value. >> + */ >> +void lfsr32(uint32_t *lfsr_ptr) >> +{ >> + uint32_t bit; >> + uint32_t lfsr; >> + int i; >> + >> + lfsr = *lfsr_ptr; >> + >> + for (i = 0; i < 32; i++) { >> + bit = 1 ^ (lfsr & BIT0); >> + bit = bit ^ ((lfsr & BIT1) >> 1); >> + bit = bit ^ ((lfsr & BIT2) >> 2); >> + bit = bit ^ ((lfsr & BIT22) >> 22); >> + >> + lfsr = ((lfsr >> 1) | (bit << 31)); >> + } >> + >> + *lfsr_ptr = lfsr; >> +} >> + >> +/* Clear the pointers in a given byte lane in a given channel */ >> +void clear_pointers(void) >> +{ >> + uint8_t channel; >> + uint8_t bl; >> + >> + ENTERFN(); >> + >> + for (channel = 0; channel < NUM_CHANNELS; channel++) { >> + for (bl = 0; bl < NUM_BYTE_LANES; bl++) { >> + mrc_alt_write_mask(DDRPHY, >> + (B01PTRCTL1 + >> + (channel * DDRIODQ_CH_OFFSET) + >> + ((bl >> 1) * DDRIODQ_BL_OFFSET)), >> + ~BIT8, BIT8); >> + >> + mrc_alt_write_mask(DDRPHY, >> + (B01PTRCTL1 + >> + (channel * DDRIODQ_CH_OFFSET) + >> + ((bl >> 1) * DDRIODQ_BL_OFFSET)), >> + BIT8, BIT8); >> + } >> + } >> + >> + LEAVEFN(); >> +} >> + >> +void print_timings(struct mrc_params *mrc_params) >> +{ >> + uint8_t algo; >> + uint8_t channel; >> + uint8_t rank; >> + uint8_t bl; >> + uint8_t bl_divisor = (mrc_params->channel_width == X16) ? 2 : 1; >> + >> + DPF(D_INFO, "\n---------------------------"); >> + DPF(D_INFO, "\nALGO[CH:RK] BL0 BL1 BL2 BL3"); >> + DPF(D_INFO, "\n==========================="); >> + >> + for (algo = 0; algo < MAX_ALGOS; algo++) { >> + for (channel = 0; channel < NUM_CHANNELS; channel++) { >> + if (mrc_params->channel_enables & (1 << channel)) { >> + for (rank = 0; rank < NUM_RANKS; rank++) { > > Can we put this block in its own function to fix the over-indenting? Fixed. >> + if (mrc_params->rank_enables & >> + (1 << rank)) { >> + switch (algo) { >> + case RCVN: >> + DPF(D_INFO, >> + >> "\nRCVN[%02d:%02d]", >> + channel, rank); >> + break; >> + case WDQS: >> + DPF(D_INFO, >> + >> "\nWDQS[%02d:%02d]", >> + channel, rank); >> + break; >> + case WDQX: >> + DPF(D_INFO, >> + >> "\nWDQx[%02d:%02d]", >> + channel, rank); >> + break; >> + case RDQS: >> + DPF(D_INFO, >> + >> "\nRDQS[%02d:%02d]", >> + channel, rank); >> + break; >> + case VREF: >> + DPF(D_INFO, >> + >> "\nVREF[%02d:%02d]", >> + channel, rank); >> + break; >> + case WCMD: >> + DPF(D_INFO, >> + >> "\nWCMD[%02d:%02d]", >> + channel, rank); >> + break; >> + case WCTL: >> + DPF(D_INFO, >> + >> "\nWCTL[%02d:%02d]", >> + channel, rank); >> + break; >> + case WCLK: >> + DPF(D_INFO, >> + >> "\nWCLK[%02d:%02d]", >> + channel, rank); >> + break; >> + default: >> + break; >> + } >> + >> + for (bl = 0; >> + bl < (NUM_BYTE_LANES / >> bl_divisor); >> + bl++) { >> + switch (algo) { >> + case RCVN: >> + DPF(D_INFO, >> + " %03d", >> + >> get_rcvn(channel, rank, bl)); >> + break; >> + case WDQS: >> + DPF(D_INFO, >> + " %03d", >> + >> get_wdqs(channel, rank, bl)); >> + break; >> + case WDQX: >> + DPF(D_INFO, >> + " %03d", >> + >> get_wdq(channel, rank, bl)); >> + break; >> + case RDQS: >> + DPF(D_INFO, >> + " %03d", >> + >> get_rdqs(channel, rank, bl)); >> + break; >> + case VREF: >> + DPF(D_INFO, >> + " %03d", >> + >> get_vref(channel, bl)); >> + break; >> + case WCMD: >> + DPF(D_INFO, >> + " %03d", >> + >> get_wcmd(channel)); >> + break; >> + case WCTL: >> + DPF(D_INFO, >> + " %03d", >> + >> get_wctl(channel, rank)); >> + break; >> + case WCLK: >> + DPF(D_INFO, >> + " %03d", >> + >> get_wclk(channel, rank)); >> + break; >> + default: >> + break; >> + } >> + } >> + } >> + } >> + } >> + } >> + } >> + >> + DPF(D_INFO, "\n---------------------------"); >> + DPF(D_INFO, "\n"); >> +} >> diff --git a/arch/x86/cpu/quark/mrc_util.h b/arch/x86/cpu/quark/mrc_util.h >> new file mode 100644 >> index 0000000..edbe219 >> --- /dev/null >> +++ b/arch/x86/cpu/quark/mrc_util.h >> @@ -0,0 +1,153 @@ >> +/* >> + * Copyright (C) 2013, Intel Corporation >> + * Copyright (C) 2015, Bin Meng <bmeng...@gmail.com> >> + * >> + * Ported from Intel released Quark UEFI BIOS >> + * QuarkSocPkg/QuarkNorthCluster/MemoryInit/Pei/ >> + * >> + * SPDX-License-Identifier: Intel >> + */ >> + >> +#ifndef _MRC_UTIL_H_ >> +#define _MRC_UTIL_H_ >> + >> +/* Turn on this macro to enable MRC debugging output */ >> +#undef MRC_DEBUG >> + >> +/* MRC Debug Support */ >> +#define DPF debug_cond >> + >> +/* debug print type */ >> + >> +#ifdef MRC_DEBUG >> +#define D_ERROR 0x0001 >> +#define D_INFO 0x0002 >> +#define D_REGRD 0x0004 >> +#define D_REGWR 0x0008 >> +#define D_FCALL 0x0010 >> +#define D_TRN 0x0020 >> +#define D_TIME 0x0040 >> +#else >> +#define D_ERROR 0 >> +#define D_INFO 0 >> +#define D_REGRD 0 >> +#define D_REGWR 0 >> +#define D_FCALL 0 >> +#define D_TRN 0 >> +#define D_TIME 0 >> +#endif >> + >> +#define ENTERFN(...) debug_cond(D_FCALL, "<%s>\n", __func__) >> +#define LEAVEFN(...) debug_cond(D_FCALL, "</%s>\n", __func__) >> +#define REPORTFN(...) debug_cond(D_FCALL, "<%s/>\n", __func__) >> + >> +/* Generic Register Bits */ >> +#define BIT0 0x00000001 >> +#define BIT1 0x00000002 >> +#define BIT2 0x00000004 >> +#define BIT3 0x00000008 >> +#define BIT4 0x00000010 >> +#define BIT5 0x00000020 >> +#define BIT6 0x00000040 >> +#define BIT7 0x00000080 >> +#define BIT8 0x00000100 >> +#define BIT9 0x00000200 >> +#define BIT10 0x00000400 >> +#define BIT11 0x00000800 >> +#define BIT12 0x00001000 >> +#define BIT13 0x00002000 >> +#define BIT14 0x00004000 >> +#define BIT15 0x00008000 >> +#define BIT16 0x00010000 >> +#define BIT17 0x00020000 >> +#define BIT18 0x00040000 >> +#define BIT19 0x00080000 >> +#define BIT20 0x00100000 >> +#define BIT21 0x00200000 >> +#define BIT22 0x00400000 >> +#define BIT23 0x00800000 >> +#define BIT24 0x01000000 >> +#define BIT25 0x02000000 >> +#define BIT26 0x04000000 >> +#define BIT27 0x08000000 >> +#define BIT28 0x10000000 >> +#define BIT29 0x20000000 >> +#define BIT30 0x40000000 >> +#define BIT31 0x80000000 >> + >> +/* Message Bus Port */ >> +#define MEM_CTLR 0x01 >> +#define HOST_BRIDGE 0x03 >> +#define MEM_MGR 0x05 >> +#define HTE 0x11 >> +#define DDRPHY 0x12 >> + >> +/* number of sample points */ >> +#define SAMPLE_CNT 3 >> +/* number of PIs to increment per sample */ >> +#define SAMPLE_DLY 26 >> + >> +enum { >> + /* indicates to decrease delays when looking for edge */ >> + BACKWARD, >> + /* indicates to increase delays when looking for edge */ >> + FORWARD >> +}; >> + >> +enum { >> + RCVN, >> + WDQS, >> + WDQX, >> + RDQS, >> + VREF, >> + WCMD, >> + WCTL, >> + WCLK, >> + MAX_ALGOS, >> +}; >> + >> +void mrc_write_mask(u32 unit, u32 addr, u32 data, u32 mask); >> +void mrc_alt_write_mask(u32 unit, u32 addr, u32 data, u32 mask); >> +void mrc_post_code(uint8_t major, uint8_t minor); >> +void delay_n(uint32_t ns); >> +void delay_u(uint32_t ms); >> +void select_mem_mgr(void); >> +void select_hte(void); >> +void dram_init_command(uint32_t data); >> +void dram_wake_command(void); >> +void training_message(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> + >> +void set_rcvn(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count); >> +uint32_t get_rcvn(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> +void set_rdqs(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count); >> +uint32_t get_rdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> +void set_wdqs(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count); >> +uint32_t get_wdqs(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> +void set_wdq(uint8_t channel, uint8_t rank, >> + uint8_t byte_lane, uint32_t pi_count); >> +uint32_t get_wdq(uint8_t channel, uint8_t rank, uint8_t byte_lane); >> +void set_wcmd(uint8_t channel, uint32_t pi_count); >> +uint32_t get_wcmd(uint8_t channel); >> +void set_wclk(uint8_t channel, uint8_t rank, uint32_t pi_count); >> +uint32_t get_wclk(uint8_t channel, uint8_t rank); >> +void set_wctl(uint8_t channel, uint8_t rank, uint32_t pi_count); >> +uint32_t get_wctl(uint8_t channel, uint8_t rank); >> +void set_vref(uint8_t channel, uint8_t byte_lane, uint32_t setting); >> +uint32_t get_vref(uint8_t channel, uint8_t byte_lane); >> + >> +uint32_t get_addr(uint8_t channel, uint8_t rank); >> +uint32_t sample_dqs(struct mrc_params *mrc_params, uint8_t channel, >> + uint8_t rank, bool rcvn); >> +void find_rising_edge(struct mrc_params *mrc_params, uint32_t delay[], >> + uint8_t channel, uint8_t rank, bool rcvn); >> +uint32_t byte_lane_mask(struct mrc_params *mrc_params); >> +uint32_t check_rw_coarse(struct mrc_params *mrc_params, uint32_t address); >> +uint32_t check_bls_ex(struct mrc_params *mrc_params, uint32_t address); >> +void lfsr32(uint32_t *lfsr_ptr); >> +void clear_pointers(void); >> +void print_timings(struct mrc_params *mrc_params); > > If these are all truly exported, can we please put the function > comments here in the header file? No, they are only used internally by MRC. >> + >> +#endif /* _MRC_UTIL_H_ */ >> -- >> 1.8.2.1 >> > > Regards, > Simon Regards, Bin _______________________________________________ U-Boot mailing list U-Boot@lists.denx.de http://lists.denx.de/mailman/listinfo/u-boot