[RFC PATCH v4 05/10] fadump: Convert firmware-assisted cpu state dump data into elf notes.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com When registered for firmware assisted dump on powerpc, firmware preserves the registers for the active CPUs during a system crash. This patch reads the cpu register data stored in Firmware-assisted dump format (except for crashing cpu) and converts it into elf notes and updates the PT_NOTE program header accordingly. The exact register state for crashing cpu is saved to fadump crash info structure in scratch area during crash_fadump() and read during second kernel boot. Change in v4: - Fixes an issue where memblock_free() is invoked from build_cpu_notes() function during error_out path. Invoke cpu_notes_buf_free() in error_out path instead of memblock_free(). Change in v2: - Moved the crash_fadump() invocation from generic code to panic notifier. - Introduced cpu_notes_buf_alloc() function to allocate cpu notes buffer using get_free_pages(). The reason is, with the use of subsys_initcall the setup_fadump() is now called after mem_init(). Hence use of get_free_pages() to allocate memory is more appropriate than using memblock_alloc(). 
Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- arch/powerpc/include/asm/fadump.h | 43 + arch/powerpc/kernel/fadump.c | 312 arch/powerpc/kernel/setup-common.c |8 + arch/powerpc/kernel/traps.c|5 + 4 files changed, 366 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index ced923a..0c14097 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -59,6 +59,18 @@ /* Dump status flag */ #define FADUMP_ERROR_FLAG 0x2000 +#define FADUMP_CPU_ID_MASK ((1UL 32) - 1) + +#define CPU_UNKNOWN(~((u32)0)) + +/* Utility macros */ +#define SKIP_TO_NEXT_CPU(reg_entry)\ +({ \ + while (reg_entry-reg_id != REG_ID(CPUEND)) \ + reg_entry++;\ + reg_entry++;\ +}) + /* Kernel Dump section info */ struct fadump_section { u32 request_flag; @@ -113,6 +125,9 @@ struct fw_dump { unsigned long reserve_bootvar; unsigned long fadumphdr_addr; + unsigned long cpu_notes_buf; + unsigned long cpu_notes_buf_size; + int ibm_configure_kernel_dump; unsigned long fadump_enabled:1; @@ -137,13 +152,40 @@ static inline u64 str_to_u64(const char *str) return val; } #define STR_TO_HEX(x) str_to_u64(x) +#define REG_ID(x) str_to_u64(x) #define FADUMP_CRASH_INFO_MAGICSTR_TO_HEX(FADMPINF) +#define REGSAVE_AREA_MAGIC STR_TO_HEX(REGSAVE) + +/* The firmware-assisted dump format. + * + * The register save area is an area in the partition's memory used to preserve + * the register contents (CPU state data) for the active CPUs during a firmware + * assisted dump. The dump format contains register save area header followed + * by register entries. Each list of registers for a CPU starts with + * CPUSTRT and ends with CPUEND. + */ + +/* Register save area header. */ +struct fadump_reg_save_area_header { + u64 magic_number; + u32 version; + u32 num_cpu_offset; +}; + +/* Register entry. 
*/ +struct fadump_reg_entry { + u64 reg_id; + u64 reg_value; +}; /* fadump crash info structure */ struct fadump_crash_info_header { u64 magic_number; u64 elfcorehdr_addr; + u32 crashing_cpu; + struct pt_regs regs; + struct cpumask cpu_online_mask; }; /* Crash memory ranges */ @@ -159,6 +201,7 @@ extern int early_init_dt_scan_fw_dump(unsigned long node, extern int fadump_reserve_mem(void); extern int setup_fadump(void); extern int is_fadump_active(void); +extern void crash_fadump(struct pt_regs *, const char *); #else /* CONFIG_FA_DUMP */ static inline int is_fadump_active(void) { return 0; } #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index bbbda82..70d6287 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -244,6 +244,7 @@ static unsigned long get_dump_area_size(void) size += fw_dump.boot_memory_size; size += sizeof(struct fadump_crash_info_header); size += sizeof(struct elfhdr); /* ELF core header.*/ + size += sizeof(struct elf_phdr); /* place holder for cpu notes */ /* Program headers for crash memory regions. */ size += sizeof(struct elf_phdr) * (memblock_num_regions(memory) + 2); @@ -397,6 +398,283 @@ static void register_fw_dump(struct fadump_mem_struct *fdm) } } +void crash_fadump(struct pt_regs *regs, const char *str) +{ + struct fadump_crash_info_header *fdh = NULL; + + if
[RFC PATCH v4 03/10] fadump: Register for firmware assisted dump.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com This patch registers for firmware-assisted dump using rtas token ibm,configure-kernel-dump. During registration firmware is informed about the reserved area where it saves the CPU state data, HPTE table and contents of RMR region at the time of kernel crash. Apart from this, firmware also preserves the contents of entire partition memory even if it is not specified during registration. This patch also populates sysfs files under /sys/kernel to display fadump status and reserved memory regions. Change in v3: - Re-factored the implementation to work with kdump service start/stop. Introduce fadump_registered sysfs control file which will be used by kdump init scripts to start/stop firmware assisted dump. echo 1 to /sys/kernel/fadump_registered file for fadump registration and echo 0 to /sys/kernel/fadump_registered file for fadump un-registration. - Introduced the locking mechanism to handle simultaneous writes to /sys/kernel/fadump_registered file. Change in v2: - Removed a few debug print statements. - Moved the setup_fadump() call from setup_system() and now calling it subsys_initcall. - Moved fadump_region attribute under debugfs. - Clear the TCE entries if firmware assisted dump is active. 
Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- arch/powerpc/include/asm/fadump.h | 57 ++ arch/powerpc/kernel/fadump.c | 352 + arch/powerpc/kernel/iommu.c |8 + arch/powerpc/mm/hash_utils_64.c | 11 + 4 files changed, 424 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 0b040c1..3b2f8cc 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -42,6 +42,58 @@ #define FADUMP_HPTE_REGION 0x0002 #define FADUMP_REAL_MODE_REGION0x0011 +/* Dump request flag */ +#define FADUMP_REQUEST_FLAG0x0001 + +/* FAD commands */ +#define FADUMP_REGISTER1 +#define FADUMP_UNREGISTER 2 +#define FADUMP_INVALIDATE 3 + +/* Kernel Dump section info */ +struct fadump_section { + u32 request_flag; + u16 source_data_type; + u16 error_flags; + u64 source_address; + u64 source_len; + u64 bytes_dumped; + u64 destination_address; +}; + +/* ibm,configure-kernel-dump header. */ +struct fadump_section_header { + u32 dump_format_version; + u16 dump_num_sections; + u16 dump_status_flag; + u32 offset_first_dump_section; + + /* Fields for disk dump option. */ + u32 dd_block_size; + u64 dd_block_offset; + u64 dd_num_blocks; + u32 dd_offset_disk_path; + + /* Maximum time allowed to prevent an automatic dump-reboot. */ + u32 max_time_auto; +}; + +/* + * Firmware Assisted dump memory structure. This structure is required for + * registering future kernel dump with power firmware through rtas call. + * + * No disk dump option. Hence disk dump path string section is not included. + */ +struct fadump_mem_struct { + struct fadump_section_headerheader; + + /* Kernel dump sections */ + struct fadump_section cpu_state_data; + struct fadump_section hpte_region; + struct fadump_section rmr_region; +}; + +/* Firmware-assisted dump configuration details. 
*/ struct fw_dump { unsigned long cpu_state_data_size; unsigned long hpte_region_size; @@ -56,10 +108,15 @@ struct fw_dump { unsigned long fadump_enabled:1; unsigned long fadump_supported:1; unsigned long dump_active:1; + unsigned long dump_registered:1; }; extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data); extern int fadump_reserve_mem(void); +extern int setup_fadump(void); +extern int is_fadump_active(void); +#else /* CONFIG_FA_DUMP */ +static inline int is_fadump_active(void) { return 0; } #endif #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 05dffc0..ed38f86 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -29,6 +29,9 @@ #include linux/string.h #include linux/memblock.h +#include linux/delay.h +#include linux/debugfs.h +#include linux/seq_file.h #include asm/page.h #include asm/prom.h @@ -46,6 +49,10 @@ struct dump_section { } __packed; static struct fw_dump fw_dump; +static struct fadump_mem_struct fdm; +static const struct fadump_mem_struct *fdm_active; + +static DEFINE_MUTEX(fadump_mutex); /* Scan the Firmware Assisted dump configuration details. */ int __init early_init_dt_scan_fw_dump(unsigned long node, @@ -74,7 +81,8 @@ int __init early_init_dt_scan_fw_dump(unsigned long node, * The 'ibm,kernel-dump' rtas node is present only if there is * dump data
[RFC PATCH v4 04/10] fadump: Initialize elfcore header and add PT_LOAD program headers.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com Build the crash memory range list by traversing through system memory during the first kernel before we register for firmware-assisted dump. After the successful dump registration, initialize the elfcore header and populate PT_LOAD program headers with crash memory ranges. The elfcore header is saved in the scratch area within the reserved memory. The scratch area starts at the end of the memory reserved for saving RMR region contents. The scratch area contains fadump crash info structure that contains magic number for fadump validation and physical address where the elfcore header can be found. This structure will also be used to pass some important crash info data to the second kernel which will help second kernel to populate ELF core header with correct data before it gets exported through /proc/vmcore. Since the firmware preserves the entire partition memory at the time of crash the contents of the scratch area will be preserved till second kernel boot. NOTE: The current design implementation does not address a possibility of introducing additional fields (in future) to this structure without affecting compatibility. It's on the TODO list to come up with a better approach to address this.

  Reserved dump area start => +-------------------------------------+
                              |  CPU state dump data                |
                              +-------------------------------------+
                              |  HPTE region data                   |
                              +-------------------------------------+
                              |  RMR region data                    |
  Scratch area start       => +-------------------------------------+
                              |  fadump crash info structure {      |
                              |     magic number                    |
                       +------|     elfcorehdr_addr                 |
                       |      |  }                                  |
                       +----> +-------------------------------------+
                              |  ELF core header                    |
  Reserved dump area end   => +-------------------------------------+

Change in v4: - Move the init_elfcore_header() function and 'memblock_num_regions' macro from generic code to power specific code as these are used only by firmware assisted dump implementation which is power specific feature. 
Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- arch/powerpc/include/asm/fadump.h | 43 +++ arch/powerpc/kernel/fadump.c | 235 + 2 files changed, 276 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 3b2f8cc..ced923a 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -37,6 +37,12 @@ */ #define MIN_BOOT_MEM ((RMR_END (0x1UL 28)) ? (0x1UL 28) : RMR_END) +#define memblock_num_regions(memblock_type)(memblock.memblock_type.cnt) + +#ifndef ELF_CORE_EFLAGS +#define ELF_CORE_EFLAGS 0 +#endif + /* Firmware provided dump sections */ #define FADUMP_CPU_STATE_DATA 0x0001 #define FADUMP_HPTE_REGION 0x0002 @@ -50,6 +56,9 @@ #define FADUMP_UNREGISTER 2 #define FADUMP_INVALIDATE 3 +/* Dump status flag */ +#define FADUMP_ERROR_FLAG 0x2000 + /* Kernel Dump section info */ struct fadump_section { u32 request_flag; @@ -103,6 +112,7 @@ struct fw_dump { /* cmd line option during boot */ unsigned long reserve_bootvar; + unsigned long fadumphdr_addr; int ibm_configure_kernel_dump; unsigned long fadump_enabled:1; @@ -111,6 +121,39 @@ struct fw_dump { unsigned long dump_registered:1; }; +/* + * Copy the ascii values for first 8 characters from a string into u64 + * variable at their respective indexes. + * e.g. + * The string FADMPINF will be converted into 0x4641444d50494e46 + */ +static inline u64 str_to_u64(const char *str) +{ + u64 val = 0; + int i; + + for (i = 0; i sizeof(val); i++) + val = (*str) ? 
(val 8) | *str++ : val 8; + return val; +} +#define STR_TO_HEX(x) str_to_u64(x) + +#define FADUMP_CRASH_INFO_MAGICSTR_TO_HEX(FADMPINF) + +/* fadump crash info structure */ +struct fadump_crash_info_header { + u64 magic_number; + u64 elfcorehdr_addr; +}; + +/* Crash memory ranges */ +#define INIT_CRASHMEM_RANGES (INIT_MEMBLOCK_REGIONS + 2) + +struct fad_crash_memory_ranges { + unsigned long long base; + unsigned long long size; +}; + extern int early_init_dt_scan_fw_dump(unsigned long node, const char *uname, int depth, void *data); extern int fadump_reserve_mem(void); diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ed38f86..bbbda82 100644 --- a/arch/powerpc/kernel/fadump.c
[RFC PATCH v4 06/10] fadump: Add PT_NOTE program header for vmcoreinfo
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com Introduce a PT_NOTE program header that points to physical address of vmcoreinfo_note buffer declared in kernel/kexec.c. The vmcoreinfo note buffer is populated during crash_fadump() at the time of system crash. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- arch/powerpc/kernel/fadump.c | 29 + 1 files changed, 29 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 70d6287..e68ee3a 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -816,6 +816,19 @@ static void setup_crash_memory_ranges(void) } } +/* + * If the given physical address falls within the boot memory region then + * return the relocated address that points to the dump region reserved + * for saving initial boot memory contents. + */ +static inline unsigned long relocate(unsigned long paddr) +{ + if (paddr RMR_START paddr fw_dump.boot_memory_size) + return fdm.rmr_region.destination_address + paddr; + else + return paddr; +} + static int create_elfcore_headers(char *bufp) { struct elfhdr *elf; @@ -847,6 +860,22 @@ static int create_elfcore_headers(char *bufp) (elf-e_phnum)++; + /* setup ELF PT_NOTE for vmcoreinfo */ + phdr = (struct elf_phdr *)bufp; + bufp += sizeof(struct elf_phdr); + phdr-p_type= PT_NOTE; + phdr-p_flags = 0; + phdr-p_vaddr = 0; + phdr-p_align = 0; + + phdr-p_paddr = relocate(paddr_vmcoreinfo_note()); + phdr-p_offset = phdr-p_paddr; + phdr-p_memsz = vmcoreinfo_max_size; + phdr-p_filesz = vmcoreinfo_max_size; + + /* Increment number of program headers. */ + (elf-e_phnum)++; + /* setup PT_LOAD sections. */ for (i = 0; i crash_mem_ranges; i++) { ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC PATCH v4 08/10] fadump: Invalidate registration and release reserved memory for general use.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com This patch introduces a sysfs interface '/sys/kernel/fadump_release_mem' to invalidate the last fadump registration, invalidate '/proc/vmcore', release the reserved memory for general use and re-register for future kernel dump. Once the dump is copied to the disk, the userspace tool will echo 1 to '/sys/kernel/fadump_release_mem'. Release the reserved memory region excluding the size of the memory required for future kernel dump registration. Change in v3: - Synchronize the fadump invalidation step to handle simultaneous writes to /sys/kernel/fadump_release_mem. Change in v2: - Introduced cpu_notes_buf_free() function to free memory allocated for cpu notes buffer. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- arch/powerpc/include/asm/fadump.h |3 + arch/powerpc/kernel/fadump.c | 157 - 2 files changed, 156 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 0c14097..8ddfbc7 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -202,6 +202,9 @@ extern int fadump_reserve_mem(void); extern int setup_fadump(void); extern int is_fadump_active(void); extern void crash_fadump(struct pt_regs *, const char *); +extern void fadump_cleanup(void); + +extern void vmcore_cleanup(void); #else /* CONFIG_FA_DUMP */ static inline int is_fadump_active(void) { return 0; } #endif diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index e68ee3a..b449b55 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -33,6 +33,8 @@ #include linux/debugfs.h #include linux/seq_file.h #include linux/crash_dump.h +#include linux/kobject.h +#include linux/sysfs.h #include asm/page.h #include asm/prom.h @@ -986,6 +988,131 @@ static int fadump_unregister_dump(struct fadump_mem_struct *fdm) return 0; } +static int fadump_invalidate_dump(struct fadump_mem_struct *fdm) +{ + int rc = 0; + 
unsigned int wait_time; + + pr_debug(Invalidating firmware-assisted dump registration\n); + + /* TODO: Add upper time limit for the delay */ + do { + rc = rtas_call(fw_dump.ibm_configure_kernel_dump, 3, 1, NULL, + FADUMP_INVALIDATE, fdm, + sizeof(struct fadump_mem_struct)); + + wait_time = rtas_busy_delay_time(rc); + if (wait_time) + mdelay(wait_time); + } while (wait_time); + + if (rc) { + printk(KERN_ERR Failed to invalidate firmware-assisted dump + rgistration. unexpected error(%d).\n, rc); + return rc; + } + fw_dump.dump_active = 0; + fdm_active = NULL; + return 0; +} + +void fadump_cleanup(void) +{ + /* Invalidate the registration only if dump is active. */ + if (fw_dump.dump_active) { + init_fadump_mem_struct(fdm, + fdm_active-cpu_state_data.destination_address); + fadump_invalidate_dump(fdm); + } +} + +/* + * Release the memory that was reserved in early boot to preserve the memory + * contents. The released memory will be available for general use. + */ +static void fadump_release_memory(unsigned long begin, unsigned long end) +{ + unsigned long addr; + unsigned long ra_start, ra_end; + + ra_start = fw_dump.reserve_dump_area_start; + ra_end = ra_start + fw_dump.reserve_dump_area_size; + + for (addr = begin; addr end; addr += PAGE_SIZE) { + /* +* exclude the dump reserve area. Will reuse it for next +* fadump registration. 
+*/ + if (addr = ra_end ((addr + PAGE_SIZE) ra_start)) + continue; + + ClearPageReserved(pfn_to_page(addr PAGE_SHIFT)); + init_page_count(pfn_to_page(addr PAGE_SHIFT)); + free_page((unsigned long)__va(addr)); + totalram_pages++; + } +} + +static void fadump_invalidate_release_mem(void) +{ + unsigned long reserved_area_start, reserved_area_end; + unsigned long destination_address; + + mutex_lock(fadump_mutex); + if (!fw_dump.dump_active) { + mutex_unlock(fadump_mutex); + return; + } + + destination_address = fdm_active-cpu_state_data.destination_address; + fadump_cleanup(); + mutex_unlock(fadump_mutex); + + /* +* Save the current reserved memory bounds we will require them +* later for releasing the memory for general use. +*/ + reserved_area_start = fw_dump.reserve_dump_area_start; + reserved_area_end = reserved_area_start + + fw_dump.reserve_dump_area_size; + /* +* Setup reserve_dump_area_start and its size so
[RFC PATCH v4 09/10] fadump: Invalidate the fadump registration during machine shutdown.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com If dump is active during system reboot, shutdown or halt then invalidate the fadump registration as it does not get invalidated automatically. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- arch/powerpc/kernel/setup-common.c |8 1 files changed, 8 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index ce35aaf..67e5caa 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -110,6 +110,14 @@ EXPORT_SYMBOL(ppc_do_canonicalize_irqs); /* also used by kexec */ void machine_shutdown(void) { +#ifdef CONFIG_FA_DUMP + /* +* if fadump is active, cleanup the fadump registration before we +* shutdown. +*/ + fadump_cleanup(); +#endif + if (ppc_md.machine_shutdown) ppc_md.machine_shutdown(); } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC PATCH v4 10/10] fadump: Introduce config option for firmware assisted dump feature
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com This patch introduces a new config option CONFIG_FA_DUMP for firmware assisted dump feature on Powerpc (ppc64) architecture. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- arch/powerpc/Kconfig | 13 + 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6926b61..7ce773c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -379,6 +379,19 @@ config PHYP_DUMP If unsure, say N +config FA_DUMP + bool Firmware-assisted dump + depends on PPC64 PPC_RTAS CRASH_DUMP + help + A robust mechanism to get reliable kernel crash dump with + assistance from firmware. This approach does not use kexec, + instead firmware assists in booting the kdump kernel + while preserving memory contents. Firmware-assisted dump + is meant to be a kdump replacement offering robustness and + speed not possible without system firmware assistance. + + If unsure, say N + config PPCBUG_NVRAM bool Enable reading PPCBUG NVRAM during boot if PPLUS || LOPEC default y if PPC_PREP ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC PATCH v4 01/10] fadump: Add documentation for firmware-assisted dump.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com Documentation for firmware-assisted dump. This document is based on the original documentation written for phyp assisted dump by Linas Vepstas and Manish Ahuja, with a few changes to reflect the current implementation. Change in v3: - Modified the documentation to reflect the introduction of fadump_registered sysfs file and a few minor changes. Change in v2: - Modified the documentation to reflect the change of fadump_region file under debugfs filesystem. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- Documentation/powerpc/firmware-assisted-dump.txt | 262 ++ 1 files changed, 262 insertions(+), 0 deletions(-) create mode 100644 Documentation/powerpc/firmware-assisted-dump.txt diff --git a/Documentation/powerpc/firmware-assisted-dump.txt b/Documentation/powerpc/firmware-assisted-dump.txt new file mode 100644 index 000..ba6724a --- /dev/null +++ b/Documentation/powerpc/firmware-assisted-dump.txt @@ -0,0 +1,262 @@ + + Firmware-Assisted Dump + + July 2011 + +The goal of firmware-assisted dump is to enable the dump of +a crashed system, and to do so from a fully-reset system, and +to minimize the total elapsed time until the system is back +in production use. + +As compared to kdump or other strategies, firmware-assisted +dump offers several strong, practical advantages: + +-- Unlike kdump, the system has been reset, and loaded + with a fresh copy of the kernel. In particular, + PCI and I/O devices have been reinitialized and are + in a clean, consistent state. +-- Once the dump is copied out, the memory that held the dump + is immediately available to the running kernel. A further + reboot isn't required. + +The above can only be accomplished by coordination with, +and assistance from the Power firmware. The procedure is +as follows: + +-- The first kernel registers the sections of memory with the + Power firmware for dump preservation during OS initialization. 
+ These registered sections of memory are reserved by the first + kernel during early boot. + +-- When a system crashes, the Power firmware will save + the low memory (boot memory of size larger of 5% of system RAM + or 256MB) of RAM to a previously registered save region. It + will also save system registers, and hardware PTE's. + + NOTE: The term 'boot memory' means size of the low memory chunk + that is required for a kernel to boot successfully when + booted with restricted memory. By default, the boot memory + size will be calculated to larger of 5% of system RAM or + 256MB. Alternatively, user can also specify boot memory + size through boot parameter 'fadump_reserve_mem=' which + will override the default calculated size. + +-- After the low memory (boot memory) area has been saved, the + firmware will reset PCI and other hardware state. It will + *not* clear the RAM. It will then launch the bootloader, as + normal. + +-- The freshly booted kernel will notice that there is a new + node (ibm,dump-kernel) in the device tree, indicating that + there is crash data available from a previous boot. During + the early boot OS will reserve rest of the memory above + boot memory size effectively booting with restricted memory + size. This will make sure that the second kernel will not + touch any of the dump memory area. + +-- Userspace tools will read /proc/vmcore to obtain the contents + of memory, which holds the previous crashed kernel dump in ELF + format. The userspace tools may copy this info to disk, or + network, nas, san, iscsi, etc. as desired. + +-- Once the userspace tool is done saving dump, it will echo + '1' to /sys/kernel/fadump_release_mem to release the reserved + memory back to general use, except the memory required for + next firmware-assisted dump registration. + + e.g. + # echo 1 > /sys/kernel/fadump_release_mem + +Please note that the firmware-assisted dump feature +is only available on Power6 and above systems with recent +firmware versions. 
+ +Implementation details: +-- + +During boot, a check is made to see if firmware supports +this feature on that particular machine. If it does, then +we check to see if an active dump is waiting for us. If yes +then everything but boot memory size of RAM is reserved during +early boot (See Fig. 2). This area is released once we collect a +dump from user land scripts (kdump scripts) that are run. If +there is dump data, then the /sys/kernel/fadump_release_mem +file is created, and the reserved memory is held. + +If there is no waiting dump data, then only the memory required +to hold CPU state, HPTE region, boot memory dump and elfcore +header, is reserved at the top of memory (see Fig. 1). This area +is *not* released: this region will be kept permanently reserved, +so that it can act as a
[PATCH][v2] powerpc/usb: fix type cast for address of ioremap to be compatible with 64-bit
Below is the code for accessing usb sysif_regs in the driver: usb_sys_regs = (struct usb_sys_interface *) ((u32)dr_regs + USB_DR_SYS_OFFSET); This code works on 32-bit, but on 64-bit casting the ioremap address to u32 is not right because it truncates the pointer, and accessing members of 'usb_sys_regs' will cause a call trace, so use (void *), which works for both 32-bit and 64-bit. Signed-off-by: Shaohui Xie shaohui@freescale.com --- changes for v2: 1. use (void *) instead of unsigned long and the double cast according to Timur's comment. drivers/usb/gadget/fsl_udc_core.c |3 +-- 1 files changed, 1 insertions(+), 2 deletions(-) diff --git a/drivers/usb/gadget/fsl_udc_core.c b/drivers/usb/gadget/fsl_udc_core.c index c81fbad..398c5e6 100644 --- a/drivers/usb/gadget/fsl_udc_core.c +++ b/drivers/usb/gadget/fsl_udc_core.c @@ -2497,8 +2497,7 @@ static int __init fsl_udc_probe(struct platform_device *pdev) #ifndef CONFIG_ARCH_MXC if (pdata-have_sysif_regs) - usb_sys_regs = (struct usb_sys_interface *) - ((u32)dr_regs + USB_DR_SYS_OFFSET); + usb_sys_regs = (void *)dr_regs + USB_DR_SYS_OFFSET; #endif /* Initialize USB clocks */ -- 1.6.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC PATCH v4 02/10] fadump: Reserve the memory for firmware assisted dump.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com Reserve the memory during early boot to preserve CPU state data, HPTE region and RMR region data in case of kernel crash. At the time of crash, powerpc firmware will store CPU state data, HPTE region data and move RMR region data to the reserved memory area. If the firmware-assisted dump fails to reserve the memory, then fallback to existing kexec-based kdump. The most of the code implementation to reserve memory has been adapted from phyp assisted dump implementation written by Linas Vepstas and Manish Ahuja Change in v2: - Modified to use standard pr_debug() macro. - Modified early_init_dt_scan_fw_dump() to get the size of ibm,configure-kernel-dump-sizes property and use it to iterate through an array of dump sections. - Introduced boot option 'fadump_reserve_mem=' to let user specify the fadump boot memory to be reserved. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- arch/powerpc/include/asm/fadump.h | 65 ++ arch/powerpc/kernel/Makefile |1 arch/powerpc/kernel/fadump.c | 250 + arch/powerpc/kernel/prom.c| 15 ++ 4 files changed, 330 insertions(+), 1 deletions(-) create mode 100644 arch/powerpc/include/asm/fadump.h create mode 100644 arch/powerpc/kernel/fadump.c diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h new file mode 100644 index 000..0b040c1 --- /dev/null +++ b/arch/powerpc/include/asm/fadump.h @@ -0,0 +1,65 @@ +/* + * Firmware Assisted dump header file. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright 2011 IBM Corporation + * Author: Mahesh Salgaonkar mah...@linux.vnet.ibm.com + */ + +#ifndef __PPC64_FA_DUMP_H__ +#define __PPC64_FA_DUMP_H__ + +#ifdef CONFIG_FA_DUMP + +/* + * The RMR region will be saved for later dumping when kernel crashes. + * Set this to 256MB. + */ +#define RMR_START 0x0 +#define RMR_END(ppc64_rma_size) + +/* + * On some Power systems where RMO is 128MB, it still requires minimum of + * 256MB for kernel to boot successfully. + */ +#define MIN_BOOT_MEM ((RMR_END (0x1UL 28)) ? (0x1UL 28) : RMR_END) + +/* Firmware provided dump sections */ +#define FADUMP_CPU_STATE_DATA 0x0001 +#define FADUMP_HPTE_REGION 0x0002 +#define FADUMP_REAL_MODE_REGION0x0011 + +struct fw_dump { + unsigned long cpu_state_data_size; + unsigned long hpte_region_size; + unsigned long boot_memory_size; + unsigned long reserve_dump_area_start; + unsigned long reserve_dump_area_size; + /* cmd line option during boot */ + unsigned long reserve_bootvar; + + int ibm_configure_kernel_dump; + + unsigned long fadump_enabled:1; + unsigned long fadump_supported:1; + unsigned long dump_active:1; +}; + +extern int early_init_dt_scan_fw_dump(unsigned long node, + const char *uname, int depth, void *data); +extern int fadump_reserve_mem(void); +#endif +#endif diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index ce4f7f1..59b549c 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -60,6 +60,7 @@ obj-$(CONFIG_IBMVIO) += vio.o obj-$(CONFIG_IBMEBUS) += ibmebus.o obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_FA_DUMP) += fadump.o ifeq ($(CONFIG_PPC32),y) obj-$(CONFIG_E500) += idle_e500.o endif diff --git a/arch/powerpc/kernel/fadump.c 
b/arch/powerpc/kernel/fadump.c new file mode 100644 index 000..05dffc0 --- /dev/null +++ b/arch/powerpc/kernel/fadump.c @@ -0,0 +1,250 @@ +/* + * Firmware Assisted dump: A robust mechanism to get reliable kernel crash + * dump with assistance from firmware. This approach does not use kexec, + * instead firmware assists in booting the kdump kernel while preserving + * memory contents. The most of the code implementation has been adapted + * from phyp assisted dump implementation written by Linas Vepstas and + * Manish Ahuja + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either
[RFC PATCH v4 07/10] fadump: Introduce cleanup routine to invalidate /proc/vmcore.
From: Mahesh Salgaonkar mah...@linux.vnet.ibm.com With the firmware-assisted dump support we don't require a reboot when we are in second kernel after crash. The second kernel after crash is a normal kernel boot and has knowledge about entire system RAM with the page tables initialized for entire system RAM. Hence once the dump is saved to disk, we can just release the reserved memory area for general use and continue with second kernel as production kernel. Hence when we release the reserved memory that contains dump data, the '/proc/vmcore' will not be valid anymore. Hence this patch introduces a cleanup routine that invalidates and removes the /proc/vmcore file. This routine will be invoked before we release the reserved dump memory area. Signed-off-by: Mahesh Salgaonkar mah...@linux.vnet.ibm.com --- fs/proc/vmcore.c | 23 +++ 1 files changed, 23 insertions(+), 0 deletions(-) diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index cd99bf5..fae5526 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -699,3 +699,26 @@ static int __init vmcore_init(void) return 0; } module_init(vmcore_init) + +/* Cleanup function for vmcore module. */ +void vmcore_cleanup(void) +{ + struct list_head *pos, *next; + + if (proc_vmcore) { + remove_proc_entry(proc_vmcore-name, proc_vmcore-parent); + proc_vmcore = NULL; + } + + /* clear the vmcore list. */ + list_for_each_safe(pos, next, vmcore_list) { + struct vmcore *m; + + m = list_entry(pos, struct vmcore, list); + list_del(m-list); + kfree(m); + } + kfree(elfcorebuf); + elfcorebuf = NULL; +} +EXPORT_SYMBOL_GPL(vmcore_cleanup); ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: New location of powerpc git tree
Hi Ben, On Mon, 07 Nov 2011 10:29:10 +1100 Benjamin Herrenschmidt b...@kernel.crashing.org wrote: I've moved the powerpc git tree back to kernel.org. The URL should be back to normal for users: git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git OK, I have switched back to that, now. -- Cheers, Stephen Rothwells...@canb.auug.org.au http://www.canb.auug.org.au/~sfr/ pgpndqlV2U5u8.pgp Description: PGP signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 4/7] powerpc/85xx: add support to JOG feature using cpufreq interface
On Fri, Nov 04, 2011 at 02:42:54PM -0500, Scott Wood wrote: On 11/04/2011 07:36 AM, Zhao Chenhui wrote: From: Li Yang le...@freescale.com Some 85xx silicons like MPC8536 and P1022 has the JOG PM feature. The patch adds the support to change CPU frequency using the standard cpufreq interface. Add the all PLL ratio core support. The ratio CORE to CCB can 1:1, 1.5, 2:1, 2.5:1, 3:1, 3.5:1 and 4:1 Signed-off-by: Dave Liu dave...@freescale.com Signed-off-by: Li Yang le...@freescale.com Signed-off-by: Jerry Huang chang-ming.hu...@freescale.com Signed-off-by: Zhao Chenhui chenhui.z...@freescale.com --- arch/powerpc/platforms/85xx/Makefile |1 + arch/powerpc/platforms/85xx/cpufreq.c | 255 + arch/powerpc/platforms/Kconfig|8 + 3 files changed, 264 insertions(+), 0 deletions(-) create mode 100644 arch/powerpc/platforms/85xx/cpufreq.c Please name this something more specific, such as 85xx/cpufreq-jog.c Other 85xx/qoriq chips, such as p4080, have different mechanisms for updating CPU frequency. +static struct cpufreq_frequency_table mpc85xx_freqs[] = { + {2, 0}, + {3, 0}, + {4, 0}, + {5, 0}, + {6, 0}, + {7, 0}, + {8, 0}, + {0, CPUFREQ_TABLE_END}, +}; Only p1022 can handle 1:1 (index 2). +static void set_pll(unsigned int pll, int cpu) +{ + int shift; + u32 busfreq, corefreq, val; + u32 core_spd, mask, tmp; + + tmp = in_be32(guts + PMJCR); + shift = (cpu == 1) ? CORE1_RATIO_SHIFT : CORE0_RATIO_SHIFT; + busfreq = fsl_get_sys_freq(); + val = (pll CORE_RATIO_MASK) shift; + + corefreq = ((busfreq * pll) 1); Use / 2, not 1. Same asm code, more readable. + /* must set the bit[18/19] if the requested core freq 533 MHz */ + core_spd = (cpu == 1) ? PMJCR_CORE1_SPD_MASK : PMJCR_CORE0_SPD_MASK; + if (corefreq FREQ_533MHz) + val |= core_spd; this is the cutoff for p1022 -- on mpc8536 the manual says the cutoff is 800 MHz. + mask = (cpu == 1) ? 
(PMJCR_CORE1_RATIO_MASK | PMJCR_CORE1_SPD_MASK) : + (PMJCR_CORE0_RATIO_MASK | PMJCR_CORE0_SPD_MASK); + tmp = ~mask; + tmp |= val; + out_be32(guts + PMJCR, tmp); clrsetbits_be32() + val = in_be32(guts + PMJCR); + out_be32(guts + POWMGTCSR, + POWMGTCSR_LOSSLESS_MASK | POWMGTCSR_JOG_MASK); setbits32() + pr_debug(PMJCR request %08x at CPU %d\n, tmp, cpu); +} + +static void verify_pll(int cpu) +{ + int shift; + u32 busfreq, pll, corefreq; + + shift = (cpu == 1) ? CORE1_RATIO_SHIFT : CORE0_RATIO_SHIFT; + busfreq = fsl_get_sys_freq(); + pll = (in_be32(guts + PORPLLSR) shift) CORE_RATIO_MASK; + + corefreq = (busfreq * pll) 1; + corefreq /= 100; + pr_debug(PORPLLSR core freq %dMHz at CPU %d\n, corefreq, cpu); +} It looks like the entire point of this function is to make a debug print... #ifdef DEBUG the contents? Or if we mark fsl_get_sys_freq() as __pure (or better, read this once at init, since it involves searching the device tree), will it all get optimized away? + /* initialize frequency table */ + pr_info(core %d frequency table:\n, policy-cpu); + for (i = 0; mpc85xx_freqs[i].frequency != CPUFREQ_TABLE_END; i++) { + mpc85xx_freqs[i].frequency = + (busfreq * mpc85xx_freqs[i].index) 1; + pr_info(%d: %dkHz\n, i, mpc85xx_freqs[i].frequency); + } This should be pr_debug. + /* the latency of a transition, the unit is ns */ + policy-cpuinfo.transition_latency = 2000; + + cur_pll = get_pll(policy-cpu); + pr_debug(current pll is at %d\n, cur_pll); + + for (i = 0; mpc85xx_freqs[i].frequency != CPUFREQ_TABLE_END; i++) { + if (mpc85xx_freqs[i].index == cur_pll) + policy-cur = mpc85xx_freqs[i].frequency; + } You could combine these loops. 
+ /* this ensures that policy-cpuinfo_min +* and policy-cpuinfo_max are set correctly */ comment style +static int mpc85xx_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + struct cpufreq_freqs freqs; + unsigned int new; + + cpufreq_frequency_table_target(policy, + mpc85xx_freqs, + target_freq, + relation, + new); + + freqs.old = policy-cur; + freqs.new = mpc85xx_freqs[new].frequency; + freqs.cpu = policy-cpu; + + mutex_lock(mpc85xx_switch_mutex); + cpufreq_notify_transition(freqs, CPUFREQ_PRECHANGE); + + pr_info(Setting frequency for core %d to %d kHz, \ +PLL ratio is %d/2\n, +policy-cpu, +
[RFC PATCH v4 00/10] fadump: Firmware-assisted dump support for Powerpc.
Hi All, Please find the version 4 of the patchset that implements firmware-assisted dump mechanism to capture kernel crash dump for Powerpc architecture. The firmware-assisted dump is a robust mechanism to get reliable kernel crash dump with assistance from firmware. This approach does not use kexec, instead firmware assists in booting the kdump kernel while preserving memory contents. Changes in v4: -- patch 04/10: - Move the init_elfcore_header() function and 'memblock_num_regions' macro from generic code to power specific code as these are used only by firmware assisted dump implementation which is power specific feature. patch 05/10: - Fixes a issue where memblock_free() is invoked from build_cpu_notes() function during error_out path. Invoke cpu_notes_buf_free() in error_out path instead of memblock_free(). Changes in v3: - - Re-factored the implementation to work with kdump service start/stop. Introduce fadump_registered sysfs control file which will be used by kdump init scripts to start/stop firmware assisted dump. echo 1 to /sys/kernel/fadump_registered file for fadump registration and echo 0 to /sys/kernel/fadump_registered file for fadump un-registration. - Introduced the locking mechanism to handle simultaneous writes to sysfs control files fadump_registered and fadump_release_mem Affected patches are: 01/10, 03/10, 08/10. Changes in v2: - patch 01/10: - Modified the documentation to reflect the change of fadump_region file under debugfs filesystem. patch 02/10: - Modified to use standard pr_debug() macro. - Modified early_init_dt_scan_fw_dump() to get the size of ibm,configure-kernel-dump-sizes property and use it to iterate through an array of dump sections. - Introduced boot option 'fadump_reserve_mem=' to let user specify the fadump boot memory to be reserved. patch 03/10: - Removed few debug print statements. - Moved the setup_fadump() call from setup_system() and now calling it subsys_initcall. - Moved fadump_region attribute under debugfs. 
- Clear the TCE entries if firmware assisted dump is active. patch 05/10: - Moved the crash_fadump() invocation from generic code to panic notifier. - Introduced cpu_notes_buf_alloc() function to allocate cpu notes buffer using get_free_pages(). patch 08/10: - Introduced cpu_notes_buf_free() function to free memory allocated for cpu notes buffer. The most of the code implementation has been adapted from phyp assisted dump implementation written by Linas Vepstas and Manish Ahuja. The first patch is a documentation that talks about firmware-assisted dump mechanism, implementation details and TODO list. I have tested the patches on following system configuration: 1. LPAR on Power6 with 4GB RAM and 8 CPUs 2. LPAR on Power7 with 2GB RAM and 20 CPUs 3. LPAR on Power7 with 1TB RAM and 896 CPUs These patches cleanly apply on commit c3b92c878 in linux-2.6 git tree. Please review the patchset and let me know your comments. Thanks, -Mahesh. --- Mahesh Salgaonkar (10): fadump: Add documentation for firmware-assisted dump. fadump: Reserve the memory for firmware assisted dump. fadump: Register for firmware assisted dump. fadump: Initialize elfcore header and add PT_LOAD program headers. fadump: Convert firmware-assisted cpu state dump data into elf notes. fadump: Add PT_NOTE program header for vmcoreinfo fadump: Introduce cleanup routine to invalidate /proc/vmcore. fadump: Invalidate registration and release reserved memory for general use. fadump: Invalidate the fadump registration during machine shutdown. 
fadump: Introduce config option for firmware assisted dump feature Documentation/powerpc/firmware-assisted-dump.txt | 262 arch/powerpc/Kconfig | 13 arch/powerpc/include/asm/fadump.h| 211 arch/powerpc/kernel/Makefile |1 arch/powerpc/kernel/fadump.c | 1313 ++ arch/powerpc/kernel/iommu.c |8 arch/powerpc/kernel/prom.c | 15 arch/powerpc/kernel/setup-common.c | 16 arch/powerpc/kernel/traps.c |5 arch/powerpc/mm/hash_utils_64.c | 11 fs/proc/vmcore.c | 23 11 files changed, 1876 insertions(+), 2 deletions(-) create mode 100644 Documentation/powerpc/firmware-assisted-dump.txt create mode 100644 arch/powerpc/include/asm/fadump.h create mode 100644 arch/powerpc/kernel/fadump.c -- Signature ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 6/7] fsl_pmc: Add API to enable device as wakeup event source
On Fri, Nov 04, 2011 at 07:08:24PM -0500, Tabi Timur-B04825 wrote: On Fri, Nov 4, 2011 at 7:39 AM, Zhao Chenhui chenhui.z...@freescale.com wrote: + if (!pmc_regs) { + printk(KERN_WARNING PMC is unavailable\n); Use pr_warn() and the other pr_xxx functions. + pmcdr_mask = (u32 *)of_get_property(clk_np, fsl,pmcdr-mask, NULL); The typecast is unnecessary here. + /* clear to enable clock in low power mode */ + if (enable) + clrbits32(pmc_regs-pmcdr, *pmcdr_mask); + else + setbits32(pmc_regs-pmcdr, *pmcdr_mask); You need to use be32_to_cpup() when dereferencing a pointer to a device tree property. -- Timur Tabi Linux kernel developer at Freescale Thanks. I will fix them all. -chenhui ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 6/7] fsl_pmc: Add API to enable device as wakeup event source
On Fri, Nov 04, 2011 at 04:14:25PM -0500, Scott Wood wrote: On 11/04/2011 07:39 AM, Zhao Chenhui wrote: @@ -45,6 +46,72 @@ static int has_lossless; * code can be compatible with both 32-bit 36-bit */ extern void mpc85xx_enter_deep_sleep(u64 ccsrbar, u32 powmgtreq); +#ifdef CONFIG_FSL_PMC +/** + * pmc_enable_wake - enable OF device as wakeup event source + * @pdev: platform device affected + * @state: PM state from which device will issue wakeup events + * @enable: True to enable event generation; false to disable + * + * This enables the device as a wakeup event source, or disables it. + * + * RETURN VALUE: + * 0 is returned on success + * -EINVAL is returned if device is not supposed to wake up the system + * Error code depending on the platform is returned if both the platform and + * the native mechanism fail to enable the generation of wake-up events + */ +int pmc_enable_wake(struct platform_device *pdev, + suspend_state_t state, bool enable) pmc is too generic for a global function. If this can be either enable or disable, perhaps it should be something like mpc85xx_pmc_set_wake(). +{ + int ret = 0; + struct device_node *clk_np; + u32 *pmcdr_mask; + + if (!pmc_regs) { + printk(KERN_WARNING PMC is unavailable\n); + return -ENOMEM; + } -ENOMEM is not appropriate here, maybe -ENODEV? Should print __func__ so the user knows what's complaining. + if (enable !device_may_wakeup(pdev-dev)) + return -EINVAL; + + clk_np = of_parse_phandle(pdev-dev.of_node, clk-handle, 0); + if (!clk_np) + return -EINVAL; + + pmcdr_mask = (u32 *)of_get_property(clk_np, fsl,pmcdr-mask, NULL); + if (!pmcdr_mask) { + ret = -EINVAL; + goto out; + } + + /* clear to enable clock in low power mode */ + if (enable) + clrbits32(pmc_regs-pmcdr, *pmcdr_mask); + else + setbits32(pmc_regs-pmcdr, *pmcdr_mask); We should probably initialize PMCDR to all bits set (or at least all ones we know are valid) -- the default should be not a wakeup source. I think it should be initialized in u-boot. 
+/** + * pmc_enable_lossless - enable lossless ethernet in low power mode + * @enable: True to enable event generation; false to disable + */ +void pmc_enable_lossless(int enable) +{ + if (enable has_lossless) + setbits32(pmc_regs-pmcsr, PMCSR_LOSSLESS); + else + clrbits32(pmc_regs-pmcsr, PMCSR_LOSSLESS); +} +EXPORT_SYMBOL_GPL(pmc_enable_lossless); +#endif Won't we overwrite this later? -Scott Do you have any idea? -chenhui ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
suspecting ibm_newemac driver problem.
Hello All, I am using a ppc460ex-based custom board running the Linux 2.6.30.2 kernel, which has the ibm_newemac driver in it. I am using this board as an iSCSI target which has a RAID system behind it. | n/w driver (ibm_newemac) | n/w layer | iSCSI Target | RAID | The problem I am facing occurs when I start iometer-based I/O. I am doing I/O with the following specifications. Workload type : All in one. No. of outstanding I/O : 32. No. of worker threads : 8. With this kind of workload the network gets stuck partway through and the board becomes unresponsive. If I try to ping the board, the ping fails with 100% packet loss. While debugging the issue I found out that the sock->sk_state_change callback is called from the TCP layer and the state of the socket is not TCP_ESTABLISHED in the iSCSI target driver. So I am assuming that this is a problem related to the n/w driver. Has anyone faced a similar kind of issue before? Please point me in the right direction. Thanks and Regards, Harshal Shete. -- View this message in context: http://old.nabble.com/suspecting-ibm_newemac-driver-problem.-tp32788715p32788715.html Sent from the linuxppc-dev mailing list archive at Nabble.com. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] gpio: mpc8xxx: don't allow input-only pins to be output for MPC5121
Add a 5121-custom reject if an input-only pin is requested to be output (see 18.3.1.1 in the refman). Also, rewrite mach-specific quirk setup to consume fewer lines. Signed-off-by: Wolfram Sang w.s...@pengutronix.de --- drivers/gpio/gpio-mpc8xxx.c | 17 - 1 files changed, 12 insertions(+), 5 deletions(-) diff --git a/drivers/gpio/gpio-mpc8xxx.c b/drivers/gpio/gpio-mpc8xxx.c index ec3fcf0..25dc736 100644 --- a/drivers/gpio/gpio-mpc8xxx.c +++ b/drivers/gpio/gpio-mpc8xxx.c @@ -115,6 +115,14 @@ static int mpc8xxx_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio) return 0; } +static int mpc5121_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) +{ + /* GPIO 28..31 are input only on MPC5121 */ + if (gpio >= 28) + return -EINVAL; + + return mpc8xxx_gpio_dir_out(gc, gpio, val); +} static int mpc8xxx_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val) { struct of_mm_gpio_chip *mm = to_of_mm_gpio_chip(gc); @@ -340,11 +348,10 @@ static void __init mpc8xxx_add_controller(struct device_node *np) mm_gc->save_regs = mpc8xxx_gpio_save_regs; gc->ngpio = MPC8XXX_GPIO_PINS; gc->direction_input = mpc8xxx_gpio_dir_in; - gc->direction_output = mpc8xxx_gpio_dir_out; - if (of_device_is_compatible(np, "fsl,mpc8572-gpio")) - gc->get = mpc8572_gpio_get; - else - gc->get = mpc8xxx_gpio_get; + gc->direction_output = of_device_is_compatible(np, "fsl,mpc5121-gpio") ? + mpc5121_gpio_dir_out : mpc8xxx_gpio_dir_out; + gc->get = of_device_is_compatible(np, "fsl,mpc8572-gpio") ? + mpc8572_gpio_get : mpc8xxx_gpio_get; gc->set = mpc8xxx_gpio_set; gc->to_irq = mpc8xxx_gpio_to_irq; -- 1.7.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 1/5] [ppc] Process dynamic relocations for kernel
On Fri, 2011-11-04 at 14:06 +0530, Suzuki Poulose wrote: On 11/03/11 05:06, Josh Poimboeuf wrote: On Tue, 2011-10-25 at 17:23 +0530, Suzuki K. Poulose wrote: @@ -137,6 +137,9 @@ get_type: lwz r0, 8(r9) /* r_addend */ add r0, r0, r3 /* final addend */ stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */ + dcbst r4,r7 /* flush dcache line to memory */ + sync /* wait for flush to complete */ + icbi r4,r7 /* invalidate icache line */ Doing it this way has two drawbacks : 1) Placing it here in relocate would do the flushing for each and every update. I agree. My kernel had around 80,000 relocations, which means 80,000 d-cache line flushes (for a 32k d-cache) and 80,000 i-cache line invalidates (for a 32k i-cache). Which is obviously a little overkill. Although I didn't notice a performance hit during boot. 2) I would like to keep this code as generic as possible for the PPC32 code. Could we move this to the place from where relocate is called and flush the d-cache and i-cache entirely ? Why not put the cache flushing code at the end of relocate? Would some of the other PPC32 platforms not require the cache flushing? My PPC32 knowledge is 4xx-centric, so please feel free to rewrite the patch as needed to accommodate other PPC32 cores. Thanks, Josh ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH v2 1/5] [ppc] Process dynamic relocations for kernel
On Fri, 2011-11-04 at 14:06 +0530, Suzuki Poulose wrote: On 11/03/11 05:06, Josh Poimboeuf wrote: On Tue, 2011-10-25 at 17:23 +0530, Suzuki K. Poulose wrote: @@ -137,6 +137,9 @@ get_type: lwz r0, 8(r9) /* r_addend */ add r0, r0, r3 /* final addend */ stwx r0, r4, r7 /* memory[r4+r7]) = (u32)r0 */ + dcbst r4,r7 /* flush dcache line to memory */ + sync /* wait for flush to complete */ + icbi r4,r7 /* invalidate icache line */ Doing it this way has two drawbacks : 1) Placing it here in relocate would do the flushing for each and every update. I agree. My kernel had around 80,000 relocations, which means 80,000 d-cache line flushes (for a 32k d-cache) and 80,000 i-cache line invalidates (for a 32k i-cache). Which is obviously a little overkill. Although I didn't notice a performance hit during boot. The I-cache invalidates shouldn't be needed, the un-relocated code can't be in the I-cache (on the grounds that executing it would crash the system). A single sync at the end is probably enough as well. I guess it is possible for the cpu to prefetch/preload into the i-cache through the jump into the relocated code? So maybe a full i-cache invalidate right at the end? (or a jump indirect? - which is probably there anyway) The d-cache will need some kind of flush, since the modified lines have to be written out, the only time it generates additional memory cycles are if there are two (or more) relocations in the same d-cache line. Otherwise the early write-back might help! David ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 6/7] fsl_pmc: Add API to enable device as wakeup event source
On 11/07/2011 05:22 AM, Zhao Chenhui wrote: On Fri, Nov 04, 2011 at 04:14:25PM -0500, Scott Wood wrote: On 11/04/2011 07:39 AM, Zhao Chenhui wrote: + if (enable !device_may_wakeup(pdev-dev)) + return -EINVAL; + + clk_np = of_parse_phandle(pdev-dev.of_node, clk-handle, 0); + if (!clk_np) + return -EINVAL; + + pmcdr_mask = (u32 *)of_get_property(clk_np, fsl,pmcdr-mask, NULL); + if (!pmcdr_mask) { + ret = -EINVAL; + goto out; + } + + /* clear to enable clock in low power mode */ + if (enable) + clrbits32(pmc_regs-pmcdr, *pmcdr_mask); + else + setbits32(pmc_regs-pmcdr, *pmcdr_mask); We should probably initialize PMCDR to all bits set (or at least all ones we know are valid) -- the default should be not a wakeup source. I think it should be initialized in u-boot. I don't see it. If you mean you think this should be added to U-Boot, I disagree. U-Boot does not use this, and we should not add gratuitous U-Boot dependencies to Linux -- especially in cases where there are existing U-Boots in use for relevant boards, that do not have this. +/** + * pmc_enable_lossless - enable lossless ethernet in low power mode + * @enable: True to enable event generation; false to disable + */ +void pmc_enable_lossless(int enable) +{ + if (enable has_lossless) + setbits32(pmc_regs-pmcsr, PMCSR_LOSSLESS); + else + clrbits32(pmc_regs-pmcsr, PMCSR_LOSSLESS); +} +EXPORT_SYMBOL_GPL(pmc_enable_lossless); +#endif Won't we overwrite this later? -Scott Do you have any idea? Set a flag that the code that enters (deep) sleep can use. Also, rename function to mpc85xx_pmc_set_lossless_ethernet(). -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Export PIR data through sysfs
On 11/06/2011 10:47 PM, Ananth N Mavinakayanahalli wrote: The Processor Identification Register (PIR) on powerpc provides information to decode the processor identification tag. Decoding this information is platform specific. Export PIR data via sysfs. (Powerpc manuals state this register is 'optional'. I am not sure though if there are any Linux supported powerpc platforms that don't have it. Code in the kernel referencing PIR isn't under a platform ifdef). Those references are in platform-specific files, under #ifdef CONFIG_SMP, often in areas that would only be executed in the presence of multiple CPUs (e.g. secondary release). The reference in misc_32.S is inside #ifdef CONFIG_KEXEC and is fairly recent -- it may not have been tested on these systems. I don't see PIR (other than in the acronym definition section) in manuals for UP-only cores such as e300, 8xx, and 750. What use does userspace have for this? If you want to return the currently executing CPU (which unless you're pinned could change as soon as the value is read...), why not just return smp_processor_id() or hard_smp_processor_id()? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 2/4] powerpc/time: Use clocksource_register_hz
On Sat, 2011-11-05 at 11:55 +1100, Paul Mackerras wrote: On Thu, Nov 03, 2011 at 09:14:44AM -0400, John Stultz wrote: On Thu, 2011-11-03 at 11:59 +1100, Anton Blanchard wrote: plain text document attachment (clock3) Use clocksource_register_hz which calculates the shift/mult factors for us. Signed-off-by: Anton Blanchard an...@samba.org --- Index: linux-build/arch/powerpc/kernel/time.c === --- linux-build.orig/arch/powerpc/kernel/time.c 2011-11-03 10:19:59.493679032 +1100 +++ linux-build/arch/powerpc/kernel/time.c2011-11-03 10:20:00.965704053 +1100 @@ -86,8 +86,6 @@ static struct clocksource clocksource_rt .rating = 400, .flags= CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift= 22, - .mult = 0, /* To be filled in */ .read = rtc_read, }; @@ -97,8 +95,6 @@ static struct clocksource clocksource_ti .rating = 400, .flags= CLOCK_SOURCE_IS_CONTINUOUS, .mask = CLOCKSOURCE_MASK(64), - .shift= 22, - .mult = 0, /* To be filled in */ .read = timebase_read, }; So I've held off on ppc conversion to clocksource_register_hz due to the fact that the ppc vdso gettimeofday at least used to make assumptions that shift was 22. Is that no longer the case? It is still the case; specifically, update_vsyscall() in arch/powerpc/kernel/time.c converts a multiplier value to a 'tb_to_xs' multiplier (timebase to xsec conversion factor, where 1 xsec = 2^-20 seconds) using a factor which assumes a shift of 22. The factor needs to be 2^(20 + 64 - shift) / 1e9, so we could accommodate other shift values by changing the line that computes new_tb_to_xs to do new_tb_to_xs = (u64) mult * (19342813113834067ULL shift); assuming the shift value is easily available to update_vsyscall (I assume it would be clock-shift). Ok. That sounds reasonable. clock-shift should be correct there. thanks -john ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 6/7] fsl_pmc: Add API to enable device as wakeup event source
On 11/04/2011 07:08 PM, Tabi Timur-B04825 wrote: On Fri, Nov 4, 2011 at 7:39 AM, Zhao Chenhui chenhui.z...@freescale.com wrote: + /* clear to enable clock in low power mode */ + if (enable) + clrbits32(pmc_regs-pmcdr, *pmcdr_mask); + else + setbits32(pmc_regs-pmcdr, *pmcdr_mask); You need to use be32_to_cpup() when dereferencing a pointer to a device tree property. Or just use of_property_read_u32(). -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc/p1023: set IRQ[4:6, 11] to high level sensitive for PCIe
On 11/07/2011 02:32 AM, Roy Zang wrote: P1023 external IRQ[4:6, 11] do not pin out, but the interrupts are shared with PCIe controller. The silicon internally ties the interrupts to L, so change the IRQ[4:6,11] to high level sensitive for PCIe. Some extra commentary on why this works would be nice. The manual says: If a PCI Express INTx interrupt is being used, then the PIC must be configured so that external interrupts are level-sensitive (EIVPRn[S] = 1). and In general, these signals should be considered mutually exclusive. If a PCI Express INTx signal is being used, the PIC must be configured so that external interrupts are level sensitive (EIVPRn[S] = 1). If an IRQn signal is being used as edge-triggered (EIVPRn[S] = 0), the system must not allow inbound PCI Express INTx transactions. Note that it is possible to share IRQn and INTx if the external interrupt is level sensitive; however, if an interrupt occurs, the interrupt service routine must poll both the external sources connected to the IRQn input and the PCI Express INTx sources to determine from which path the external interrupt came. In any case, IRQn should be pulled to the negated state as determined by the associated polarity setting in EIVPRn[P]. So it looks like there's some magic whereby the configuration of the MPIC affects how the PCIe feeds the interrupt in. Is there (or will there be) an erratum, or anything in the manual besides not being documented as external interrupts, about these specific interrupts being tied low in silicon or needing to be active high? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: fpga driver on custom PPC target platform (P4080) ...
Ah, my compatible attribute was wrong: Compatible = nxp,pca9539; Should have been: Compatible = nxp,pca953x; The tree now seems to bind the i2c gpio drivers properly ... on to the localbus mappings! Ah. In that case the drivers should probably be extended to recognize the first compatible. wildcard compatible strings are a bad idea, unfortunately there are some already in the wild, as you've seen. We should try to phase them out though. -- David Gibson | I'll have my music baroque, and my code david AT gibson.dropbear.id.au| minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson Thanks, David It did occur that a specific binding should be preferred over a generic one Rob. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH 4/7] powerpc/85xx: add support to JOG feature using cpufreq interface
On 11/07/2011 04:27 AM, Zhao Chenhui wrote: On Fri, Nov 04, 2011 at 02:42:54PM -0500, Scott Wood wrote: On 11/04/2011 07:36 AM, Zhao Chenhui wrote: + cpufreq_frequency_table_target(policy, + mpc85xx_freqs, + target_freq, + relation, + new); + + freqs.old = policy-cur; + freqs.new = mpc85xx_freqs[new].frequency; + freqs.cpu = policy-cpu; + + mutex_lock(mpc85xx_switch_mutex); + cpufreq_notify_transition(freqs, CPUFREQ_PRECHANGE); + + pr_info(Setting frequency for core %d to %d kHz, \ +PLL ratio is %d/2\n, +policy-cpu, +mpc85xx_freqs[new].frequency, +mpc85xx_freqs[new].index); + + set_pll(mpc85xx_freqs[new].index, policy-cpu); + + cpufreq_notify_transition(freqs, CPUFREQ_POSTCHANGE); + mutex_unlock(mpc85xx_switch_mutex); + + ppc_proc_freq = freqs.new * 1000ul; ppc_proc_freq is global -- can CPUs not have their frequencies adjusted separately? It should be under the lock, if the lock is needed at all. There is only one ppc_proc_freq. no lock. I realize there's only one. I'm asking whether CPUs can have their frequencies set independently -- if the answer is no, and this function is not specific to a CPU, my only concern is the lock. Either this function can be called multiple times in parallel, in which case the ppc_proc_freq update should be inside the lock, or it can't, in which case why do we need the lock at all? -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: fpga driver on custom PPC target platform (P4080) ...
In my continuing saga of dev/tree driver development, I have a problem which might be obvious to those who have more experience in such matters. I'm a bit perplexed on the tree nodes for the localbus/simplebus nodes for my FPGA. CS0 is reserved for booting (from NOR flash as required by our design), CS1 is tied to an FPGA which will always be present. CS2 actually is tied to both of two (optional) fpga's, which have been previously mapped by U-Boot (BRn/ORn configuration). Should I specify a ranges command as follows? This seems somehow wrong, to me, and I'm wondering if there is an alternative representation which would work better in this case. If you recall, the programming control lines are handled on the I2C bus, via a gpio controller. In an ideal world, the optional FPE1 and FPE2 fpgas will have the identical .bts stream, and should support the option to program both simultaneously, or each individually, but I'm at a loss as how to best represent this in the tree. localbus@ffe124000 { compatible = fsl,p4080-elbc, fsl,elbc, simple-bus; reg = 0xf 0xfe124000 0 0x1000; interrupts = 25 2 0 0; interrupt-parent = mpic; #address-cells = 2; #size-cells = 1; /* Local bus region mappings */ ranges = 0 0 0xf 0xe800 0x0800 /* CS0: Boot flash */ 1 0 0xf 0xd000 0x7fff /* CS1: FPGA0 - LIM */ 2 0 0xf 0xd100 0x7fff /* CS2: FPGA1 - FPE1 */ 2 0 0xf 0xd200 0x7fff ; /* CS2: FPGA2 - FPE2 */ flash@0,0 { compatible = cfi-flash; reg = 0 0 0x0800; bank-width = 2; device-width = 2; #size-cells = 1; #address-cells = 1; partition@0 { label = rcw; reg = 0x0 0x2; read-only; }; partition@4 { label = saveenv; reg = 0x4 0x2; }; partition@700 { label = fman-firmware; reg = 0x700 0x2; read-only; }; partition@7f8 { label = u-boot; reg = 0x7f8 0x8; read-only; }; }; lim: fpga@1, { } fpe1: fpga@2, { } fpe2: fpga@2, { } Again, any pointers here would be greatly appreciated ... Cheers, Rob Sciuk ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: Regression: patch hvc_console: display printk messages on console. causing infinite loop with 3.2-rc0 + Xen.
On Mon, Nov 07, 2011 at 05:19:42PM +1100, Stephen Rothwell wrote: Hi Greg, On Wed, 2 Nov 2011 18:30:12 -0700 Greg KH gre...@suse.de wrote: On Wed, Nov 02, 2011 at 12:13:09PM +1100, Stephen Rothwell wrote: On Thu, 27 Oct 2011 07:48:06 +0200 Greg KH gre...@suse.de wrote: On Thu, Oct 27, 2011 at 01:30:08AM -0400, Konrad Rzeszutek Wilk wrote: Hey Miche. The git commit 361162459f62dc0826b82c9690a741a940f457f0: hvc_console: display printk messages on console. is causing an infinite loop when booting Linux under Xen, as so: Ick, not good, thanks for letting us know. Indeed. I am wondering why it was put in a tree and sent to Linus without any Acks or even being replied to by anyone. It appeared in the tty tree between Oct 14 and Oct 25 (while I was unfortunately on vacation). If anyone had tried to boot this on any PowerPC server, it would have been immediately obvious (as it was when I booted Linus' tree last night). And the original author expressed doubts as to his understanding of how it should all work anyway. Just a little more care, please. I would vote for reverting the original and having it resubmitted with corrections at some later date. You are right, I will go do that, sorry for the problems. Ping ... Linus can you please just revert 361162459f62dc0826b82c9690a741a940f457f0 hvc_console: display printk messages on console as it breaks consoles for all PowerPC server machines. Thanks for doing this, I was going to include it in my next pull request after 3.2-rc1 was out, but you are right, it should have gone in sooner. greg k-h ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: fpga driver on custom PPC target platform (P4080) ...
On 11/7/2011 10:09 AM, Robert Sciuk wrote: In my continuing saga of dev/tree driver development, I have a problem which might be obvious to those who have more experience in such matters. I'm a bit perplexed on the tree nodes for the localbus/simplebus nodes for my FPGA. CS0 is reserved for booting (from NOR flash as required by our design), CS1 is tied to an FPGA which will always be present. CS2 actually is tied to both of two (optional) fpga's, which have been previously mapped by U-Boot (BRn/ORn configuration). Should I specify a ranges command as follows? This seems somehow wrong, to me, and I'm wondering if there is an alternative representation which would work better in this case. If you recall, the programming control lines are handled on the I2C bus, via a gpio controller. In an ideal world, the optional FPE1 and FPE2 fpgas will have the identical .bts stream, and should support the option to program both simultaneously, or each individually, but I'm at a loss as how to best represent this in the tree. I would be tempted to add another level of hierarchy as a container for the two FPEs on CS2. localbus@ffe124000 { compatible = fsl,p4080-elbc, fsl,elbc, simple-bus; reg =0xf 0xfe124000 0 0x1000; interrupts =25 2 0 0; interrupt-parent =mpic; #address-cells =2; #size-cells =1; /* Local bus region mappings */ ranges =0 0 0xf 0xe800 0x0800 /* CS0: Boot flash */ 1 0 0xf 0xd000 0x7fff /* CS1: FPGA0 - LIM */ 2 0 0xf 0xd100 0x7fff /* CS2: FPGA1 - FPE1 */ 2 0 0xf 0xd200 0x7fff; /* CS2: FPGA2 - FPE2 */ flash@0,0 { compatible = cfi-flash; reg =0 0 0x0800; bank-width =2; device-width =2; #size-cells =1; #address-cells =1; partition@0 { label = rcw; reg =0x0 0x2; read-only; }; partition@4 { label = saveenv; reg =0x4 0x2; }; partition@700 { label = fman-firmware; reg =0x700 0x2; read-only; }; partition@7f8 { label = u-boot; reg =0x7f8 0x8; read-only; }; }; lim: fpga@1, { } fpe1: fpga@2, { } fpe2: fpga@2, { } Again, any pointers here would be greatly appreciated ... 
Cheers, Rob Sciuk ___ devicetree-discuss mailing list devicetree-disc...@lists.ozlabs.org https://lists.ozlabs.org/listinfo/devicetree-discuss ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: fpga driver on custom PPC target platform (P4080) ...
-Original Message- From: Mitch Bradley [mailto:w...@firmworks.com] I would be tempted to add another level of hierarchy as a container for the two FPEs on CS2. Hi, Mitch. As a long time Forth(er), I'm somewhat honoured to have this exchange with you 8-) Do you mean something like this localbus@ffe124000 { compatible = fsl,p4080-elbc, fsl,elbc, simple-bus; reg = 0xf 0xfe124000 0 0x1000; interrupts = 25 2 0 0; interrupt-parent = mpic; #address-cells = 2; #size-cells = 1; /* Local bus region mappings */ ranges = 0 0 0xf 0xe800 0x0800 /* CS0: Boot flash */ 1 0 0xf 0xd000 0x7fff /* CS1: FPGA0 - LIM */ 2 0 0xf 0xd100 0x7fff /* CS2: FPGA1 - FPE1 */ 2 1 0xf 0xd200 0x7fff ; /* CS2: FPGA2 - FPE2 */ flash@0,0 { compatible = cfi-flash; reg = 0 0 0x0800; bank-width = 2; device-width = 2; #size-cells = 1; #address-cells = 1; partition@0 { label = rcw; reg = 0x0 0x2; read-only; }; partition@4 { label = saveenv; reg = 0x4 0x2; }; partition@700 { label = fman-firmware; reg = 0x700 0x2; read-only; }; partition@7f8 { label = u-boot; reg = 0x7f8 0x8; read-only; }; }; lim: fpga@1, { } nitro: daughtercard@2 { fpe1: fpga@2,0 { } fpe2: fpga@2,1 { } } } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: fpga driver on custom PPC target platform (P4080) ...
On 11/07/2011 02:09 PM, Robert Sciuk wrote: In my continuing saga of dev/tree driver development, I have a problem which might be obvious to those who have more experience in such matters. I'm a bit perplexed on the tree nodes for the localbus/simplebus nodes for my FPGA. CS0 is reserved for booting (from NOR flash as required by our design), CS1 is tied to an FPGA which will always be present. CS2 actually is tied to both of two (optional) fpga's, which have been previously mapped by U-Boot (BRn/ORn configuration). Should I specify a ranges command as follows? This seems somehow wrong, to me, and I'm wondering if there is an alternative representation which would work better in this case. If you recall, the programming control lines are handled on the I2C bus, via a gpio controller. In an ideal world, the optional FPE1 and FPE2 fpgas will have the identical .bts stream, and should support the option to program both simultaneously, or each individually, but I'm at a loss as how to best represent this in the tree. If you need to poke an i2c bus to switch access between certain localbus children, you should remove simple-bus from the compatible -- or perhaps do something like: localbus@ffe124000 { compatible = fsl,p4080-elbc, fsl,elbc, simple-bus; ... flash@0,0 { ... }; switched-bank@2,0 { // no simple-bus here compatible = something specific to your board's setup; ranges = 0 0 2 0 0x8000; // reg is here just to make the unit-addres valid reg = 2 0 0; #address-cells = 2; #size-cells = 1; // specify a phandle to the i2c device and any other // relevant details for identifying which knob of the // switch needs to be turned... // replace x/y with appropriate switch ID, and 0 0x8000 // with appropriate portion of the window being used by // each device fpga@x,0 { compatible = ... reg = x 0 0x8000; ... }; fpga@y,0 { compatible = ... reg = y 0 0x8000; ... 
}; }; }; localbus@ffe124000 { compatible = fsl,p4080-elbc, fsl,elbc, simple-bus; reg = 0xf 0xfe124000 0 0x1000; interrupts = 25 2 0 0; interrupt-parent = mpic; #address-cells = 2; #size-cells = 1; /* Local bus region mappings */ ranges = 0 0 0xf 0xe800 0x0800 /* CS0: Boot flash */ 1 0 0xf 0xd000 0x7fff /* CS1: FPGA0 - LIM */ 2 0 0xf 0xd100 0x7fff /* CS2: FPGA1 - FPE1 */ 2 0 0xf 0xd200 0x7fff ; /* CS2: FPGA2 - FPE2 */ The binding for FSL localbus nodes (Documentation/devicetree/bindings/powerpc/fsl/lbc.txt) says that there is a one-to-one correspondence between ranges entries and chipselects, based on how the eLBC is actually programmed. The details of what is attached come in the subnodes. I don't see how the above mapping is possible with eLBC -- you're splitting CS2 among 0xd100..0xd1007fff and 0xd200..0xd2007fff. Since you have CS1 at 0xd000, alignment restrictions prevent CS2 from covering both of those regions -- unless you've got overlapping mappings, with CS2 being at least 0xd000..0xd3ff, and are relying on CS1 taking priority due to being lower-numbered. I hope you're not doing that, and that these aren't the real addresses (or they can be changed) -- but if you must do this, that breaks the one-to-one model, so you'd need both ranges entries. Also note that the final cell in each ranges entry should be the size, not the size minus one. fpe1: fpga@2, { } fpe2: fpga@2, { This would be fine for a case where the devices are not switched, but rather decode different addresses within the chipselect. E.g. CS3 of arch/powerpc/boot/dts/socrates.dts -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: fpga driver on custom PPC target platform (P4080) ...
... switched-bank@2,0 { // no simple-bus here compatible = something specific to your board's setup; ranges = 0 0 2 0 0x8000; // reg is here just to make the unit-addres valid reg = 2 0 0; #address-cells = 2; #size-cells = 1; // specify a phandle to the i2c device and any other // relevant details for identifying which knob of the // switch needs to be turned... // replace x/y with appropriate switch ID, and 0 0x8000 // with appropriate portion of the window being used by // each device fpga@x,0 { compatible = ... reg = x 0 0x8000; ... }; fpga@y,0 { compatible = ... reg = y 0 0x8000; ... }; }; }; localbus@ffe124000 { compatible = fsl,p4080-elbc, fsl,elbc, simple- bus; reg = 0xf 0xfe124000 0 0x1000; interrupts = 25 2 0 0; interrupt-parent = mpic; #address-cells = 2; #size-cells = 1; /* Local bus region mappings */ ranges = 0 0 0xf 0xe800 0x0800 /* CS0: Boot flash */ 1 0 0xf 0xd000 0x7fff /* CS1: FPGA0 - LIM */ 2 0 0xf 0xd100 0x7fff /* CS2: FPGA1 - FPE1 */ 2 0 0xf 0xd200 0x7fff ; /* CS2: FPGA2 - FPE2 */ The binding for FSL localbus nodes (Documentation/devicetree/bindings/powerpc/fsl/lbc.txt) says that there is a one-to-one correspondence between ranges entries and chipselects, based on how the eLBC is actually programmed. The details of what is attached come in the subnodes. I don't see how the above mapping is possible with eLBC -- you're splitting CS2 among 0xd100..0xd1007fff and 0xd200..0xd2007fff. Since you have CS1 at 0xd000, alignment restrictions prevent CS2 from covering both of those regions -- unless you've got overlapping mappings, with CS2 being at least 0xd000..0xd3ff, and are relying on CS1 taking priority due to being lower-numbered. I hope you're not doing that, and that these aren't the real addresses (or they can be changed) -- but if you must do this, that breaks the one-to-one model, so you'd need both ranges entries. Also note that the final cell in each ranges entry should be the size, not the size minus one. 
fpe1: fpga@2, { } fpe2: fpga@2, { This would be fine for a case where the devices are not switched, but rather decode different addresses within the chipselect. E.g. CS3 of arch/powerpc/boot/dts/socrates.dts -Scott Thanks, Scott ... I'm looking both at the localbus docs and the Socrates ... I will stare at both for the requisite time, and re-emerge when I understand the issues. Rob. ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/kvm: annotate kvm_rma_init as __init
kvm_rma_init() is only called at boot-time, by setup_arch, which is also __init. Signed-off-by: Nishanth Aravamudan n...@us.ibm.com diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index d431203..73fdcc1 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -85,7 +85,7 @@ static inline int lpcr_rmls(unsigned long rma_size) * to allocate contiguous physical memory for the real memory * areas for guests. */ -void kvm_rma_init(void) +void __init kvm_rma_init(void) { unsigned long i; unsigned long j, npages; -- Nishanth Aravamudan n...@us.ibm.com IBM Linux Technology Center ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc: Remove buggy 9-year-old test for binutils 2.12.1
Recent binutils refuses to assemble AltiVec opcodes when in e500/SPE mode, as some of those opcodes alias the SPE instructions. This triggers an ancient binutils version check even when building a kernel with CONFIG_ALTIVEC disabled. In theory, the check could be conditionalized on CONFIG_ALTIVEC, but in practice it has long outlived its utility. It is virtually impossible to find binutils older than 2.12.1 (released 2002) in the wild anymore. Even ancient RedHat Enterprise Linux 4 has binutils-2.14. To fix the kernel build when done natively on e500 systems with this new binutils, the test is simply removed. Signed-off-by: Kyle Moffett kyle.d.moff...@boeing.com --- arch/powerpc/Makefile |6 -- 1 files changed, 0 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 57af16e..70ba0c0 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -255,12 +255,6 @@ checkbin: echo 'disable kernel modules' ; \ false ; \ fi - @if ! /bin/echo dssall | $(AS) -many -o $(TOUT) /dev/null 21 ; then \ - echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build ' ; \ - echo 'correctly with old versions of binutils.' ; \ - echo '*** Please upgrade your binutils to 2.12.1 or newer' ; \ - false ; \ - fi CLEAN_FILES += $(TOUT) -- 1.7.2.5 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: fpga driver on custom PPC target platform (P4080) ...
-Original Message- From: Scott Wood [mailto:scottw...@freescale.com] Sent: Monday, November 07, 2011 5:14 PM To: Robert Sciuk Cc: linuxppc-dev@lists.ozlabs.org; devicetree-disc...@lists.ozlabs.org Subject: Re: fpga driver on custom PPC target platform (P4080) ... ... If you need to poke an i2c bus to switch access between certain localbus children, you should remove simple-bus from the compatible -- or perhaps do something like: Actually, the gpio expander on the I2C bus don't select the localbus children, but the programming pins are driven by the i2c bus, and each FPGA has its own gpio expander (PCA9539). ... Ultimately, I will link the i2c address and localbus (8 bit programming port) via phandles, but both of the optional fpga's have their databus tied to the same chip select on the localbus. The intent was to perform simultaneous programming, but I would like the option to do both simultaneous and individual programming ... eg: set either one or both of the FPGAs to programming mode, and then mmap the localbus port from user land (uio driver), and transmit the bit stream. The DONE (success/fail) bit would be returned by the device close ... or at least that's how I dreamed it might work ... again, I will stare at the fsl elbc device tree doc, and I can certainly re-map the addressing (36bit ... it’s a P4080 target). localbus@ffe124000 { compatible = fsl,p4080-elbc, fsl,elbc, simple-bus; ... flash@0,0 { ... }; switched-bank@2,0 { // no simple-bus here compatible = something specific to your board's setup; ranges = 0 0 2 0 0x8000; // reg is here just to make the unit-addres valid reg = 2 0 0; #address-cells = 2; #size-cells = 1; // specify a phandle to the i2c device and any other // relevant details for identifying which knob of the // switch needs to be turned... // replace x/y with appropriate switch ID, and 0 0x8000 // with appropriate portion of the window being used by // each device fpga@x,0 { compatible = ... reg = x 0 0x8000; ... 
}; fpga@y,0 { compatible = ... reg = y 0 0x8000; ... }; }; }; localbus@ffe124000 { compatible = fsl,p4080-elbc, fsl,elbc, simple- bus; reg = 0xf 0xfe124000 0 0x1000; interrupts = 25 2 0 0; interrupt-parent = mpic; #address-cells = 2; #size-cells = 1; /* Local bus region mappings */ ranges = 0 0 0xf 0xe800 0x0800 /* CS0: Boot flash */ 1 0 0xf 0xd000 0x7fff /* CS1: FPGA0 - LIM */ 2 0 0xf 0xd100 0x7fff /* CS2: FPGA1 - FPE1 */ 2 0 0xf 0xd200 0x7fff ; /* CS2: FPGA2 - FPE2 */ The binding for FSL localbus nodes (Documentation/devicetree/bindings/powerpc/fsl/lbc.txt) says that there is a one-to-one correspondence between ranges entries and chipselects, based on how the eLBC is actually programmed. The details of what is attached come in the subnodes. I don't see how the above mapping is possible with eLBC -- you're splitting CS2 among 0xd100..0xd1007fff and 0xd200..0xd2007fff. Since you have CS1 at 0xd000, alignment restrictions prevent CS2 from covering both of those regions -- unless you've got overlapping mappings, with CS2 being at least 0xd000..0xd3ff, and are relying on CS1 taking priority due to being lower-numbered. I hope you're not doing that, and that these aren't the real addresses (or they can be changed) -- but if you must do this, that breaks the one-to-one model, so you'd need both ranges entries. Also note that the final cell in each ranges entry should be the size, not the size minus one. fpe1: fpga@2, { } fpe2: fpga@2, { This would be fine for a case where the devices are not switched, but rather decode different addresses within the chipselect. E.g. CS3 of arch/powerpc/boot/dts/socrates.dts -Scott ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/kvm: fix build with older toolchains
Fix KVM build for older toolchains (found with .powerpc64-unknown-linux-gnu-gcc (crosstool-NG-1.8.1) 4.3.2): AS arch/powerpc/kvm/book3s_hv_rmhandlers.o arch/powerpc/kvm/book3s_hv_rmhandlers.S: Assembler messages: arch/powerpc/kvm/book3s_hv_rmhandlers.S:1388: Error: Unrecognized opcode: `popcntw' make[1]: *** [arch/powerpc/kvm/book3s_hv_rmhandlers.o] Error 1 make: *** [_module_arch/powerpc/kvm] Error 2 Signed-off-by: Nishanth Aravamudan n...@us.ibm.com diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 0607909..a053db1 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1385,7 +1385,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206) addir6,r5,VCORE_NAPPING_THREADS 31:lwarx r4,0,r6 or r4,r4,r0 - popcntw r7,r4 + PPC_POPCNTW(r7,r4) cmpwr7,r8 bge 2f stwcx. r4,0,r6 -- Nishanth Aravamudan n...@us.ibm.com IBM Linux Technology Center ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc: add KVM as module to defconfigs
Add HV mode KVM to Book3 server 64bit defconfigs as a module. Doesn't add much to the size: textdata bss dechex filename 8244109 4686767 994000 13924876 d47a0c vmlinux.vanilla 8256092 4691607 994128 13941827 d4bc43 vmlinux.kvm This should enable more testing of this configuration. Signed-off-by: Michael Neuling mi...@neuling.org --- arch/powerpc/configs/ppc64_defconfig |4 arch/powerpc/configs/pseries_defconfig |4 2 files changed, 8 insertions(+) Index: linux-ozlabs/arch/powerpc/configs/ppc64_defconfig === --- linux-ozlabs.orig/arch/powerpc/configs/ppc64_defconfig +++ linux-ozlabs/arch/powerpc/configs/ppc64_defconfig @@ -485,3 +485,7 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set +CONFIG_VIRTUALIZATION=y +CONFIG_KVM_BOOK3S_64=m +CONFIG_KVM_BOOK3S_64_HV=y +CONFIG_VHOST_NET=m Index: linux-ozlabs/arch/powerpc/configs/pseries_defconfig === --- linux-ozlabs.orig/arch/powerpc/configs/pseries_defconfig +++ linux-ozlabs/arch/powerpc/configs/pseries_defconfig @@ -362,3 +362,7 @@ CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m # CONFIG_CRYPTO_ANSI_CPRNG is not set # CONFIG_CRYPTO_HW is not set +CONFIG_VIRTUALIZATION=y +CONFIG_KVM_BOOK3S_64=m +CONFIG_KVM_BOOK3S_64_HV=y +CONFIG_VHOST_NET=m ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[git pull] Please pull powerpc.git merge branch
Hi Linus ! Me again :-) So yesterday you pulled my -next branch with the bulk of what happened since the last merge window up to when I left for my long vacation (5 weeks). Today are a few remaining nits that happened during that time. Essentially small fixes, a defconfig bit, some janitorial stuff (IRQF_DISABLED removal), a board support for 52xx that I forgot to pull from Anatolij, and a fix to properly parse NUMA info from the device-tree on powernv. Ah, also finally add main memory to /proc/iomem. I've resisted that for a long time but too much userspace gunk relies on it so here we go. Cheers, Ben. The following changes since commit 1ea6b8f48918282bdca0b32a34095504ee65bab5: Linux 3.2-rc1 (2011-11-07 16:16:02 -0800) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git merge Anton Blanchard (1): powerpc: Add System RAM to /proc/iomem Benjamin Herrenschmidt (1): Merge remote-tracking branch 'agust/next' into merge Dipankar Sarma (1): powerpc/numa: NUMA topology support for PowerNV Geoff Levand (2): powerpc/ps3: Fix PS3 repository build warnings powerpc/ps3: Fix lv1_gpu_attribute hcall Heiko Schocher (2): powerpc/5200: add support for charon board powerpc, tqm5200: update tqm5200_defconfig to fit for charon board.
Michael Neuling (1): powerpc: Add KVM as module to defconfigs Nishanth Aravamudan (1): powerpc/kvm: Fix build with older toolchains Yong Zhang (2): powerpc/irq: Remove IRQF_DISABLED powerpc/ps3: irq: Remove IRQF_DISABLED arch/powerpc/boot/dts/charon.dts | 236 arch/powerpc/configs/52xx/tqm5200_defconfig| 20 ++- arch/powerpc/configs/ppc64_defconfig |4 + arch/powerpc/configs/pseries_defconfig |4 + arch/powerpc/include/asm/floppy.h |4 +- arch/powerpc/include/asm/lv1call.h |2 +- arch/powerpc/include/asm/xics.h|4 +- arch/powerpc/kernel/smp.c |2 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S|2 +- arch/powerpc/mm/mem.c | 30 +++ arch/powerpc/mm/numa.c | 24 ++- arch/powerpc/platforms/52xx/mpc5200_simple.c |1 + arch/powerpc/platforms/cell/beat.c |2 +- arch/powerpc/platforms/cell/celleb_scc_pciex.c |2 +- arch/powerpc/platforms/cell/iommu.c|3 +- arch/powerpc/platforms/cell/pmu.c |2 +- arch/powerpc/platforms/cell/spu_base.c |9 +- arch/powerpc/platforms/powermac/pic.c |1 - arch/powerpc/platforms/powermac/smp.c |4 +- arch/powerpc/platforms/ps3/device-init.c |2 +- arch/powerpc/platforms/ps3/repository.c| 32 ++-- arch/powerpc/sysdev/mpic.c |2 - arch/powerpc/sysdev/ppc4xx_soc.c |2 +- arch/powerpc/sysdev/xics/xics-common.c |5 +- drivers/ps3/ps3-vuart.c|2 +- drivers/ps3/ps3stor_lib.c |2 +- sound/ppc/snd_ps3.c|2 +- 27 files changed, 343 insertions(+), 62 deletions(-) create mode 100644 arch/powerpc/boot/dts/charon.dts ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [git pull] Please pull powerpc.git merge branch
On Tue, 2011-11-08 at 15:21 +1100, Benjamin Herrenschmidt wrote: Hi Linus ! Me again :-) So yesterday you pulled my -next branch with the bulk of what happened since the last merge window up to when I left for my long vacation (5 weeks). Today are a few remaining nits that happened during that time. .. And I added a build fix (the defconfig change enables KVM which hits a compile problem with some config combinations) so new pull request below. Sorry about that. Cheers, Ben. The following changes since commit 1ea6b8f48918282bdca0b32a34095504ee65bab5: Linux 3.2-rc1 (2011-11-07 16:16:02 -0800) are available in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc.git merge Alexander Graf (1): powerpc/kvm: Fix build failure with HV KVM and CBE Anton Blanchard (1): powerpc: Add System RAM to /proc/iomem Benjamin Herrenschmidt (1): Merge remote-tracking branch 'agust/next' into merge Dipankar Sarma (1): powerpc/numa: NUMA topology support for PowerNV Geoff Levand (2): powerpc/ps3: Fix PS3 repository build warnings powerpc/ps3: Fix lv1_gpu_attribute hcall Heiko Schocher (2): powerpc/5200: add support for charon board powerpc, tqm5200: update tqm5200_defconfig to fit for charon board.
Michael Neuling (1): powerpc: Add KVM as module to defconfigs Nishanth Aravamudan (1): powerpc/kvm: Fix build with older toolchains Yong Zhang (2): powerpc/irq: Remove IRQF_DISABLED powerpc/ps3: irq: Remove IRQF_DISABLED arch/powerpc/boot/dts/charon.dts | 236 arch/powerpc/configs/52xx/tqm5200_defconfig| 20 ++- arch/powerpc/configs/ppc64_defconfig |4 + arch/powerpc/configs/pseries_defconfig |4 + arch/powerpc/include/asm/floppy.h |4 +- arch/powerpc/include/asm/lv1call.h |2 +- arch/powerpc/include/asm/xics.h|4 +- arch/powerpc/kernel/exceptions-64s.S |6 +- arch/powerpc/kernel/smp.c |2 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S|2 +- arch/powerpc/mm/mem.c | 30 +++ arch/powerpc/mm/numa.c | 24 ++- arch/powerpc/platforms/52xx/mpc5200_simple.c |1 + arch/powerpc/platforms/cell/beat.c |2 +- arch/powerpc/platforms/cell/celleb_scc_pciex.c |2 +- arch/powerpc/platforms/cell/iommu.c|3 +- arch/powerpc/platforms/cell/pmu.c |2 +- arch/powerpc/platforms/cell/spu_base.c |9 +- arch/powerpc/platforms/powermac/pic.c |1 - arch/powerpc/platforms/powermac/smp.c |4 +- arch/powerpc/platforms/ps3/device-init.c |2 +- arch/powerpc/platforms/ps3/repository.c| 32 ++-- arch/powerpc/sysdev/mpic.c |2 - arch/powerpc/sysdev/ppc4xx_soc.c |2 +- arch/powerpc/sysdev/xics/xics-common.c |5 +- drivers/ps3/ps3-vuart.c|2 +- drivers/ps3/ps3stor_lib.c |2 +- sound/ppc/snd_ps3.c|2 +- 28 files changed, 346 insertions(+), 65 deletions(-) create mode 100644 arch/powerpc/boot/dts/charon.dts ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
RE: [PATCH] powerpc/p1023: set IRQ[4:6, 11] to high level sensitive for PCIe
-Original Message- From: Wood Scott-B07421 Sent: Tuesday, November 08, 2011 2:44 AM To: Zang Roy-R61911 Cc: linuxppc-dev@lists.ozlabs.org Subject: Re: [PATCH] powerpc/p1023: set IRQ[4:6, 11] to high level sensitive for PCIe On 11/07/2011 02:32 AM, Roy Zang wrote: P1023 external IRQ[4:6, 11] do not pin out, but the interrupts are shared with PCIe controller. The silicon internally ties the interrupts to L, so change the IRQ[4:6,11] to high level sensitive for PCIe. Some extra commentary on why this works would be nice. I do not know what kind of extra commentary you request. IRQ 4,5,6, 11 are internally tie to low by silicon. To use these interrupts for PCIe, they need to set high level sensitive. It is clear enough for this patch. The manual says: If a PCI Express INTx interrupt is being used, then the PIC must be configured so that external interrupts are level-sensitive (EIVPRn[S] = 1). That is true for all FSL powerpc silicon with PCIe controller beside P1023. and In general, these signals should be considered mutually exclusive. If a PCI Express INTx signal is being used, the PIC must be configured so that external interrupts are level sensitive (EIVPRn[S] = 1). If an IRQn signal is being used as edge-triggered (EIVPRn[S] = 0), the system must not allow inbound PCI Express INTx transactions. Note that it is possible to share IRQn and INTx if the external interrupt is level sensitive; however, if an interrupt occurs, the interrupt service routine must poll both the external sources connected to the IRQn input and the PCI Express INTx sources to determine from which path the external interrupt came. In any case, IRQn should be pulled to the negated state as determined by the associated polarity setting in EIVPRn[P]. So it looks like there's some magic whereby the configuration of the MPIC affects how the PCIe feeds the interrupt in. 
Is there (or will there be) an erratum, or anything in the manual besides not being documented as external interrupts, about these specific interrupts being tied low in silicon or needing to be active high? I do not think there is (will) an erratum. But I agree the manual needs to document this more clear. thanks. Roy ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH] powerpc: Export PIR data through sysfs
On Mon, Nov 07, 2011 at 11:18:32AM -0600, Scott Wood wrote: On 11/06/2011 10:47 PM, Ananth N Mavinakayanahalli wrote: The Processor Identification Register (PIR) on powerpc provides information to decode the processor identification tag. Decoding this information is platform specific. Export PIR data via sysfs. (Powerpc manuals state this register is 'optional'. I am not sure though if there are any Linux supported powerpc platforms that don't have it. Code in the kernel referencing PIR isn't under a platform ifdef). Those references are in platform-specific files, under #ifdef CONFIG_SMP, often in areas that would only be executed in the presence of multiple CPUs (e.g. secondary release). The reference in misc_32.S is inside #ifdef CONFIG_KEXEC and is fairly recent -- it may not have been tested on these systems. I don't see PIR (other than in the acronym definition section) in manuals for UP-only cores such as e300, 8xx, and 750. I saw that SPRN_PIR is defined for booke in reg_booke.h but wasn't sure if it is applicable to all platforms. Thanks for the clarification. What use does userspace have for this? If you want to return the currently executing CPU (which unless you're pinned could change as soon as the value is read...), why not just return smp_processor_id() or hard_smp_processor_id()? It's not just the current cpu. Decoding PIR can tell you the core id, thread id in case of SMT, and this information can be used by userspace apps to set affinities, etc. How does the following look? Ananth --- From: Ananth N Mavinakayanahalli ana...@in.ibm.com The Processor Identification Register (PIR) on powerpc provides information to decode the processor identification tag. Decoding this information is platform specific. Export PIR data via sysfs.
Signed-off-by: Ananth N Mavinakayanahalli ana...@in.ibm.com --- arch/powerpc/kernel/sysfs.c | 13 + 1 file changed, 13 insertions(+) Index: linux-3.1/arch/powerpc/kernel/sysfs.c === --- linux-3.1.orig/arch/powerpc/kernel/sysfs.c +++ linux-3.1/arch/powerpc/kernel/sysfs.c @@ -330,6 +330,11 @@ static struct sysdev_attribute pa6t_attr #endif /* HAS_PPC_PMC_PA6T */ #endif /* HAS_PPC_PMC_CLASSIC */ +#if defined(CONFIG_SMP) defined(SPRN_PIR) +SYSFS_PMCSETUP(pir, SPRN_PIR); +static SYSDEV_ATTR(pir, 0400, show_pir, NULL); +#endif + static void __cpuinit register_cpu_online(unsigned int cpu) { struct cpu *c = per_cpu(cpu_devices, cpu); @@ -394,6 +399,10 @@ static void __cpuinit register_cpu_onlin sysdev_create_file(s, attr_dscr); #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_SMP) defined(SPRN_PIR) + sysdev_create_file(s, attr_pir); +#endif + cacheinfo_cpu_online(cpu); } @@ -464,6 +473,10 @@ static void unregister_cpu_online(unsign sysdev_remove_file(s, attr_dscr); #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_SMP) defined(SPRN_PIR) + sysdev_remove_file(s, attr_pir); +#endif + cacheinfo_cpu_offline(cpu); } ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
Re: [PATCH v2 1/5] [ppc] Process dynamic relocations for kernel
On 11/07/11 20:43, Josh Poimboeuf wrote: On Fri, 2011-11-04 at 14:06 +0530, Suzuki Poulose wrote: On 11/03/11 05:06, Josh Poimboeuf wrote: On Tue, 2011-10-25 at 17:23 +0530, Suzuki K. Poulose wrote: @@ -137,6 +137,9 @@ get_type: lwz r0, 8(r9) /* r_addend */ add r0, r0, r3 /* final addend */ stwxr0, r4, r7 /* memory[r4+r7]) = (u32)r0 */ + dcbst r4,r7 /* flush dcache line to memory */ + sync/* wait for flush to complete */ + icbir4,r7 /* invalidate icache line */ Doing it this way has two drawbacks : 1) Placing it here in relocate would do the flushing for each and every update. I agree. My kernel had around 80,000 relocations, which means 80,000 d-cache line flushes (for a 32k d-cache) and 80,000 i-cache line invalidates (for a 32k i-cache). Which is obviously a little overkill. Although I didn't notice a performance hit during boot. 2) I would like to keep this code as generic as possible for the PPC32 code. Could we move this to the place from which relocate is called and flush the d-cache and i-cache entirely ? Why not put the cache flushing code at the end of relocate? Would some of the other PPC32 platforms not require the cache flushing? What I was suggesting is, instead of flushing the cache in relocate(), let's do it like this, e.g. on 440x (in head_44x.S): #ifdef CONFIG_RELOCATABLE ... bl relocate #Flush the d-cache and invalidate the i-cache here #endif This would let the different platforms do the cache invalidation in their own way. Btw, I didn't find an instruction to flush the entire d-cache in the PPC440 manual. We have instructions to flush only a block corresponding to an address. However, we have 'iccci' which would invalidate the entire i-cache, which I think is better than 80,000 i-cache invalidates. Kumar / Josh, Do you have any suggestions here ? My PPC32 knowledge is 4xx-centric, so please feel free to rewrite the patch as needed to accommodate other PPC32 cores.
Same here :) Thanks Suzuki ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] net: fsl_pq_mdio: fix oops when using uninitialized mutex
The get_phy_id() routine (called via fsl_pq_mdio_find_free()) tries to acquire the mdio_lock mutex which is only initialized when of_mdiobus_register() gets called later. This causes the following oops: Unable to handle kernel paging request for data at address 0x Faulting instruction address: 0xc02eda74 Oops: Kernel access of bad area, sig: 11 [#1] P1020 RDB NIP: c02eda74 LR: c01b3aa4 CTR: 0007 REGS: cf039d70 TRAP: 0300 Not tainted (3.2.0-rc1-4-gdc9d867-dirty) MSR: 00029000 EE,ME,CE CR: 24024028 XER: DEAR: , ESR: 0080 TASK = cf034000[1] 'swapper' THREAD: cf038000 GPR00: cf039e28 cf039e20 cf034000 cf368228 0020 0002 ffeb02ad 00d0 GPR08: 1083 d108 cf039e90 100ae780 GPR16: c900 0012 0fff 00ffa000 0015 0001 c047 GPR24: c03b4e89 d1072030 cf034000 0020 cf36822c cf368228 NIP [c02eda74] __mutex_lock_slowpath+0x30/0xb0 LR [c01b3aa4] mdiobus_read+0x38/0x68 Call Trace: [cf039e20] [ffeb] 0xffeb (unreliable) [cf039e50] [c01b3aa4] mdiobus_read+0x38/0x68 [cf039e70] [c01b2af0] get_phy_id+0x24/0x70 [cf039e90] [c01b4128] fsl_pq_mdio_probe+0x364/0x414 [cf039ec0] [c0195050] platform_drv_probe+0x20/0x30 [cf039ed0] [c0193a70] driver_probe_device+0xc8/0x170 [cf039ef0] [c0193b88] __driver_attach+0x70/0x98 [cf039f10] [c019294c] bus_for_each_dev+0x60/0x90 [cf039f40] [c0193cc8] driver_attach+0x24/0x34 [cf039f50] [c0192f88] bus_add_driver+0xbc/0x230 [cf039f70] [c0194594] driver_register+0xb8/0x13c [cf039f90] [c0195b40] platform_driver_register+0x6c/0x7c [cf039fa0] [c03e433c] fsl_pq_mdio_init+0x18/0x28 [cf039fb0] [c03ce824] do_one_initcall+0xdc/0x1b4 [cf039fe0] [c03ce984] kernel_init+0x88/0x118 [cf039ff0] [c000bd5c] kernel_thread+0x4c/0x68 Instruction dump: 9421ffd0 7c0802a6 81230008 bf61001c 3bc30004 7c7f1b78 90010034 38010008 7c5c1378 90030008 93c10008 9121000c 3800 90410010 7d201828 Fix this by moving the of_mdiobus_register() call earlier. 
Cc: Andy Fleming aflem...@freescale.com Signed-off-by: Baruch Siach bar...@tkos.co.il --- drivers/net/ethernet/freescale/fsl_pq_mdio.c | 14 +++--- 1 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index 52f4e8a..e17fd2f 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -385,6 +385,13 @@ static int fsl_pq_mdio_probe(struct platform_device *ofdev) tbiaddr = *prop; } + err = of_mdiobus_register(new_bus, np); + if (err) { + printk (KERN_ERR %s: Cannot register as MDIO bus\n, + new_bus-name); + goto err_free_irqs; + } + if (tbiaddr == -1) { out_be32(tbipa, 0); @@ -403,13 +410,6 @@ static int fsl_pq_mdio_probe(struct platform_device *ofdev) out_be32(tbipa, tbiaddr); - err = of_mdiobus_register(new_bus, np); - if (err) { - printk (KERN_ERR %s: Cannot register as MDIO bus\n, - new_bus-name); - goto err_free_irqs; - } - return 0; err_free_irqs: -- 1.7.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v4] powerpc: 85xx: separate e500 from e500mc
CONFIG_E500MC breaks e500/e500v2 systems. It defines L1_CACHE_SHIFT to 6, thus breaking clear_pages(), probably others too. This patch adds a new Processor Type entry for e500mc, and makes e500 systems depend on PPC_E500_V1_V2. Cc: Kumar Gala ga...@kernel.crashing.org Signed-off-by: Baruch Siach bar...@tkos.co.il --- Changes from v3: * Rebase on 3.2-rc1 (add P3060_QDS) Changes from v2: * s/CONFIG_PPC_E500/CONFIG_PPC_E500_V1_V2/ as suggested by Scott Wood Changes from v1: * Rebase on 3.1-rc1 * Remove the list of processor families from the PPC_E500 and PPC_E500MC options description. The P20xx can be either e500v2 or e500mc. arch/powerpc/platforms/85xx/Kconfig| 14 +- arch/powerpc/platforms/Kconfig.cputype | 27 +++ 2 files changed, 24 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 45023e2..cfa2bc9 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -13,6 +13,8 @@ if FSL_SOC_BOOKE if PPC32 +if PPC_E500_V1_V2 + config MPC8540_ADS bool Freescale MPC8540 ADS select DEFAULT_UIMAGE @@ -171,10 +173,13 @@ config SBC8560 help This option enables support for the Wind River SBC8560 board +endif # PPC_E500_V1_V2 + +if PPC_E500MC + config P2041_RDB bool Freescale P2041 RDB select DEFAULT_UIMAGE - select PPC_E500MC select PHYS_64BIT select SWIOTLB select ARCH_REQUIRE_GPIOLIB @@ -187,7 +192,6 @@ config P2041_RDB config P3041_DS bool Freescale P3041 DS select DEFAULT_UIMAGE - select PPC_E500MC select PHYS_64BIT select SWIOTLB select ARCH_REQUIRE_GPIOLIB @@ -200,7 +204,6 @@ config P3041_DS config P3060_QDS bool Freescale P3060 QDS select DEFAULT_UIMAGE - select PPC_E500MC select PHYS_64BIT select SWIOTLB select MPC8xxx_GPIO @@ -212,7 +215,6 @@ config P3060_QDS config P4080_DS bool Freescale P4080 DS select DEFAULT_UIMAGE - select PPC_E500MC select PHYS_64BIT select SWIOTLB select ARCH_REQUIRE_GPIOLIB @@ -222,13 +224,15 @@ config P4080_DS help This option enables 
support for the P4080 DS board +endif # PPC_E500MC + endif # PPC32 config P5020_DS bool Freescale P5020 DS + depends on PPC_E500MC select DEFAULT_UIMAGE select E500 - select PPC_E500MC select PHYS_64BIT select SWIOTLB select ARCH_REQUIRE_GPIOLIB diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index fbecae0..e08d1e3 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -10,13 +10,13 @@ choice prompt Processor Type depends on PPC32 help - There are five families of 32 bit PowerPC chips supported. + There are six families of 32 bit PowerPC chips supported. The most common ones are the desktop and server CPUs (601, 603, 604, 740, 750, 74xx) CPUs from Freescale and IBM, with their embedded 512x/52xx/82xx/83xx/86xx counterparts. - The other embeeded parts, namely 4xx, 8xx, e200 (55xx) and e500 - (85xx) each form a family of their own that is not compatible - with the others. + The other embedded parts, namely 4xx, 8xx, e200 (55xx), e500 + (85xx), and e500mc each form a family of their own that is not + compatible with the others. If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx. @@ -24,10 +24,15 @@ config PPC_BOOK3S_32 bool 512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx select PPC_FPU -config PPC_85xx - bool Freescale 85xx +config PPC_E500_V1_V2 + bool Freescale e500v1/e500v2 + select PPC_85xx select E500 +config PPC_E500MC + bool Freescale e500mc/e5500 + select PPC_85xx + config PPC_8xx bool Freescale 8xx select FSL_SOC @@ -129,15 +134,13 @@ config TUNE_CELL config 8xx bool -config E500 +config PPC_85xx + bool select FSL_EMB_PERFMON select PPC_FSL_BOOK3E - bool -config PPC_E500MC - bool e500mc Support - select PPC_FPU - depends on E500 +config E500 + bool config PPC_FPU bool -- 1.7.7.1 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev