Hello community,

here is the log from the commit of package libatlas3 for openSUSE:Factory 
checked in at 2015-08-11 08:26:22
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/libatlas3 (Old)
 and      /work/SRC/openSUSE:Factory/.libatlas3.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "libatlas3"

Changes:
--------
--- /work/SRC/openSUSE:Factory/libatlas3/libatlas3.changes      2015-06-09 12:25:13.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.libatlas3.new/libatlas3.changes 2015-08-11 08:26:23.000000000 +0200
@@ -1,0 +2,40 @@
+Sun Aug  9 13:01:20 UTC 2015 - p.drou...@gmail.com
+
+- Update to version 3.10.2
+   * Fixed all errataed bugs:
+     + Failure to init workspace can cause NaNs in SYRK
+     + Complex row-major Q-type factorizations produce bad TAU
+     + Failure to cast causes integer overflow on 64-bit platforms
+     + Missing IBM S390 assembly file
+   * Fixed Make.bin to have threaded latime built to do parallel cache flushing
+   * Extended extract string lengths as patched by SAGE folks
+   * Backported fixes & some arch support to configure framework, including
+     host of Itanium and UST1 stuff provided by SAGE folks
+     NOTE: 3.10.2 is terribly out of date, and was released only because the
+     threading rewrite is taking too long.  If possible, you should use a
+     developer release after testing that it works for your particular
+     platform.  In particular, developer releases are *much* faster for any
+     x86 that uses AVX or later SIMD ISA, or any machine with ncores >= 8.
+     The developer release also supports ARM architectures better (though
+     performance is not hugely better if you can get stable installed).
+
+-------------------------------------------------------------------
+Wed Aug  5 13:05:41 UTC 2015 - norm...@linux.vnet.ibm.com
+
+- For ppc64/ppc64le architectures:
+  Add support of Power8 cpu
+  Do not support lvx files for ppc64le (temporarily)
+  In spec, create Power8 archive files if they do not exist yet
+  POWER864VSX   from POWER764VSX and
+  POWER864LEVSX from POWER764LEVSX
+  removed patch:
+    xlf.command.not.found.patch
+    libatlas.ppc64le-abiv2.patch
+  new patches:
+    issue_64.patch
+    atlas.3.10.1-ppc64le_abiv2.patch
+    atlas-new_archdef_for_ppc64le.patch
+    atlas.3.10.1-add_power8_cpu.patch
+    atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
+
+-------------------------------------------------------------------

Old:
----
  atlas3.10.1.tar.bz2
  libatlas.ppc64le-abiv2.patch
  xlf.command.not.found.patch

New:
----
  atlas-new_archdef_for_ppc64le.patch
  atlas.3.10.1-add_power8_cpu.patch
  atlas.3.10.1-ppc64le_abiv2.patch
  atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
  atlas3.10.2.tar.bz2
  issue_64.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ libatlas3.spec ++++++
--- /var/tmp/diff_new_pack.sslbli/_old  2015-08-11 08:26:24.000000000 +0200
+++ /var/tmp/diff_new_pack.sslbli/_new  2015-08-11 08:26:24.000000000 +0200
@@ -19,7 +19,7 @@
 %define enable_native_atlas 0
 
 Name:           libatlas3
-Version:        3.10.1
+Version:        3.10.2
 Release:        0
 Summary:        Automatically Tuned Linear Algebra Software
 License:        BSD-3-Clause and GPL-2.0
@@ -33,12 +33,16 @@
 Source5:        %name-rpmlintrc
 Patch0:         atlas-suse-shared.patch
 Patch1:         atlas-hack.patch
-# for ppc64le
-# http://sourceforge.net/p/math-atlas/mailman/message/32471499/
+# for ppc64 ppc64le
+# https://bugzilla.redhat.com/show_bug.cgi?id=1080073#c40
 Patch10:        getdoublearr.stripwhite.patch
-Patch11:        xlf.command.not.found.patch
+Patch11:        issue_64.patch
 Patch12:        initialize_malloc_memory.invtrsm.wms.oct23.patch 
-Patch13:        libatlas.ppc64le-abiv2.patch 
+Patch13:        atlas.3.10.1-ppc64le_abiv2.patch
+Patch14:        atlas-new_archdef_for_ppc64le.patch
+Patch15:        atlas.3.10.1-add_power8_cpu.patch
+# for ppc64le tempo patch
+Patch16:        atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch
 
 BuildRoot:      %{_tmppath}/%{name}-%{version}-build
 BuildRequires:  gcc-fortran
@@ -194,16 +198,51 @@
 %ifarch x86_64 i586
 %patch1
 %endif
-%ifarch ppc64le
+%ifarch ppc64le ppc64
 %patch10 -p1
 %patch11 -p1
 %patch12 -p1
 %patch13 -p1
+%patch14 -p1
+%patch15 -p1
+%endif
+%ifarch ppc64le
+%patch16 -p1
 %endif
 cp %{SOURCE2} doc
 
 cp %{SOURCE3} %{SOURCE4} CONFIG/ARCHS/
 
+# if Power8 archdef do not exist yet
+# then use the Power7 one that may be the same.
+# do that for BE and LE:
+%ifarch ppc64 ppc64le
+P8archdef='POWER864VSX.tar.bz2'
+P7archdef='POWER764VSX.tar.bz2'
+if [  ! -e CONFIG/ARCHS/${P8archdef} ]; then
+    cp  CONFIG/ARCHS/${P7archdef} /tmp/
+    pushd /tmp
+    tar -xjf ${P7archdef}
+    rm -rf POWER864VSX
+    mv POWER764VSX POWER864VSX
+    tar -cjf ${P8archdef} POWER864VSX
+    popd
+    mv /tmp/${P8archdef}  CONFIG/ARCHS/
+fi
+P8archdef='POWER864LEVSX.tar.bz2'
+P7archdef='POWER764LEVSX.tar.bz2'
+if [  ! -e CONFIG/ARCHS/${P8archdef} ]; then
+    cp  CONFIG/ARCHS/${P7archdef} /tmp/
+    pushd /tmp
+    tar -xjf ${P7archdef}
+    rm -rf POWER864LEVSX
+    mv POWER764LEVSX POWER864LEVSX
+    tar -cjf ${P8archdef} POWER864LEVSX
+    popd
+    mv /tmp/${P8archdef}  CONFIG/ARCHS/
+fi
+%endif
+
 %build
 for type in %{types}; do
        if [ "$type" = "base" ]; then
@@ -239,12 +278,6 @@
        sed -i 's#-m64#-m32#g' Make.inc
 %endif
 
-# use the provided archdef file for ppc64le
-# and force its usage in INSTFLAGS.
-%ifarch ppc64le
-       sed -i 's#\(ARCH = POWER.64\)VSX#\1LEVSX#' Make.inc
-       sed -i 's#\(INSTFLAGS =.*\) -a 0#\1 -a 1#' Make.inc
-%endif
        make build %{?_smp_mflags}
        cd lib
        make shared %{?_smp_mflags}

++++++ atlas-new_archdef_for_ppc64le.patch ++++++
Subject: atlas new archdef for ppc64le
From: Michel Normand <norm...@linux.vnet.ibm.com>
Date: Sun, 13 Jun 2014 18:02:47 +0200

Need to define different archdef names
for ppc64 (that is Big Endian) and ppc64le (that is Little Endian).
This is already done upstream in atlas 3.11.30 with issue
https://sourceforge.net/p/math-atlas/patches/66/

Required at least as long as I need the bypass of
atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch

Signed-off-by: Michel Normand <norm...@linux.vnet.ibm.com>
---
 CONFIG/src/SpewMakeInc.c |    4 ++++
 1 file changed, 4 insertions(+)

Index: ATLAS/CONFIG/src/SpewMakeInc.c
===================================================================
--- ATLAS.orig/CONFIG/src/SpewMakeInc.c
+++ ATLAS/CONFIG/src/SpewMakeInc.c
@@ -542,6 +542,10 @@ int main(int nargs, char **args)
    fprintf(fpout, "#  -------------------------------------------------\n");
    fprintf(fpout, "   ARCH = %s", machnam[mach]);
    fprintf(fpout, "%d", ptrbits);
+   /* for ppc64le archi add 'LE' characters */
+   #if defined(__powerpc64__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+      fprintf(fpout, "%s", "LE");
+   #endif
    if (ISAX)
       fprintf(fpout, "%s", ISAXNAM[ISAX]);
    if (!USEIEEE)
++++++ atlas.3.10.1-add_power8_cpu.patch ++++++
From: Michel Normand <norm...@linux.vnet.ibm.com>
Subject: atlas.3.10.1 add power8 cpu
Date: Thu, 18 Sep 2014 15:13:24 +0200

atlas.3.10.1 add Power8 cpu
tracked upstream by issue 67
https://sourceforge.net/p/math-atlas/patches/67/

Signed-off-by: Michel Normand <norm...@linux.vnet.ibm.com>
---
 CONFIG/ARCHS/Make.ext               |    7 +++++++
 CONFIG/include/atlconf.h            |    6 +++---
 CONFIG/src/atlcomp.txt              |    6 ++++++
 CONFIG/src/backend/archinfo_aix.c   |    2 ++
 CONFIG/src/backend/archinfo_linux.c |    1 +
 include/atlas_pca.h                 |    2 +-
 6 files changed, 20 insertions(+), 4 deletions(-)

Index: ATLAS/CONFIG/ARCHS/Make.ext
===================================================================
--- ATLAS.orig/CONFIG/ARCHS/Make.ext
+++ ATLAS/CONFIG/ARCHS/Make.ext
@@ -33,6 +33,7 @@ files = AMD64K10h32SSE3.tar.bz2 AMD64K10
         MIPSR1xK64.tar.bz2 Makefile P432SSE2.tar.bz2 P4E32SSE3.tar.bz2 \
         P4E64SSE3.tar.bz2 PIII32SSE1.tar.bz2 POWER432.tar.bz2 \
         POWER464.tar.bz2 POWER564.tar.bz2 POWER764VSX.tar.bz2 \
+        POWER864VSX.tar.bz2 \
         PPCG432AltiVec.tar.bz2 PPCG532AltiVec.tar.bz2 PPCG564AltiVec.tar.bz2 \
         PPRO32.tar.bz2 USIII32.tar.bz2 USIII64.tar.bz2 USIV32.tar.bz2 \
         USIV64.tar.bz2 UST232.tar.bz2 UST264.tar.bz2 atlas_test1.1.3.tar.bz2 \
@@ -302,6 +303,12 @@ POWER764VSX.tar.bz2 : $(basdr)/POWER764V
            /tmp/POWER764VSX.tar POWER764VSX
        bzip2 /tmp/POWER764VSX.tar
        mv /tmp/POWER764VSX.tar.bz2 ./.
+POWER864VSX.tar.bz2 : $(basdr)/POWER864VSX
+       - rm -f /tmp/POWER864VSX.tar /tmp/POWER864VSX.tar.bz2
+       cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \
+           /tmp/POWER864VSX.tar POWER864VSX
+       bzip2 /tmp/POWER864VSX.tar
+       mv /tmp/POWER864VSX.tar.bz2 ./.
 IBMz1032.tar.bz2 : $(basdr)/IBMz1032
        - rm -f /tmp/IBMz1032.tar /tmp/IBMz1032.tar.bz2
        cd $(basdr) ; tar --dereference --exclude 'CVS' -c -f \
Index: ATLAS/CONFIG/include/atlconf.h
===================================================================
--- ATLAS.orig/CONFIG/include/atlconf.h
+++ ATLAS/CONFIG/include/atlconf.h
@@ -18,10 +18,10 @@ enum OSTYPE {OSOther=0, OSLinux, OSSunOS
 enum ARCHFAM {AFOther=0, AFPPC, AFSPARC, AFALPHA, AFX86, AFIA64, AFMIPS,
               AFARM, AFS390};
 
-#define NMACH 47
+#define NMACH 48
 static char *machnam[NMACH] =
    {"UNKNOWN", "POWER3", "POWER4", "POWER5", "PPCG4", "PPCG5",
-    "POWER6", "POWER7", "IBMz9", "IBMz10", "IBMz196",
+    "POWER6", "POWER7", "POWER8", "IBMz9", "IBMz10", "IBMz196",
     "x86x87", "x86SSE1", "x86SSE2", "x86SSE3",
     "P5", "P5MMX", "PPRO", "PII", "PIII", "PM", "CoreSolo",
     "CoreDuo", "Core2Solo", "Core2", "Corei1", "Corei2", "Atom", "P4", "P4E",
@@ -30,7 +30,7 @@ static char *machnam[NMACH] =
     "USI", "USII", "USIII", "USIV", "UST2", "UnknownUS",
     "MIPSR1xK", "MIPSICE9", "ARMv7"};
 enum MACHTYPE {MACHOther, IbmPwr3, IbmPwr4, IbmPwr5, PPCG4, PPCG5,
-               IbmPwr6, IbmPwr7,
+               IbmPwr6, IbmPwr7, IbmPwr8,
                IbmZ9, IbmZ10, IbmZ196,  /* s390(x) in Linux */
                x86x87, x86SSE1, x86SSE2, x86SSE3, /* generic targets */
                IntP5, IntP5MMX, IntPPRO, IntPII, IntPIII, IntPM, IntCoreS,
Index: ATLAS/CONFIG/src/atlcomp.txt
===================================================================
--- ATLAS.orig/CONFIG/src/atlcomp.txt
+++ ATLAS/CONFIG/src/atlcomp.txt
@@ -186,6 +186,10 @@ MACH=PPCG5 OS=ALL LVL=1000 COMPS=dmc,icc
    'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2'
 MACH=PPCG5 OS=ALL LVL=1000 COMPS=skc
    'gcc' '-mpowerpc64 -maltivec -mabi=altivec -mcpu=970 -mtune=970 -O2 -mvrsave'
+MACH=POWER8 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
+   'gcc' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops'
+MACH=POWER8 OS=ALL LVL=1010 COMPS=f77
+   'gfortran' '-O2 -mvsx -mcpu=power8 -mtune=power8 -m64 -mvrsave -funroll-all-loops'
 MACH=POWER7 OS=ALL LVL=1010 COMPS=icc,smc,dmc,skc,dkc,xcc,gcc
    'gcc' '-O2 -mvsx -mcpu=power7 -mtune=power7 -m64 -mvrsave -funroll-all-loops'
 MACH=POWER7 OS=ALL LVL=1010 COMPS=f77
@@ -206,6 +210,8 @@ MACH=POWER4 OS=ALL LVL=1010 COMPS=icc,dm
    'gcc' '-mcpu=power4 -mtune=power4 -O3 -fno-schedule-insns -fno-rerun-loop-opt'
 MACH=POWER4 OS=ALL LVL=1010 COMPS=f77
    'xlf' '-qtune=pwr4 -qarch=pwr4 -O3 -qmaxmem=-1 -qfloat=hsflt'
+MACH=POWER8 OS=ALL LVL=1010 COMPS=f77
+   'xlf' '-qtune=pwr8 -qarch=pwr8 -O3 -qmaxmem=-1 -qfloat=hsflt'
 #
 # IBM System z or zEnterprise.
 # These compiler flags given by IBM; -O3 -funroll-loops are chosen because
Index: ATLAS/CONFIG/src/backend/archinfo_linux.c
===================================================================
--- ATLAS.orig/CONFIG/src/backend/archinfo_linux.c
+++ ATLAS/CONFIG/src/backend/archinfo_linux.c
@@ -77,6 +77,7 @@ enum MACHTYPE ProbeArch()
          else if (strstr(res, "7455")) mach = PPCG4;
          else if (strstr(res, "PPC970FX")) mach = PPCG5;
          else if (strstr(res, "PPC970MP")) mach = PPCG5;
+         else if (strstr(res, "POWER8")) mach = IbmPwr8;
          else if (strstr(res, "POWER7")) mach = IbmPwr7;
          else if (strstr(res, "POWER6")) mach = IbmPwr6;
          else if (strstr(res, "POWER5")) mach = IbmPwr5;
Index: ATLAS/include/atlas_pca.h
===================================================================
--- ATLAS.orig/include/atlas_pca.h
+++ ATLAS/include/atlas_pca.h
@@ -26,7 +26,7 @@
    #endif
 #elif defined(ATL_ARCH_POWER3) || defined(ATL_ARCH_POWER4) || \
       defined(ATL_ARCH_POWER5) || defined(ATL_ARCH_POWER6) || \
-      defined(ATL_ARCH_POWER7)
+      defined(ATL_ARCH_POWER7) || defined(ATL_ARCH_POWER8)
    #ifdef __GNUC__
       #define ATL_membarrier __asm__ __volatile__ ("dcs")
 /*      #define ATL_USEPCA 1 */
Index: ATLAS/CONFIG/src/backend/archinfo_aix.c
===================================================================
--- ATLAS.orig/CONFIG/src/backend/archinfo_aix.c
+++ ATLAS/CONFIG/src/backend/archinfo_aix.c
@@ -67,6 +67,8 @@ enum MACHTYPE ProbeArch()
       {
          if (strstr(res, "PowerPC_POWER5"))
             mach = IbmPwr5;
+         else if (strstr(res, "PowerPC_POWER8"))
+            mach = IbmPwr8;
          else if (strstr(res, "PowerPC_POWER7"))
             mach = IbmPwr7;
          else if (strstr(res, "PowerPC_POWER6"))
++++++ atlas.3.10.1-ppc64le_abiv2.patch ++++++
From: Michel Normand <norm...@linux.vnet.ibm.com>
Subject: atlas.ppc64le abiv2
Date: Mon, 14 Apr 2014 18:03:06 +0200
References: http://sourceforge.net/p/math-atlas/mailman/message/32471499/

atlas.ppc64le abiv2
* do not use opd section for ABI V2
* define TOC in r2 in function call
  TODO: may be not required everywhere.
based on work of Guy and Thierry

TODO: still have to work on stack FSIZE

TODO: for ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
    need to better understand the change about ld pC0


Signed-off-by: Michel Normand <norm...@linux.vnet.ibm.com>
---
 CONFIG/src/backend/probe_AltiVec.S       |    2 +-
 CONFIG/src/backend/probe_VSX.S           |    2 +-
 src/threads/ATL_DecAtomicCount_ppc.S     |    2 +-
 src/threads/ATL_ResetAtomicCount_ppc.S   |    2 +-
 tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c |    9 ++++++++-
 tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c |    9 ++++++++-
 tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c |    9 ++++++++-
 tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c |   20 ++++++++++++++++++--
 tune/blas/gemm/CASES/ATL_smm4x4x128_av.c |   23 ++++++++++++++++++++++-
 9 files changed, 68 insertions(+), 10 deletions(-)

Index: ATLAS/CONFIG/src/backend/probe_AltiVec.S
===================================================================
--- ATLAS.orig/CONFIG/src/backend/probe_AltiVec.S
+++ ATLAS/CONFIG/src/backend/probe_AltiVec.S
@@ -6,7 +6,7 @@
  *
  */
 .text
-#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux)
+#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux) && _CALL_ELF != 2
         .align 2
         .globl  ATL_asmdecor(do_vsum)
         .section        ".opd","aw"
Index: ATLAS/CONFIG/src/backend/probe_VSX.S
===================================================================
--- ATLAS.orig/CONFIG/src/backend/probe_VSX.S
+++ ATLAS/CONFIG/src/backend/probe_VSX.S
@@ -6,7 +6,7 @@
  *
  */
 .text
-#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux)
+#if defined(ATL_USE64BITS) && defined (ATL_OS_Linux) && _CALL_ELF != 2
         .align 2
         .globl  ATL_asmdecor(do_vsum)
         .section        ".opd","aw"
Index: ATLAS/src/threads/ATL_DecAtomicCount_ppc.S
===================================================================
--- ATLAS.orig/src/threads/ATL_DecAtomicCount_ppc.S
+++ ATLAS/src/threads/ATL_DecAtomicCount_ppc.S
@@ -4,7 +4,7 @@
    .globl _ATL_DecAtomicCount
    _ATL_DecAtomicCount:
 #else
-   #if defined(ATL_USE64BITS)
+   #if defined(ATL_USE64BITS) && _CALL_ELF != 2
 /*
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
  */
Index: ATLAS/src/threads/ATL_ResetAtomicCount_ppc.S
===================================================================
--- ATLAS.orig/src/threads/ATL_ResetAtomicCount_ppc.S
+++ ATLAS/src/threads/ATL_ResetAtomicCount_ppc.S
@@ -4,7 +4,7 @@
    .globl _ATL_ResetAtomicCount
    _ATL_ResetAtomicCount:
 #else
-   #if defined(ATL_USE64BITS)
+   #if defined(ATL_USE64BITS) && _CALL_ELF != 2
 /*
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
  */
Index: ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_cmm4x4x128_av.c
@@ -181,7 +181,7 @@ void ATL_USERMM(const int M, const int N
        .globl  Mjoin(_,ATL_USERMM)
 Mjoin(_,ATL_USERMM):
 #else
-   #if defined(ATL_USE64BITS)
+   #if defined(ATL_USE64BITS) && _CALL_ELF != 2
 /*
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
  */
@@ -258,8 +258,15 @@ ATL_USERMM:
         eqv     r0, r0, r0      /* all 1s */
         ATL_WriteVRSAVE(r0)     /* signal we use all vector regs */
 #if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+/* ABIv2 */
+        ld      pC0, FSIZE+104(r1)
+        ld      ldc, FSIZE+112(r1)
+#else
+/* ABIv1 */
         ld      pC0, FSIZE+120(r1)
         ld      ldc, FSIZE+128(r1)
+#endif
 #elif defined(ATL_AS_OSX_PPC)
         lwz     pC0, FSIZE+60(r1)
         lwz     ldc,  FSIZE+64(r1)
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x2pf_av.c
@@ -279,7 +279,7 @@ void ATL_USERMM(const int M, const int N
 #endif
 .text
 #ifdef ATL_GAS_LINUX_PPC
-   #if defined(ATL_USE64BITS)
+   #if defined(ATL_USE64BITS) && _CALL_ELF != 2
 /*
  *       No idea what this does, but seg fault without it (I think it is
  *       partially resp for making code callable from both static & PIC code)
@@ -405,8 +405,15 @@ Mjoin(_,ATL_USERMM):
  */
 #ifdef ATL_GAS_LINUX_PPC
    #ifdef ATL_USE64BITS
+      #if _CALL_ELF == 2
+      /* ABIv2 */
+        ld      pC0, 104(r1)
+        ld      incCn, 112(r1)
+      #else
+      /* ABIv1 */
        ld      pC0, 120(r1)
        ld      incCn, 128(r1)
+      #endif
    #else
        lwz     incCn, FSIZE+8(r1)
    #endif
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x32_ppc.c
@@ -268,7 +268,7 @@ Mjoin(.,ATL_USERMM):
        .globl  Mjoin(_,ATL_USERMM)
 Mjoin(_,ATL_USERMM):
    #else
-      #if defined(ATL_USE64BITS)
+      #if defined(ATL_USE64BITS) && _CALL_ELF != 2
 /*
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
  */
@@ -324,8 +324,15 @@ ATL_USERMM:
 #endif
 
 #ifdef ATL_USE64BITS
+#if _CALL_ELF == 2
+/* ABIv2 */
+        ld      pC0, 104(r1)
+        ld      incCn, 112(r1)
+#else
+/* ABIv1 */
         ld      pC0, 120(r1)
         ld      incCn, 128(r1)
+#endif
 #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
         lwz     pC0, 68(r1)
         lwz     incCn,  72(r1)
Index: ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_dmm4x4x80_ppc.c
@@ -170,13 +170,21 @@ void ATL_USERMM(const int M, const int N
                 const TYPE beta, TYPE *C, const int ldc)
                                   (r10)    8(r1)
 *******************************************************************************
-64 bit ABIs:
+64 bit ABIv1s:
                          r3           r4           r5             r6/f1
 void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
                            r7             r8             r9            r10
                 const TYPE *A, const int lda, const TYPE *B, const int ldb,
                              f2   120(r1)        128(r1)
                 const TYPE beta, TYPE *C, const int ldc)
+
+64 bit ABIv2s:
+                         r3           r4           r5             r6/f1
+void ATL_USERMM(const int M, const int N, const int K, const TYPE alpha,
+                           r7             r8             r9            r10
+                const TYPE *A, const int lda, const TYPE *B, const int ldb,
+                             f2   104(r1)        112(r1)
+                const TYPE beta, TYPE *C, const int ldc)
 #endif
 #ifdef ATL_AS_AIX_PPC
         .csect .text[PR]
@@ -202,7 +210,7 @@ Mjoin(.,ATL_USERMM):
        .globl  Mjoin(_,ATL_USERMM)
 Mjoin(_,ATL_USERMM):
    #else
-      #if defined(ATL_USE64BITS)
+      #if defined(ATL_USE64BITS) && _CALL_ELF != 2
 /*
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
  */
@@ -257,9 +265,17 @@ ATL_USERMM:
    #endif
 #endif
 
+
 #if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+/* ABIv2 */
+        ld      pC0, 104(r1)
+        ld      incCn, 112(r1)
+#else
+/* ABIv1 */
         ld      pC0, 120(r1)
         ld      incCn, 128(r1)
+#endif
 #elif defined(ATL_AS_OSX_PPC) || defined(ATL_AS_AIX_PPC)
         lwz     pC0, 68(r1)
         lwz     incCn,  72(r1)
Index: ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
+++ ATLAS/tune/blas/gemm/CASES/ATL_smm4x4x128_av.c
@@ -196,7 +196,7 @@ void ATL_USERMM(const int M, const int N
        .globl  Mjoin(_,ATL_USERMM)
 Mjoin(_,ATL_USERMM):
 #else
-   #if defined(ATL_USE64BITS)
+   #if defined(ATL_USE64BITS) && _CALL_ELF != 2
 /*
  *      Official Program Descripter section, seg fault w/o it on Linux/PPC64
  */
@@ -221,8 +221,15 @@ ATL_USERMM:
  *      kernel instead
  */
 #if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+/* ABIv2 */
+        ld      r10, 104(r1)
+        ld      r5, 112(r1)
+#else
+/* ABIv1 */
         ld      r10, 120(r1)
         ld      r5, 128(r1)
+#endif
 #elif defined(ATL_AS_OSX_PPC)
         lwz     r10, 60(r1)
         lwz     r5,  64(r1)
@@ -285,8 +292,15 @@ ATL_USERMM:
         eqv     r0, r0, r0      /* all 1s */
         ATL_WriteVRSAVE(r0)     /* signal we use all vector regs */
 #if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+        /* ABIv2 */
+        ld      pC0, FSIZE+104(r1)
+        ld      ldc, FSIZE+112(r1)
+#else
+        /* ABIv1 */
         ld      pC0, FSIZE+120(r1)
         ld      ldc, FSIZE+128(r1)
+#endif
 #elif defined(ATL_AS_OSX_PPC)
         lwz     pC0, FSIZE+60(r1)
         lwz     ldc,  FSIZE+64(r1)
@@ -4258,8 +4272,15 @@ UNALIGNED_C:
         eqv     r0, r0, r0      /* all 1s */
         ATL_WriteVRSAVE(r0)     /* signal we use all vector regs */
 #if defined (ATL_USE64BITS)
+#if _CALL_ELF == 2
+        /* ABIv2 */
+        ld      pC0, FSIZE+104(r1)
+        ld      ldc, FSIZE+112(r1)
+#else
+        /* ABIv1 */
         ld      pC0, FSIZE+120(r1)
         ld      ldc, FSIZE+128(r1)
+#endif
 #elif defined(ATL_AS_OSX_PPC)
         lwz     pC0, FSIZE+60(r1)
         lwz     ldc,  FSIZE+64(r1)
++++++ atlas.3.10.2-ppc64le_do_not_use_files_with_lvx.patch ++++++
From: Michel Normand <norm...@linux.vnet.ibm.com>
Subject: atlas.3.10.2 ppc64le do not use files with lvx
Date: Tue, 12 Aug 2014 16:07:06 +0200

ppc64le do not use files with lvx
This is a temporary patch as long as the related files
are not ported yet to ppc64 little-endian.

Warning: patch to be applied only for ppc64le architecture
and will also need atlas-new_archdef_for_ppc64le.patch

Signed-off-by: Michel Normand <norm...@linux.vnet.ibm.com>
---
 tune/blas/gemm/CASES/ccases.flg |    6 +-----
 tune/blas/gemm/CASES/dcases.flg |    8 +-------
 tune/blas/gemm/CASES/dcases.vnb |    4 ----
 tune/blas/gemm/CASES/scases.flg |    9 +--------
 tune/blas/gemm/CASES/scases.vnb |    3 ---
 tune/blas/gemm/CASES/zcases.flg |    8 +-------
 6 files changed, 4 insertions(+), 34 deletions(-)

Index: ATLAS/tune/blas/gemm/CASES/ccases.flg
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/ccases.flg
+++ ATLAS/tune/blas/gemm/CASES/ccases.flg
@@ -1,5 +1,5 @@
 <ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
-24
+22
 304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c     "R. Clint Whaley" \
 gcc
 -mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O
@@ -48,13 +48,9 @@ gcc
 328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c  "R. Clint Whaley" \
 gcc
 -fomit-frame-pointer -O2 -fno-tree-loop-optimize
-329 192 4 4 4 1 16 4 4 4 ATL_cmm4x4x128_av.c "R. Clint Whaley" \
-gcc
--x assembler-with-cpp
 331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c  "R. Clint Whaley" \
 gcc
 -x assembler-with-cpp -mips4
-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c  "IBM"
 333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \
 gcc
 -x assembler-with-cpp -mfpu=vfpv3
Index: ATLAS/tune/blas/gemm/CASES/scases.flg
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/scases.flg
+++ ATLAS/tune/blas/gemm/CASES/scases.flg
@@ -1,5 +1,5 @@
 <ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
-25
+22
 304 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c     "R. Clint Whaley" \
 gcc
 -mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O
@@ -48,16 +48,9 @@ gcc
 328 480 8 8 2 1 1 8 8 2 ATL_mm8x8x2.c  "R. Clint Whaley" \
 gcc
 -fomit-frame-pointer -O2 -fno-tree-loop-optimize
-329 192 4 4 4 1 16 4 4 4 ATL_smm4x4x128_av.c "R. Clint Whaley" \
-gcc
--x assembler-with-cpp
-330 200 92 92 92 1 16 92 92 92 ATL_smm4x4x128_av.c "R. Clint Whaley" \
-gcc
--x assembler-with-cpp
 331 192 4 4 1 1 1 4 4 1 ATL_smm4x4xURx_mips.c  "R. Clint Whaley" \
 gcc
 -x assembler-with-cpp -mips4
-332 192 8 2 4 1 0 8 2 4 ATL_smm8x2x4_av.c  "IBM"
 333 448 4 4 2 1 1 4 4 2 ATL_smm4x4x2pf_arm.c "R. Clint Whaley" \
 gcc
 -x assembler-with-cpp -mfpu=vfpv3
Index: ATLAS/tune/blas/gemm/CASES/scases.vnb
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/scases.vnb
+++ ATLAS/tune/blas/gemm/CASES/scases.vnb
@@ -31,9 +31,6 @@
 # Defaults: TA='t', TB='n', SSE=0, X87=0, LDBOT=1, RTKU=0, AOUTER=0,
 #           KBMAX=KU, KBMIN=KU, BETAN1=0, RTMN=1
 #
-ID=1  ROUT='ATL_smm4x4x128_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=4 \
-      LDKB=1 LDBOT=1 KBMIN=4 KBMAX=128 ASM=GAS_PPC \
-      COMP='gcc' FLAGS='-x assembler-with-cpp'
 ID=2  ROUT='ATL_smm4x4x16_av.c' AUTH='R. Clint Whaley' MU=4 NU=4 KU=16 \
       LDKB=1 LDBOT=0 KBMIN=16 KBMAX=2048 ASM=GAS_SPARC \
       COMP='gcc' FLAGS='-x assembler-with-cpp'
Index: ATLAS/tune/blas/gemm/CASES/dcases.flg
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/dcases.flg
+++ ATLAS/tune/blas/gemm/CASES/dcases.flg
@@ -1,5 +1,5 @@
 <ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
-32
+30
 306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c     "R. Clint Whaley" \
 gcc
 -mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2
@@ -79,12 +79,6 @@ gcc
 336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c  "R. Clint Whaley" \
 gcc
 -x assembler-with-cpp -mips4
-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \
-gcc
--x assembler-with-cpp
-338 192 8 4 2 1 0 8 4 2  ATL_dmm8x4x2_vsx.c  "IBM" \
-gcc
--O3 -mvsx
 339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \
 gcc
 -x assembler-with-cpp -mfpu=vfpv3
Index: ATLAS/tune/blas/gemm/CASES/dcases.vnb
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/dcases.vnb
+++ ATLAS/tune/blas/gemm/CASES/dcases.vnb
@@ -53,10 +53,6 @@ ID=6  ROUT='ATL_dmm4x1x90_x87.c' AUTH='R
 ID=7  ROUT='ATL_dmm8x1x120_sse2.c' AUTH='R. Clint Whaley' \
       MU=8 NU=1 KU=1 KBMAX=512 ASM=GAS_x8664 BETAN1=1 \
       COMP='gcc' FLAGS='-m64 -x assembler-with-cpp'
-ID=70 ROUT='ATL_dmm4x4x80_ppc.c' AUTH='R. Clint Whaley' TA='T', TB='N' \
-      MU=4 NU=4 KU=1 KBMIN=1 KBMAX=80 ASM=GAS_PPC BETAN1=0 LDBOT=0 \
-      LDAB=0 LDISKB=1 RTN=1 RTM=1 RTK=0 \
-      COMP='gcc' FLAGS='-x assembler-with-cpp'
 ID=80 ROUT='ATL_dmm4x4x16r8_US.c' AUTH='R. Clint Whaley' TA='T', TB='N' \
       MU=4 NU=4 KU=24 KBMIN=24 KBMAX=512 ASM=GAS_SPARC BETAN1=0 \
       LDAB=0 RTK=1 RTN=1 RTM=1 LDBOT=0 LDISKB=1 LDAB=1 \
Index: ATLAS/tune/blas/gemm/CASES/zcases.flg
===================================================================
--- ATLAS.orig/tune/blas/gemm/CASES/zcases.flg
+++ ATLAS/tune/blas/gemm/CASES/zcases.flg
@@ -1,5 +1,5 @@
 <ID> <flag> <mb> <nb> <kb> <muladd> <lat> <mu> <nu> <ku> <rout> "<Contributer>"
-31
+29
 306 192 4 3 8 0 4 4 3 8 ATL_mm4x3x8p.c     "R. Clint Whaley" \
 gcc
 -mcpu=ultrasparc -mtune=ultrasparc -fomit-frame-pointer -O -fno-schedule-insns -fno-schedule-insns2
@@ -76,12 +76,6 @@ gcc
 336 192 4 4 1 1 1 4 4 1 ATL_dmm4x4xURx_mips.c  "R. Clint Whaley" \
 gcc
 -x assembler-with-cpp -mips4
-337 192 4 4 1 1 16 4 4 1 ATL_dmm4x4x80_ppc.c "Whaley & Castaldo" \
-gcc
--x assembler-with-cpp
-338 192 8 4 2 1 0 8 4 2  ATL_dmm8x4x2_vsx.c  "IBM" \
-gcc
--O3 -mvsx
 339 448 4 4 2 1 1 4 4 2 ATL_dmm4x4x2pf_arm.c "R. Clint Whaley" \
 gcc
 -x assembler-with-cpp -mfpu=vfpv3
++++++ atlas3.10.1.tar.bz2 -> atlas3.10.2.tar.bz2 ++++++
++++ 10483 lines of diff (skipped)

++++++ issue_64.patch ++++++
From: Michel Normand <norm...@linux.vnet.ibm.com>
Subject: issue 64
Date: Mon, 07 Jul 2014 17:15:03 +0200

issue 64, patch as suggested by Clint
but not tested by myself.

Signed-off-by: Michel Normand <norm...@linux.vnet.ibm.com>
---
 tune/blas/level3/invtrsm.c |    3 +++
 1 file changed, 3 insertions(+)

Index: ATLAS/tune/blas/level3/invtrsm.c
===================================================================
--- ATLAS.orig/tune/blas/level3/invtrsm.c
+++ ATLAS/tune/blas/level3/invtrsm.c
@@ -257,6 +257,9 @@ static void MakeHEDiagDom
    int j;
    const int lda2=(lda SHIFT), ldap1=((lda+1)SHIFT);
 
+   /* as per issue 64 */
+   Mjoin(PATL,gegen)(N, N, A, lda, N*N+lda);
+
    if (Order == CblasRowMajor)
    {
       if (Uplo == CblasLower) Uplo = CblasUpper;

Reply via email to