Source: ucx
Version: 1.16.0+ds-5
Severity: normal
Tags: ftbfs patch
User: debian-loonga...@lists.debian.org
Usertags: loong64

Dear maintainers,

Compiling ucx fails on loong64 in my local environment.

We need to add loong64 support in d/control.
And we need to add loongarch64 support in ucx source.

The Debian ucx source package currently tracks the upstream 1.16.x series.
The latest upstream release of ucx is 1.17.x.
Support for the new architecture will be submitted to the ucx upstream master branch.

Because the upstream master branch differs substantially from the 1.16.x code, I have first added LoongArch support to the ucx 1.16.0+ds-5 source package.
Please consider the attached patch.
I suggest adding it to debian/patches.
For now, I recommend carrying the attached patch in the Debian ucx source package. With the patch applied, the ucx source package builds successfully in my local environment.
The resulting binaries run and produce normal output, e.g.
```
root@localhost:/home/ucx/ucx-1.16.0+ds# ./debian/tmp/usr/bin/ucx_info -v
# Library version: 1.16.0
# Library path: /home/ucx/ucx-1.16.0+ds/debian/tmp/usr/lib/loongarch64-linux-gnu/libucs.so.0
# API headers version: 1.16.0
# Git branch '<unknown>', revision 0000000
# Configured with: --build=loongarch64-linux-gnu --prefix=/usr --includedir=${prefix}/include......
-enable-rc --enable-dc --enable-ud --with-mlx5-dv --enable-dm
```

In addition, I will add loongarch64 support to the ucx upstream master branch, so future upstream releases packaged in Debian will include support for the LoongArch architecture.

Your opinions are welcome.

Thanks,
Dandan Zhang

diff -Nru ucx-1.16.0+ds/debian/changelog ucx-1.16.0+ds/debian/changelog
--- ucx-1.16.0+ds/debian/changelog      2024-03-24 11:19:06.000000000 +0000
+++ ucx-1.16.0+ds/debian/changelog      2024-05-22 07:30:00.000000000 +0000
@@ -1,3 +1,9 @@
+ucx (1.16.0+ds-5+loong64) unreleased; urgency=medium
+
+  * Add support for loongarch64.
+
+ -- Dandan Zhang <zhangdan...@loongson.cn>  Wed, 22 May 2024 15:30:00 +0800
+
 ucx (1.16.0+ds-5) unstable; urgency=medium
 
   * Fix FTBFS: Don't pass CFLAGS to C++ (cxxflags.patch). Closes: #1066543
diff -Nru ucx-1.16.0+ds/debian/control ucx-1.16.0+ds/debian/control
--- ucx-1.16.0+ds/debian/control        2024-03-24 11:19:06.000000000 +0000
+++ ucx-1.16.0+ds/debian/control        2024-05-22 07:26:40.000000000 +0000
@@ -4,13 +4,13 @@
 Maintainer: Debian Science Maintainers <debian-science-maintain...@lists.alioth.debian.org>
 Uploaders: Alastair McKinstry <mckins...@debian.org>
 Build-Depends: debhelper-compat (= 13),
- libibverbs-dev (>= 1.1.7) [amd64 arm64  riscv64 ppc64el s390x],
- libnuma-dev [amd64 arm64 riscv64 ppc64 s390x],
+ libibverbs-dev (>= 1.1.7) [amd64 arm64 loong64 riscv64 ppc64el s390x],
+ libnuma-dev [amd64 arm64 loong64 riscv64 ppc64 s390x],
  librdmacm-dev, 
  libibmad-dev,
  libibumad-dev,
  libfuse3-dev,
- libamdhip64-dev [!riscv64],
+ libamdhip64-dev [!riscv64 !loong64],
  doxygen,
  texlive-latex-base,
  lcov
@@ -20,7 +20,7 @@
 Homepage: https://www.openucx.org
 
 Package: ucx-utils
-Architecture: amd64 arm64 riscv64 ppc64el
+Architecture: amd64 arm64 loong64 riscv64 ppc64el
 Depends: libucx0 (= ${binary:Version}), ${shlibs:Depends}, ${misc:Depends}
 Description: Utilities for the UCX messaging library
  Unified Communication X (UCX) provides an optimized communication layer
@@ -32,7 +32,7 @@
 
 Package: libucx-dev
 Section: libdevel
-Architecture: amd64 arm64  riscv64 ppc64el
+Architecture: amd64 arm64 loong64 riscv64 ppc64el
 Multi-Arch: same
 Depends: libucx0 (= ${binary:Version}), ${misc:Depends}
 Description: Header files for UCX library
@@ -46,7 +46,7 @@
 Package: libucx0
 Section: libs
 Depends: ${shlibs:Depends}, ${misc:Depends}
-Architecture: amd64 arm64  riscv64 ppc64el
+Architecture: amd64 arm64 loong64 riscv64 ppc64el
 Multi-Arch: same
 Description: Unified Communication X libraries
  Unified Communication X (UCX) provides an optimized communication layer
diff -Nru ucx-1.16.0+ds/debian/patches/series ucx-1.16.0+ds/debian/patches/series
--- ucx-1.16.0+ds/debian/patches/series 2024-03-24 11:19:06.000000000 +0000
+++ ucx-1.16.0+ds/debian/patches/series 2024-05-22 07:24:44.000000000 +0000
@@ -4,3 +4,4 @@
 gcc-11.patch
 ptr-fix.patch
 cxxflags.patch
+ucx-add-loongarch64-support.patch
diff -Nru ucx-1.16.0+ds/debian/patches/ucx-add-loongarch64-support.patch ucx-1.16.0+ds/debian/patches/ucx-add-loongarch64-support.patch
--- ucx-1.16.0+ds/debian/patches/ucx-add-loongarch64-support.patch      1970-01-01 00:00:00.000000000 +0000
+++ ucx-1.16.0+ds/debian/patches/ucx-add-loongarch64-support.patch      2024-05-22 07:25:38.000000000 +0000
@@ -0,0 +1,601 @@
+Description: Add loongarch64 support 
+Signed-Off-By: lix...@loongson.cn, zhangdan...@loongson.cn
+Last-Update: 2024-05-22
+
+--- ucx-1.16.0+ds.orig/src/tools/info/sys_info.c
++++ ucx-1.16.0+ds/src/tools/info/sys_info.c
+@@ -41,6 +41,7 @@ static const char *cpu_model_names[] = {
+     [UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU]   = "Wudaokou",
+     [UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI]   = "Lujiazui",
+     [UCS_CPU_MODEL_RV64G]              = "RV64G",
++    [UCS_CPU_MODEL_LOONGARCH64]        = "LoongArch 64-bit",
+ };
+ 
+ 
+@@ -50,6 +51,7 @@ static const char* cpu_vendor_names[] =
+     [UCS_CPU_VENDOR_INTEL]            = "Intel",
+     [UCS_CPU_VENDOR_AMD]              = "AMD",
+     [UCS_CPU_VENDOR_GENERIC_ARM]      = "Generic ARM",
++    [UCS_CPU_VENDOR_GENERIC_LOONGARCH64]   = "Generic LOONGARCH64",
+     [UCS_CPU_VENDOR_GENERIC_PPC]      = "Generic PPC",
+     [UCS_CPU_VENDOR_GENERIC_RV64G]    = "Generic RV64G",
+     [UCS_CPU_VENDOR_FUJITSU_ARM]      = "Fujitsu ARM",
+--- ucx-1.16.0+ds.orig/src/ucm/Makefile.am
++++ ucx-1.16.0+ds/src/ucm/Makefile.am
+@@ -32,7 +32,8 @@ noinst_HEADERS = \
+       bistro/bistro_x86_64.h \
+       bistro/bistro_aarch64.h \
+       bistro/bistro_ppc64.h \
+-      bistro/bistro_rv64.h
++      bistro/bistro_rv64.h \
++      bistro/bistro_loongarch64.h
+ 
+ libucm_la_SOURCES = \
+       event/event.c \
+@@ -46,7 +47,8 @@ libucm_la_SOURCES = \
+       bistro/bistro_x86_64.c \
+       bistro/bistro_aarch64.c \
+       bistro/bistro_ppc64.c \
+-      bistro/bistro_rv64.c
++      bistro/bistro_rv64.c \
++      bistro/bistro_loongarch64.c
+ 
+ if HAVE_UCM_PTMALLOC286
+ libucm_la_CPPFLAGS += \
+--- ucx-1.16.0+ds.orig/src/ucm/bistro/bistro.c
++++ ucx-1.16.0+ds/src/ucm/bistro/bistro.c
+@@ -121,7 +121,7 @@ ucs_status_t ucm_bistro_apply_patch(void
+     return status;
+ }
+ 
+-#if defined(__x86_64__) || defined (__aarch64__) || defined (__riscv)
++#if defined(__x86_64__) || defined (__aarch64__) || defined (__riscv) || defined(__loongarch64)
+ struct ucm_bistro_restore_point {
+     void               *addr;     /* address of function to restore */
+     size_t             patch_len; /* patch length */
+--- ucx-1.16.0+ds.orig/src/ucm/bistro/bistro.h
++++ ucx-1.16.0+ds/src/ucm/bistro/bistro.h
+@@ -23,6 +23,8 @@ typedef struct ucm_bistro_restore_point
+ #  include "bistro_x86_64.h"
+ #elif defined(__riscv)
+ #  include "bistro_rv64.h"
++#elif defined(__loongarch64)
++#  include "bistro_loongarch64.h"
+ #else
+ #  error "Unsupported architecture"
+ #endif
+--- /dev/null
++++ ucx-1.16.0+ds/src/ucm/bistro/bistro_loongarch64.c
+@@ -0,0 +1,109 @@
++/**
++ * Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
++ *
++ * See file LICENSE for terms.
++ */
++
++#ifdef HAVE_CONFIG_H
++#  include "config.h"
++#endif
++
++#if defined(__loongarch64)
++
++#include <ucs/arch/cpu.h>
++#include <ucm/bistro/bistro.h>
++#include <ucm/bistro/bistro_int.h>
++#include <ucs/debug/assert.h>
++#include <ucs/sys/math.h>
++#include <ucm/util/sys.h>
++
++#include <assert.h>
++#include <dlfcn.h>
++#include <stdbool.h>
++#include <stdint.h>
++#include <stdlib.h>
++#include <string.h>
++#include <sys/mman.h>
++
++#define T0 12
++#define T2 14
++#define RA  1
++#define ZERO  0
++
++/**
++  * @brief JIRL - Jump to the address in the base register plus a 16 bit offset
++  * and save the return address in the link register
++  *
++  * @param[in] _regd link (destination) register number (0-31)
++  * @param[in] _regj base (source) register number (0-31)
++  * @param[in] _imm 16 bit immediate value
++  */
++#define JIRL(_regd, _regj, _imm) \
++    (((0x13) << 26 ) | ((_imm) << 10) | ((_regj) << 5) | (_regd))
++/**
++  * @brief B - Unconditional PC-relative branch
++  *
++  * @param[in] _imm 26 bit immediate value
++  */
++#define B(_imm) \
++      ((0x14) << 26) | (((_imm)&0xffff) << 10) | ((_imm) >>16)
++
++/**
++  * @brief PCADDU - Add upper immediate to PC
++  *
++  * @param[in] _regd register number (0-31)
++  * @param[in] _imm 20 bit immediate value
++  */
++#define PCADDU(_regd, _imm) (((0xe) << 25) | ((_imm) << 5) | (_regd))
++
++/**
++  * @brief LD - Load from memory with address from register plus immediate
++  *
++  * @param[in] _regd destination register number (0-31)
++  * @param[in] _regj base address register number (0-31)
++  * @param[in] _imm 12 bit immediate value
++  */
++#define LD(_regd, _regj, _imm) \
++    (((0xa3) << 22) | ((_imm) << 10) | ((_regj) << 5) | (_regd))
++
++void ucm_bistro_patch_lock(void *dst)
++{
++    static const ucm_bistro_lock_t self_jmp = {
++        .j = B(0)
++    };
++    ucm_bistro_modify_code(dst, &self_jmp);
++}
++
++ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
++                              void **orig_func_p,
++                              ucm_bistro_restore_point_t **rp)
++{
++    ucs_status_t status;
++    ucm_bistro_patch_t patch;
++
++    patch = (ucm_bistro_patch_t) {
++        .pcaddu   = PCADDU(T0, 0),
++        .ld      = LD(T2, T0, 0x10),
++        .jirl    = JIRL(0, T2, 0),
++        .spare   = 0,
++        .address = (uintptr_t)hook
++    };
++
++    if (orig_func_p != NULL) {
++        return UCS_ERR_UNSUPPORTED;
++    }
++
++    status = ucm_bistro_create_restore_point(func_ptr, sizeof(patch), rp);
++    if (UCS_STATUS_IS_ERR(status)) {
++        return status;
++    }
++
++    return ucm_bistro_apply_patch_atomic(func_ptr, &patch, sizeof(patch));
++}
++
++ucs_status_t ucm_bistro_relocate_one(ucm_bistro_relocate_context_t *ctx)
++{
++    return UCS_ERR_UNSUPPORTED;
++}
++
++#endif
+--- /dev/null
++++ ucx-1.16.0+ds/src/ucm/bistro/bistro_loongarch64.h
+@@ -0,0 +1,59 @@
++/**
++ * Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
++ *
++ * See file LICENSE for terms.
++ */
++
++
++#ifndef UCM_BISTRO_BISTRO_LOONGARCH64_H_
++#define UCM_BISTRO_BISTRO_LOONGARCH64_H_
++
++#include <ucs/type/status.h>
++#include <ucs/sys/compiler_def.h>
++
++#include <stddef.h>
++#include <stdint.h>
++
++#define UCM_BISTRO_PROLOGUE
++#define UCM_BISTRO_EPILOGUE
++
++typedef struct ucm_bistro_patch {
++    uint32_t pcaddu;
++    uint32_t ld;
++    uint32_t jirl;
++    uint32_t spare;
++    uint64_t address;
++} UCS_S_PACKED ucm_bistro_patch_t;
++
++
++/**
++ * Set library function call hook using Binary Instrumentation
++ * method (BISTRO): replace function body by user defined call
++ *
++ * @param func_ptr     Pointer to function to patch.
++ * @param hook         User-defined function-replacer.
++ * @param symbol       Function name to replace.
++ * @param orig_func_p  Unsupported on this architecture and must be NULL.
++ *                     If set to a non-NULL value, this function returns
++ *                     @ref UCS_ERR_UNSUPPORTED.
++ * @param rp           Restore point used to restore original function.
++ *                     Optional, may be NULL.
++ *
++ * @return Error code as defined by @ref ucs_status_t
++ */
++ucs_status_t ucm_bistro_patch(void *func_ptr, void *hook, const char *symbol,
++                              void **orig_func_p,
++                              ucm_bistro_restore_point_t **rp);
++
++/* Lock implementation */
++typedef struct {
++    uint32_t j; /* jump to self */
++} UCS_S_PACKED ucm_bistro_lock_t;
++
++/**
++ * Helper functions to improve atomicity of function patching
++ */
++void ucm_bistro_patch_lock(void *dst);
++
++#endif
++
+--- ucx-1.16.0+ds.orig/src/ucs/Makefile.am
++++ ucx-1.16.0+ds/src/ucs/Makefile.am
+@@ -24,6 +24,7 @@ nobase_dist_libucs_la_HEADERS = \
+       arch/aarch64/bitops.h \
+       arch/ppc64/bitops.h \
+       arch/rv64/bitops.h \
++      arch/loongarch64/bitops.h \
+       arch/x86_64/bitops.h \
+       arch/bitops.h \
+       algorithm/crc.h \
+@@ -84,6 +85,7 @@ nobase_dist_libucs_la_HEADERS = \
+       arch/x86_64/atomic.h \
+       arch/aarch64/global_opts.h \
+       arch/generic/atomic.h \
++      arch/loongarch64/global_opts.h \
+       arch/ppc64/global_opts.h \
+       arch/rv64/global_opts.h \
+       arch/global_opts.h
+@@ -91,6 +93,7 @@ nobase_dist_libucs_la_HEADERS = \
+ noinst_HEADERS = \
+       arch/aarch64/cpu.h \
+       arch/generic/cpu.h \
++      arch/loongarch64/cpu.h \
+       arch/ppc64/cpu.h \
+       arch/rv64/cpu.h \
+       arch/x86_64/cpu.h \
+@@ -145,6 +148,8 @@ libucs_la_SOURCES = \
+       algorithm/string_distance.c \
+       arch/aarch64/cpu.c \
+       arch/aarch64/global_opts.c \
++      arch/loongarch64/cpu.c \
++      arch/loongarch64/global_opts.c \
+       arch/ppc64/timebase.c \
+       arch/ppc64/global_opts.c \
+       arch/rv64/cpu.c \
+--- ucx-1.16.0+ds.orig/src/ucs/arch/atomic.h
++++ ucx-1.16.0+ds/src/ucs/arch/atomic.h
+@@ -18,6 +18,8 @@
+ #  include "generic/atomic.h"
+ #elif defined(__riscv)
+ #  include "generic/atomic.h"
++#elif defined(__loongarch64)
++#  include "generic/atomic.h"
+ #else
+ #  error "Unsupported architecture"
+ #endif
+--- ucx-1.16.0+ds.orig/src/ucs/arch/bitops.h
++++ ucx-1.16.0+ds/src/ucs/arch/bitops.h
+@@ -23,6 +23,8 @@ BEGIN_C_DECLS
+ #  include "aarch64/bitops.h"
+ #elif defined(__riscv)
+ #  include "rv64/bitops.h"
++#elif defined(__loongarch64)
++#  include "loongarch64/bitops.h"
+ #else
+ #  error "Unsupported architecture"
+ #endif
+--- ucx-1.16.0+ds.orig/src/ucs/arch/cpu.c
++++ ucx-1.16.0+ds/src/ucs/arch/cpu.c
+@@ -60,6 +60,10 @@ const ucs_cpu_builtin_memcpy_t ucs_cpu_b
+         .min = UCS_MEMUNITS_INF,
+         .max = UCS_MEMUNITS_INF
+     },
++    [UCS_CPU_VENDOR_GENERIC_LOONGARCH64] = {
++        .min = UCS_MEMUNITS_INF,
++        .max = UCS_MEMUNITS_INF
++    },
+     [UCS_CPU_VENDOR_GENERIC_PPC] = {
+         .min = UCS_MEMUNITS_INF,
+         .max = UCS_MEMUNITS_INF
+@@ -83,6 +87,7 @@ const size_t ucs_cpu_est_bcopy_bw[UCS_CP
+     [UCS_CPU_VENDOR_INTEL]         = UCS_CPU_EST_BCOPY_BW_DEFAULT,
+     [UCS_CPU_VENDOR_AMD]           = UCS_CPU_EST_BCOPY_BW_DEFAULT,
+     [UCS_CPU_VENDOR_GENERIC_ARM]   = UCS_CPU_EST_BCOPY_BW_DEFAULT,
++    [UCS_CPU_VENDOR_GENERIC_LOONGARCH64] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
+     [UCS_CPU_VENDOR_GENERIC_PPC]   = UCS_CPU_EST_BCOPY_BW_DEFAULT,
+     [UCS_CPU_VENDOR_GENERIC_RV64G] = UCS_CPU_EST_BCOPY_BW_DEFAULT,
+     [UCS_CPU_VENDOR_FUJITSU_ARM]   = UCS_CPU_EST_BCOPY_BW_FUJITSU_ARM,
+--- ucx-1.16.0+ds.orig/src/ucs/arch/cpu.h
++++ ucx-1.16.0+ds/src/ucs/arch/cpu.h
+@@ -35,6 +35,7 @@ typedef enum ucs_cpu_model {
+     UCS_CPU_MODEL_AMD_ROME,
+     UCS_CPU_MODEL_AMD_MILAN,
+     UCS_CPU_MODEL_AMD_GENOA,
++    UCS_CPU_MODEL_LOONGARCH64,
+     UCS_CPU_MODEL_ZHAOXIN_ZHANGJIANG,
+     UCS_CPU_MODEL_ZHAOXIN_WUDAOKOU,
+     UCS_CPU_MODEL_ZHAOXIN_LUJIAZUI,
+@@ -66,6 +67,7 @@ typedef enum ucs_cpu_vendor {
+     UCS_CPU_VENDOR_INTEL,
+     UCS_CPU_VENDOR_AMD,
+     UCS_CPU_VENDOR_GENERIC_ARM,
++    UCS_CPU_VENDOR_GENERIC_LOONGARCH64,
+     UCS_CPU_VENDOR_GENERIC_PPC,
+     UCS_CPU_VENDOR_FUJITSU_ARM,
+     UCS_CPU_VENDOR_ZHAOXIN,
+@@ -105,6 +107,8 @@ typedef struct ucs_cpu_builtin_memcpy {
+ #  include "aarch64/cpu.h"
+ #elif defined(__riscv)
+ #  include "rv64/cpu.h"
++#elif defined(__loongarch64)
++#  include "loongarch64/cpu.h"
+ #else
+ #  error "Unsupported architecture"
+ #endif
+--- ucx-1.16.0+ds.orig/src/ucs/arch/global_opts.h
++++ ucx-1.16.0+ds/src/ucs/arch/global_opts.h
+@@ -18,6 +18,8 @@
+ #  include "aarch64/global_opts.h"
+ #elif defined(__riscv)
+ #  include "rv64/global_opts.h"
++#elif defined(__loongarch64)
++#  include "loongarch64/global_opts.h"
+ #else
+ #  error "Unsupported architecture"
+ #endif
+--- /dev/null
++++ ucx-1.16.0+ds/src/ucs/arch/loongarch64/bitops.h
+@@ -0,0 +1,34 @@
++/**
++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
++*
++* See file LICENSE for terms.
++*/
++
++#ifndef UCS_ARCH_LOONGARCH64_BITOPS_H_
++#define UCS_ARCH_LOONGARCH64_BITOPS_H_
++
++#include <ucs/sys/compiler_def.h>
++#include <stdint.h>
++
++static UCS_F_ALWAYS_INLINE unsigned __ucs_ilog2_u32(uint32_t n)
++{
++    return 31 - __builtin_clz(n);
++}
++
++static UCS_F_ALWAYS_INLINE unsigned __ucs_ilog2_u64(uint64_t n)
++{
++    return 63 - __builtin_clzll(n);
++}
++
++static UCS_F_ALWAYS_INLINE unsigned ucs_ffs32(uint32_t n)
++{
++    return __ucs_ilog2_u32(n & -n);
++}
++
++static UCS_F_ALWAYS_INLINE unsigned ucs_ffs64(uint64_t n)
++{
++    return __ucs_ilog2_u64(n & -n);
++}
++
++#endif
++
+--- /dev/null
++++ ucx-1.16.0+ds/src/ucs/arch/loongarch64/cpu.c
+@@ -0,0 +1,21 @@
++/**
++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
++*
++* See file LICENSE for terms.
++*/
++
++#if defined(__loongarch64)
++
++#ifdef HAVE_CONFIG_H
++#  include "config.h"
++#endif
++
++#include <ucs/arch/cpu.h>
++
++ucs_cpu_vendor_t ucs_arch_get_cpu_vendor()
++{
++    return UCS_CPU_VENDOR_GENERIC_LOONGARCH64;
++}
++
++#endif
++
+--- /dev/null
++++ ucx-1.16.0+ds/src/ucs/arch/loongarch64/cpu.h
+@@ -0,0 +1,113 @@
++/**
++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
++* Copyright (C) Rivos Inc. 2023
++*
++* See file LICENSE for terms.
++*/
++
++#ifndef UCS_ARCH_LOONGARCH64_CPU_H_
++#define UCS_ARCH_LOONGARCH64_CPU_H_
++
++#include <ucs/arch/generic/cpu.h>
++#include <ucs/config/global_opts.h>
++#include <ucs/config/types.h>
++#include <ucs/sys/compiler.h>
++#include <ucs/sys/compiler_def.h>
++
++#include <assert.h>
++#include <stddef.h>
++#include <stdint.h>
++#include <string.h>
++#include <sys/syscall.h>
++#include <sys/mman.h>
++#include <unistd.h>
++
++BEGIN_C_DECLS
++
++/** @file cpu.h */
++
++#define UCS_ARCH_CACHE_LINE_SIZE 64
++
++#define ucs_loongarch64_dbar(hint)   asm volatile ("dbar %0 " : : "I"(hint) : "memory")
++
++#define crwrw           0b00000
++#define cr_r_           0b00101
++#define c_w_w           0b01010
++
++#define orwrw           0b10000
++#define or_r_           0b10101
++#define o_w_w           0b11010
++
++#define orw_w           0b10010
++#define or_rw           0b10100
++
++#define ucs_memory_bus_store_fence() ucs_loongarch64_dbar(c_w_w) 
++#define ucs_memory_bus_load_fence()  ucs_loongarch64_dbar(cr_r_) 
++
++
++#define ucs_memory_cpu_fence()               ucs_loongarch64_dbar(orwrw)     
++#define ucs_memory_bus_cacheline_wc_flush()  ucs_memory_cpu_fence()
++#define ucs_memory_cpu_store_fence()         ucs_loongarch64_dbar(o_w_w)      
++#define ucs_memory_cpu_load_fence()          ucs_loongarch64_dbar(or_r_)     
++#define ucs_memory_cpu_wc_fence()            ucs_memory_cpu_fence()
++
++static inline double ucs_arch_get_clocks_per_sec()
++{
++    return ucs_arch_generic_get_clocks_per_sec();
++}
++
++static inline ucs_cpu_model_t ucs_arch_get_cpu_model()
++{
++    return UCS_CPU_MODEL_LOONGARCH64;
++}
++
++static inline int ucs_arch_get_cpu_flag()
++{
++    return UCS_CPU_FLAG_UNKNOWN;
++}
++
++static inline void ucs_cpu_init()
++{
++}
++
++ucs_cpu_vendor_t ucs_arch_get_cpu_vendor();
++
++static inline ucs_status_t ucs_arch_get_cache_size(size_t *cache_sizes)
++{
++    return UCS_ERR_UNSUPPORTED;
++}
++
++static inline uint64_t ucs_arch_read_hres_clock()
++{
++    uint64_t cnt_id, time;
++    __asm__ __volatile__ (
++      "rdtime.d %0, %1\n\t"
++      :"=&r"(time), "=&r"(cnt_id)
++    );
++    return time;
++}
++
++#define ucs_arch_wait_mem ucs_arch_generic_wait_mem
++
++#if !HAVE___CLEAR_CACHE
++static inline void ucs_arch_clear_cache(void *start, void *end)
++{
++      ucs_memory_cpu_fence();
++}
++#endif
++
++static inline void *ucs_memcpy_relaxed(void *dst, const void *src, size_t len)
++{
++    return memcpy(dst, src, len);
++}
++
++static UCS_F_ALWAYS_INLINE void
++ucs_memcpy_nontemporal(void *dst, const void *src, size_t len)
++{
++    memcpy(dst, src, len);
++}
++
++END_C_DECLS
++
++#endif
++
+--- /dev/null
++++ ucx-1.16.0+ds/src/ucs/arch/loongarch64/global_opts.c
+@@ -0,0 +1,25 @@
++/**
++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
++*
++* See file LICENSE for terms.
++*/
++
++#if defined(__loongarch64)
++
++#ifdef HAVE_CONFIG_H
++#  include "config.h"
++#endif
++
++#include <ucs/arch/global_opts.h>
++#include <ucs/config/parser.h>
++
++ucs_config_field_t ucs_arch_global_opts_table[] = {
++    {NULL}
++};
++
++void ucs_arch_print_memcpy_limits(ucs_arch_global_opts_t *config)
++{
++}
++
++#endif
++
+--- /dev/null
++++ ucx-1.16.0+ds/src/ucs/arch/loongarch64/global_opts.h
+@@ -0,0 +1,26 @@
++/**
++* Copyright (C) Tactical Computing Labs, LLC. 2022. ALL RIGHTS RESERVED.
++*
++* See file LICENSE for terms.
++*/
++
++#ifndef UCS_LOONGARCH64_GLOBAL_OPTS_H_
++#define UCS_LOONGARCH64_GLOBAL_OPTS_H_
++
++#include <stddef.h>
++
++#include <ucs/sys/compiler_def.h>
++
++BEGIN_C_DECLS
++
++#define UCS_ARCH_GLOBAL_OPTS_INITALIZER {}
++
++/* built-in memcpy config */
++typedef struct ucs_arch_global_opts {
++    char dummy;
++} ucs_arch_global_opts_t;
++
++END_C_DECLS
++
++#endif
++
+--- ucx-1.16.0+ds.orig/src/ucs/configure.m4
++++ ucx-1.16.0+ds/src/ucs/configure.m4
+@@ -239,7 +239,7 @@ AC_ARG_WITH([cache-line-size],
+         [AS_HELP_STRING([--with-cache-line-size=SIZE],
+             [Build UCX with cache line size defined by user. This parameter
+              overwrites default cache line sizes defines in
+-             UCX (x86-64: 64, Power: 128, ARMv8: 64/128, RISCV: 64). The supported values are: 64, 128])],
++             UCX (x86-64: 64, Power: 128, ARMv8: 64/128, RISCV: 64, LoongArch: 64). The supported values are: 64, 128])],
+         [],
+         [with_cache_line_size=no])
+ 

Reply via email to