diff --git a/CMakeLists.txt b/CMakeLists.txt
index caeaac57827adcc56d47262c178a69d5aa4113e6..845903da62e2245deaa343aa93e3f15e963f0e8f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -113,6 +113,14 @@ if (OS_ARCH STREQUAL "aarch64")
     add_compile_options(-mtune=cortex-a72 -fsigned-char -g -ggdb3 -march=armv8-a+crc -funwind-tables)
 elseif (OS_ARCH STREQUAL "x86_64")
     add_compile_options(-msse4.2 )
+elseif (OS_ARCH STREQUAL "riscv64")
+    option(HAS_RISCV_ZBC_EXTENSION "Build for the RISC-V Zbc (carry-less multiply) extension" OFF)
+    message(STATUS "HAS_RISCV_ZBC_EXTENSION = ${HAS_RISCV_ZBC_EXTENSION}")
+    if (HAS_RISCV_ZBC_EXTENSION)
+        add_compile_options(-march=rv64gc_zbc)
+    else ()
+        add_compile_options(-march=rv64gc)
+    endif ()
 endif ()
 
 Add_Definitions(-DWSEC_COMPILE_CAC_OPENSSL -DWSEC_AES_GCM_SUPPORT -DWSEC_USE_OPENSSL_110 -DWSEC_COMPILE_SDP)
diff --git a/src/cm_concurrency/cm_atomic.h b/src/cm_concurrency/cm_atomic.h
index df4dd3c40ca0d5c4955c410a7d1419b51c0bc52f..777485467c2abc92d5154d51ff1e0e6267b32f53 100644
--- a/src/cm_concurrency/cm_atomic.h
+++ b/src/cm_concurrency/cm_atomic.h
@@ -95,7 +95,7 @@ static inline bool32 cm_atomic32_cas(atomic32_t *val, int32 oldval, int32 newval
 typedef volatile int32 atomic32_t;
 typedef volatile int64 atomic_t;
 
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__riscv)
 static inline int64 cm_atomic_get(atomic_t *val)
 {
     return __atomic_load_n(val, __ATOMIC_SEQ_CST);
diff --git a/src/cm_concurrency/cm_spinlock.h b/src/cm_concurrency/cm_spinlock.h
index 720a65db56d76507108d86bd5a0d6918f4958e9b..7a4910f2c7171cd4a5f06fdbe532eb99610c410f 100644
--- a/src/cm_concurrency/cm_spinlock.h
+++ b/src/cm_concurrency/cm_spinlock.h
@@ -37,7 +37,7 @@ extern "C" {
 typedef volatile uint32 spinlock_t;
 typedef volatile uint32 ip_spinlock_t;
 
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__riscv)
 #define GS_INIT_SPIN_LOCK(lock) \
     { \
         __atomic_store_n(&lock, 0, __ATOMIC_SEQ_CST); \
@@ -77,7 +77,7 @@ typedef struct st_spin_statis_instance {
     uint64 ss_wait_usecs;
 } spin_statis_instance_t;
 
-#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__loongarch__) || defined(__riscv)
 #define fas_cpu_pause() \
     { \
         __asm__ volatile("nop"); \
@@ -113,7 +113,7 @@ static forceinline void cm_spin_sleep_ex(uint32 tick)
 
 #else
 
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__riscv)
 static forceinline uint32 cm_spin_set(spinlock_t *ptr, uint32 value)
 {
     uint32 oldvalue = 0;
@@ -161,7 +161,7 @@ static forceinline void cm_spin_lock_with_stat(spinlock_t *lock, spin_statis_t *
     }
 
     for (;;) {
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__riscv)
         while (__atomic_load_n(lock, __ATOMIC_SEQ_CST) != 0) {
 #else
         while (*lock != 0) {
@@ -200,7 +200,7 @@ static forceinline void cm_spin_lock_ex(spinlock_t *lock, spin_statis_t *stat, u
     }
 
     for (;;) {
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__riscv)
         while (__atomic_load_n(lock, __ATOMIC_SEQ_CST) != 0) {
 #else
         while (*lock != 0) {
@@ -231,7 +231,7 @@ static forceinline void cm_spin_lock_ex(spinlock_t *lock, spin_statis_t *stat, u
     }
 }
 
-#if !defined(__arm__) && !defined(__aarch64__)
+#if !defined(__arm__) && !defined(__aarch64__) && !defined(__riscv)
 static forceinline void cm_spin_unlock(spinlock_t *lock)
 {
     if (SECUREC_UNLIKELY(lock == NULL)) {
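Note on the concurrency changes: RISC-V joins the arm/aarch64 code paths, which route every lock-word access through the GCC __atomic builtins rather than plain volatile loads and stores. Like arm, RISC-V is weakly ordered, so waiters must read the lock with explicit ordering and the owner must publish the release the same way. A minimal sketch of the pattern these guards select follows; the demo_* names are illustrative only, and the real code acquires the lock through cm_spin_set() rather than the plain exchange shown here.

#include <stdint.h>

typedef volatile uint32_t demo_spinlock_t;

static inline void demo_spin_lock(demo_spinlock_t *lock)
{
    for (;;) {
        /* Read-only wait keeps the contended cache line in shared state. */
        while (__atomic_load_n(lock, __ATOMIC_SEQ_CST) != 0) {
        }
        /* The atomic exchange both claims the lock and orders the acquire. */
        if (__atomic_exchange_n(lock, 1, __ATOMIC_SEQ_CST) == 0) {
            return;
        }
    }
}

static inline void demo_spin_unlock(demo_spinlock_t *lock)
{
    __atomic_store_n(lock, 0, __ATOMIC_SEQ_CST);
}

On the build side, the new CMake branch leaves Zbc off by default; it can be enabled at configure time with cmake -DHAS_RISCV_ZBC_EXTENSION=ON on hardware that implements the extension.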
@@ -244,7 +244,7 @@ static forceinline void cm_spin_unlock(spinlock_t *lock)
 
 static forceinline bool32 cm_spin_try_lock(spinlock_t *lock)
 {
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__riscv)
     if (__atomic_load_n(lock, __ATOMIC_SEQ_CST) != 0) {
 #else
     if (*lock != 0) {
@@ -261,7 +261,7 @@ static forceinline bool32 cm_spin_timed_lock(spinlock_t *lock, uint32 timeout_ti
     uint32 sleep_times = 0;
 
     for (;;) {
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__riscv)
         while (__atomic_load_n(lock, __ATOMIC_SEQ_CST) != 0) {
 #else
         while (*lock != 0) {
@@ -307,7 +307,7 @@ static forceinline void cm_spin_lock_by_sid(uint32 sid, spinlock_t *lock, spin_s
     }
 
     for (;;) {
-#if defined(__arm__) || defined(__aarch64__)
+#if defined(__arm__) || defined(__aarch64__) || defined(__riscv)
         while (__atomic_load_n(lock, __ATOMIC_SEQ_CST) != 0) {
 #else
         while (*lock != 0) {
diff --git a/src/cm_concurrency/cm_thread.c b/src/cm_concurrency/cm_thread.c
index 108e105febdb7da186be34e4d28ef662dcc37581..52a647f345f30cf020c4aebb64e1af40480e55e0 100644
--- a/src/cm_concurrency/cm_thread.c
+++ b/src/cm_concurrency/cm_thread.c
@@ -314,7 +314,7 @@ uint32 cm_get_current_thread_id(void)
 #define __SYS_GET_SPID 186
 #elif (defined __aarch64__)
 #define __SYS_GET_SPID 178
-#elif (defined __loongarch__)
+#elif (defined __loongarch__) || (defined __riscv)
 #include <sys/syscall.h>
 #define __SYS_GET_SPID SYS_gettid
 #endif
diff --git a/src/cm_utils/cm_blackbox.c b/src/cm_utils/cm_blackbox.c
index 63d4022c071e79dad07c5a521ca2e16884c99d8b..e63e3442f26ae0fcd0a20026a7b95aaf95df1faf 100644
--- a/src/cm_utils/cm_blackbox.c
+++ b/src/cm_utils/cm_blackbox.c
@@ -56,7 +56,7 @@ using __cxxabiv1::__cxa_demangle;
 
 #if (defined __x86_64__)
 #define REGFORMAT "%s0x%016llx\n"
-#elif (defined __aarch64__)
+#elif (defined __aarch64__) || (defined __riscv)
 #define REGFORMAT "x[%02d] 0x%016llx\n"
 #endif
 
@@ -131,6 +131,10 @@ void cm_print_reg(box_reg_info_t *reg_info)
     }
     LOG_BLACKBOX_INF("sp 0x%016llx\n", reg_info->sp);
     LOG_BLACKBOX_INF("pc 0x%016llx\n", reg_info->pc);
+#elif (defined __riscv)
+    for (uint32 i = 0; i < 32; i++) {
+        LOG_BLACKBOX_INF(REGFORMAT, i, reg_info->reg[i]);
+    }
 #endif
 }
 
@@ -142,6 +146,8 @@ void cm_print_assembly(box_reg_info_t *reg_info)
     pc = (unsigned char*)reg_info->rip;
 #elif (defined __aarch64__)
     pc = (unsigned char*)reg_info->sp;
+#elif (defined __riscv)
+    pc = (unsigned char*)reg_info->reg[REG_PC];
 #endif
     for (int32 i = -8; i < 16; i++) {
         if (i % 8 == 0) {
@@ -201,6 +207,10 @@ for (uint32 i = 0; i < 31; i++) {
     }
     cpu_info->sp = uc->uc_mcontext.sp;
     cpu_info->pc = uc->uc_mcontext.pc;
+#elif (defined __riscv)
+    for (uint32 i = 0; i < 32; i++) {
+        cpu_info->reg[i] = uc->uc_mcontext.__gregs[i];
+    }
 #endif
 }
 
@@ -370,6 +380,8 @@ void cm_print_call_link(box_reg_info_t *reg_info)
         pc = (i == start_size) ? (void *)reg_info->rsp : cfa_addr[i];
 #elif (defined __aarch64__)
         pc = (i == start_size) ? (void *)reg_info->reg[29] : cfa_addr[i];
+#elif (defined __riscv)
+        pc = (i == start_size) ? (void *)reg_info->reg[REG_SP] : cfa_addr[i];
 #endif
         LOG_BLACKBOX_INF("Stack area: %p - %p", i < size - 1 ? cfa_addr[i + 1] : 0x0, pc);
         cm_dump_mem_in_blackbox(pc, (uint32)(cfa_addr[i + 1] - pc));
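For reference, the RISC-V blackbox path reads the crash context straight out of glibc's mcontext: __gregs[0] is the faulting pc (REG_PC == 0) and __gregs[2] is sp (REG_SP), which is why cm_print_assembly() and cm_print_call_link() can index reg[] with those macros. A hedged, self-contained sketch of the same extraction (the handler name is illustrative, not from the patch):

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <ucontext.h>

static void demo_fault_handler(int sig, siginfo_t *info, void *context)
{
    ucontext_t *uc = (ucontext_t *)context;
    (void)sig;
    (void)info;
#if defined(__riscv)
    /* glibc RISC-V layout: __gregs[REG_PC] is pc, __gregs[REG_SP] is sp. */
    fprintf(stderr, "pc 0x%016llx\n",
            (unsigned long long)uc->uc_mcontext.__gregs[REG_PC]);
    fprintf(stderr, "sp 0x%016llx\n",
            (unsigned long long)uc->uc_mcontext.__gregs[REG_SP]);
#endif
}

Such a handler would be installed with sigaction() and SA_SIGINFO, as the existing blackbox signal hook presumably is.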
diff --git a/src/cm_utils/cm_blackbox.h b/src/cm_utils/cm_blackbox.h
index 7b2db629c87d2ff5914a067b0f091a6062764d6f..9ce562b7a42349474f798e0d85734b8612178f98 100644
--- a/src/cm_utils/cm_blackbox.h
+++ b/src/cm_utils/cm_blackbox.h
@@ -88,6 +88,8 @@ typedef struct st_box_reg_info {
     uint64 reg[31]; /* arm register */
     uint64 sp;
     uint64 pc;
+#elif (defined __riscv)
+    uint64 reg[32]; /* riscv __gregs layout: pc, then x1-x31 */
 #endif
 } box_reg_info_t;
diff --git a/src/cm_utils/cm_checksum.c b/src/cm_utils/cm_checksum.c
index 45bbcdef9502f2cca9944d7bd875ecd3e3d145de..888d55c81a7a7095a41f43e8257c46d642805f6b 100644
--- a/src/cm_utils/cm_checksum.c
+++ b/src/cm_utils/cm_checksum.c
@@ -49,6 +49,7 @@ static inline uint32 cm_crc32c_u8_dendian(uint32 crc, uint8 val)
     return g_crc32c_table_bigendian[0][((crc >> 24) ^ val) & 0xFF] ^ (crc << 8);
 }
 
+
 #if defined(HAVE_ARM_ACLE)
 bool32 cm_crc32c_aarch_available(void)
 {
@@ -99,6 +100,14 @@ uint32 cm_crc32c_aarch(const void *data, uint32 len, uint32 crc)
 }
 #endif
 
+#if defined(__riscv_zbc)
+uint32 cm_crc32c_riscv(const void *data, uint32 len, uint32 crc)
+{
+    return crc32_le_generic(crc, data, len, CRC32C_POLY_LE,
+                            CRC32C_POLY_QT_LE);
+}
+#endif
+
 uint32 cm_crc32c_sse42(const void *data, uint32 len, uint32 crc)
 {
 #if defined(CM_HAVE_SSE4_2)
diff --git a/src/cm_utils/cm_checksum.h b/src/cm_utils/cm_checksum.h
index c3130a57a576fa9fb3a403561da4da32a651d237..6a2a8def191bc8a920bdb61f73a2ebacac9037c3 100644
--- a/src/cm_utils/cm_checksum.h
+++ b/src/cm_utils/cm_checksum.h
@@ -43,6 +43,133 @@
 #define CM_HAVE_SSE4_2
 #include <cpuid.h>
 #define CM_HAVE__GET_CPUID
+#elif defined(__riscv_zbc)
+#if __riscv_xlen == 64
+/* Slide by XLEN bits per iteration */
+# define STEP_ORDER 3
+
+/* Each below polynomial quotient has an implicit bit for 2^XLEN */
+
+/* Polynomial quotient of (2^(XLEN+32))/CRC32C_POLY, in LE format */
+# define CRC32C_POLY_QT_LE 0xa434f61c6f5389f8
+
+static inline uint64 crc32_le_prep(uint32 crc, unsigned long const *ptr)
+{
+    return (uint64)crc ^ (uint64)(*ptr);
+}
+
+static inline uint32 crc32_le_zbc(unsigned long s, uint32 poly, unsigned long poly_qt)
+{
+    uint32 crc;
+
+    /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
+    __asm__ volatile (".option push\n"
+                      ".option arch,+zbc\n"
+                      "clmul %0, %1, %2\n"
+                      "slli %0, %0, 1\n"
+                      "xor %0, %0, %1\n"
+                      "clmulr %0, %0, %3\n"
+                      "srli %0, %0, 32\n"
+                      ".option pop\n"
+                      : "=&r" (crc)
+                      : "r" (s),
+                        "r" (poly_qt),
+                        "r" ((uint64)poly << 32)
+                      :);
+    return crc;
+}
+
+#elif __riscv_xlen == 32
+# define STEP_ORDER 2
+/* Each quotient should match the upper half of its analog in RV64 */
+# define CRC32C_POLY_QT_LE 0x6f5389f8
+
+static inline uint32 crc32_le_prep(uint32 crc, unsigned long const *ptr)
+{
+    return crc ^ (uint32)(*ptr);
+}
+
+static inline uint32 crc32_le_zbc(unsigned long s, uint32 poly, unsigned long poly_qt)
+{
+    uint32 crc;
+
+    /* We don't have a "clmulrh" insn, so use clmul + slli instead. */
+    __asm__ volatile (".option push\n"
+                      ".option arch,+zbc\n"
+                      "clmul %0, %1, %2\n"
+                      "slli %0, %0, 1\n"
+                      "xor %0, %0, %1\n"
+                      "clmulr %0, %0, %3\n"
+                      ".option pop\n"
+                      : "=&r" (crc)
+                      : "r" (s),
+                        "r" (poly_qt),
+                        "r" (poly)
+                      :);
+    return crc;
+}
+#endif
+
+#define STEP (1 << STEP_ORDER)
+#define OFFSET_MASK (STEP - 1)
+#define CRC32C_POLY_LE 0x82F63B78
+#define min(X,Y) ((X) < (Y) ? (X) : (Y))
+
+static inline uint32 crc32_le_unaligned(uint32 crc, unsigned char const *p,
+                                        size_t len, uint32 poly,
+                                        unsigned long poly_qt)
+{
+    size_t bits = len * 8;
+    unsigned long s = 0;
+    uint32 crc_low = 0;
+
+    for (int i = 0; i < len; i++)
+        s = ((unsigned long)*p++ << (__riscv_xlen - 8)) | (s >> 8);
+
+    s ^= (unsigned long)crc << (__riscv_xlen - bits);
+    if (__riscv_xlen == 32 || len < sizeof(uint32))
+        crc_low = crc >> bits;
+
+    crc = crc32_le_zbc(s, poly, poly_qt);
+    crc ^= crc_low;
+
+    return crc;
+}
+
+static inline uint32 crc32_le_generic(uint32 crc, unsigned char const *p,
+                                      size_t len, uint32 poly,
+                                      unsigned long poly_qt)
+{
+    size_t offset, head_len, tail_len;
+    unsigned long const *p_ul;
+    unsigned long s;
+
+    /* Handle the unaligned head. */
+    offset = (unsigned long)p & OFFSET_MASK;
+    if (offset && len) {
+        head_len = min(STEP - offset, len);
+        crc = crc32_le_unaligned(crc, p, head_len, poly, poly_qt);
+        p += head_len;
+        len -= head_len;
+    }
+
+    tail_len = len & OFFSET_MASK;
+    len = len >> STEP_ORDER;
+    p_ul = (unsigned long const *)p;
+
+    for (int i = 0; i < len; i++) {
+        s = crc32_le_prep(crc, p_ul);
+        crc = crc32_le_zbc(s, poly, poly_qt);
+        p_ul++;
+    }
+
+    /* Handle the tail bytes. */
+    p = (unsigned char const *)p_ul;
+    if (tail_len)
+        crc = crc32_le_unaligned(crc, p, tail_len, poly, poly_qt);
+
+    return crc;
+}
 #endif
 
 #ifdef __cplusplus
@@ -107,6 +234,20 @@ static inline uint32 cm_get_crc32c_aarch(const void *data, uint32 len)
 }
 #endif
 
+#if defined(__riscv_zbc)
+uint32 cm_crc32c_riscv(const void *data, uint32 len, uint32 crc);
+
+static inline uint32 cm_get_crc32c_riscv(const void *data, uint32 len)
+{
+    uint32 crc;
+
+    cm_init_crc32c(&crc);
+    crc = cm_crc32c_riscv(data, len, crc);
+    cm_final_crc32c(&crc);
+    return crc;
+}
+#endif
+
 static inline uint32 cm_get_crc32_sse42(const void *data, uint32 len)
 {
     uint32 crc;
@@ -138,6 +279,8 @@ static inline uint32 cm_get_checksum(const void *data, uint32 len)
     if (cm_crc32c_aarch_available()) {
         return cm_get_crc32c_aarch(data, len);
     }
+#elif defined(__riscv_zbc)
+    return cm_get_crc32c_riscv(data, len);
 #else
     if (cm_crc32c_sse42_available()) {
         return cm_get_crc32_sse42(data, len);
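How the Zbc path works: each XLEN-bit chunk of input is folded into the running CRC with two carry-less multiplies — clmul by the precomputed quotient of 2^(XLEN+32)/P (the CRC32C_POLY_QT_LE constants above), then clmulr by the polynomial itself to reduce the product back to 32 bits. The constants and the unaligned head/tail handling mirror the Linux kernel's arch/riscv CRC32 implementation. Note that unlike the SSE4.2 and ARM ACLE backends, there is no runtime capability probe: cm_get_checksum() selects this path purely at compile time via __riscv_zbc (set by -march=rv64gc_zbc), so a binary built with the Zbc flag will fault on hardware lacking the extension.

A hedged smoke test, not part of the patch: assuming cm_init_crc32c()/cm_final_crc32c() apply the standard ~0 pre/post conditioning (they are not shown in this diff), every backend has to produce the well-known CRC-32C check value for "123456789".

#include <assert.h>
#include <string.h>
#include "cm_checksum.h"

static void crc32c_smoke_test(void)
{
    const char *vec = "123456789";
    /* 0xE3069283 is the published CRC-32C check value for this vector. */
    assert(cm_get_checksum(vec, (uint32)strlen(vec)) == 0xE3069283u);
}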
diff --git a/src/cm_utils/cm_memory.h b/src/cm_utils/cm_memory.h
index eae36145a6291c4dd99ee82873c2b431fa2891ab..ed8c68d5a7751d32194a8ecae3b867f726409d2a 100644
--- a/src/cm_utils/cm_memory.h
+++ b/src/cm_utils/cm_memory.h
@@ -69,6 +69,12 @@ extern "C" {
         __asm__ volatile("" ::        \
                          : "memory"); \
     }
+#elif defined(__riscv)
+#define CM_MFENCE                         \
+    {                                     \
+        __asm__ volatile("fence rw,rw" :: \
+                         : "memory");     \
+    }
 #endif
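The new CM_MFENCE branch emits "fence rw,rw", RISC-V's full load/store barrier, with a "memory" clobber so the compiler cannot reorder accesses around it either. An illustrative equivalent, not part of the patch: GCC lowers the C11 seq_cst fence to the same instruction on RISC-V, so the macro body could also have been spelled portably as below.

#include <stdatomic.h>

static inline void demo_full_fence(void)
{
    /* On RISC-V targets GCC emits "fence rw,rw" for this. */
    atomic_thread_fence(memory_order_seq_cst);
}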