Skip to content

Commit 83cd75a

Browse files
committed
snappy: optimize UnalignedCopy64 and IncrementalCopy for RISC-V
Use RISC-V inline assembly (ld/sd) for 8-byte copy operations instead of generic macro-based implementation. Changes: - UnalignedCopy64: direct ld/sd pair for 8-byte copy - IncrementalCopy: 8-byte bulk copies when source/dest don't overlap Performance improvement (direct function benchmark): - Decompress compressible-256K: 728 MB/s -> 2205 MB/s (+203%) - Decompress zeros-256K: 543 MB/s -> 1462 MB/s (+169%) Tests: brpc_snappy_compress_unittest passed (7/7) Signed-off-by: Felix-Gong <gongxiaofei24@iscas.ac.cn>
1 parent eb31fa5 commit 83cd75a

2 files changed

Lines changed: 35 additions & 0 deletions

File tree

src/butil/third_party/snappy/snappy-stubs-internal.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,16 @@ inline void UNALIGNED_STORE64(void *p, uint64_t v) {
164164
// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
165165
// on some platforms, in particular ARM.
166166
inline void UnalignedCopy64(const void *src, void *dst) {
167+
#if defined(__riscv) && __riscv_xlen == 64
168+
// RISC-V optimized: single ld/sd pair for 8-byte copy
169+
uint64_t tmp;
170+
__asm__ volatile(
171+
"ld %0, %1\n\t"
172+
"sd %0, %2\n\t"
173+
: "=&r"(tmp)
174+
: "m"(*(const uint64_t*)src), "m"(*(uint64_t*)dst)
175+
: "memory");
176+
#else
167177
if (sizeof(void *) == 8) {
168178
UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
169179
} else {
@@ -173,6 +183,7 @@ inline void UnalignedCopy64(const void *src, void *dst) {
173183
UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
174184
UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
175185
}
186+
#endif
176187
}
177188

178189
// Convert to little-endian storage, opposite of network format.

src/butil/third_party/snappy/snappy.cc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,33 @@ static const uint32_t kMaximumTagLength = 5; // COPY_4_BYTE_OFFSET plus the act
9797
// or memmove().
9898
static inline void IncrementalCopy(const char* src, char* op, ssize_t len) {
9999
assert(len > 0);
100+
#if defined(__riscv) && __riscv_xlen == 64
101+
// RISC-V optimized: use 8-byte copies when possible
102+
if (len >= 8 && (op - src >= 8 || src - op >= 8)) {
103+
// Non-overlapping or safe overlap: copy 8 bytes at a time
104+
do {
105+
uint64_t tmp;
106+
__asm__ volatile(
107+
"ld %0, %1\n\t"
108+
"sd %0, %2\n\t"
109+
: "=&r"(tmp)
110+
: "m"(*(const uint64_t*)src), "m"(*(uint64_t*)op)
111+
: "memory");
112+
src += 8;
113+
op += 8;
114+
len -= 8;
115+
} while (len >= 8);
116+
}
117+
// Copy remaining bytes
118+
while (len > 0) {
119+
*op++ = *src++;
120+
--len;
121+
}
122+
#else
100123
do {
101124
*op++ = *src++;
102125
} while (--len > 0);
126+
#endif
103127
}
104128

105129
// Equivalent to IncrementalCopy except that it can write up to ten extra

0 commit comments

Comments
 (0)