-
Notifications
You must be signed in to change notification settings - Fork 153
Description
gcc version 10.2.1 20201007 releases/gcc-10.2.0-350-g136256c32d (Clear Linux OS for Intel Architecture)
./FastMemcpy
benchmark(size=32 bytes, times=16777216):
result(dst aligned, src aligned): memcpy_fast=48ms memcpy=35 ms
result(dst aligned, src unalign): memcpy_fast=49ms memcpy=33 ms
result(dst unalign, src aligned): memcpy_fast=49ms memcpy=34 ms
result(dst unalign, src unalign): memcpy_fast=49ms memcpy=34 ms
benchmark(size=64 bytes, times=16777216):
result(dst aligned, src aligned): memcpy_fast=54ms memcpy=34 ms
result(dst aligned, src unalign): memcpy_fast=54ms memcpy=34 ms
result(dst unalign, src aligned): memcpy_fast=54ms memcpy=34 ms
result(dst unalign, src unalign): memcpy_fast=54ms memcpy=34 ms
benchmark(size=512 bytes, times=8388608):
result(dst aligned, src aligned): memcpy_fast=85ms memcpy=56 ms
result(dst aligned, src unalign): memcpy_fast=91ms memcpy=52 ms
result(dst unalign, src aligned): memcpy_fast=93ms memcpy=56 ms
result(dst unalign, src unalign): memcpy_fast=94ms memcpy=51 ms
benchmark(size=1024 bytes, times=4194304):
result(dst aligned, src aligned): memcpy_fast=85ms memcpy=41 ms
result(dst aligned, src unalign): memcpy_fast=91ms memcpy=43 ms
result(dst unalign, src aligned): memcpy_fast=91ms memcpy=44 ms
result(dst unalign, src unalign): memcpy_fast=90ms memcpy=44 ms
benchmark(size=4096 bytes, times=524288):
result(dst aligned, src aligned): memcpy_fast=40ms memcpy=20 ms
result(dst aligned, src unalign): memcpy_fast=44ms memcpy=20 ms
result(dst unalign, src aligned): memcpy_fast=44ms memcpy=21 ms
result(dst unalign, src unalign): memcpy_fast=44ms memcpy=20 ms
benchmark(size=8192 bytes, times=262144):
result(dst aligned, src aligned): memcpy_fast=40ms memcpy=23 ms
result(dst aligned, src unalign): memcpy_fast=43ms memcpy=23 ms
result(dst unalign, src aligned): memcpy_fast=43ms memcpy=33 ms
result(dst unalign, src unalign): memcpy_fast=43ms memcpy=34 ms
benchmark(size=1048576 bytes, times=2048):
result(dst aligned, src aligned): memcpy_fast=54ms memcpy=43 ms
result(dst aligned, src unalign): memcpy_fast=55ms memcpy=44 ms
result(dst unalign, src aligned): memcpy_fast=55ms memcpy=47 ms
result(dst unalign, src unalign): memcpy_fast=55ms memcpy=48 ms
benchmark(size=4194304 bytes, times=512):
result(dst aligned, src aligned): memcpy_fast=88ms memcpy=70 ms
result(dst aligned, src unalign): memcpy_fast=88ms memcpy=78 ms
result(dst unalign, src aligned): memcpy_fast=89ms memcpy=74 ms
result(dst unalign, src unalign): memcpy_fast=91ms memcpy=75 ms
benchmark(size=8388608 bytes, times=256):
result(dst aligned, src aligned): memcpy_fast=96ms memcpy=90 ms
result(dst aligned, src unalign): memcpy_fast=94ms memcpy=91 ms
result(dst unalign, src aligned): memcpy_fast=95ms memcpy=91 ms
result(dst unalign, src unalign): memcpy_fast=95ms memcpy=92 ms
benchmark random access:
memcpy_fast=802ms memcpy=662ms
./FastMemcpy_Avx
benchmark(size=32 bytes, times=16777216):
result(dst aligned, src aligned): memcpy_fast=49ms memcpy=29 ms
result(dst aligned, src unalign): memcpy_fast=49ms memcpy=29 ms
result(dst unalign, src aligned): memcpy_fast=49ms memcpy=30 ms
result(dst unalign, src unalign): memcpy_fast=49ms memcpy=29 ms
benchmark(size=64 bytes, times=16777216):
result(dst aligned, src aligned): memcpy_fast=49ms memcpy=29 ms
result(dst aligned, src unalign): memcpy_fast=49ms memcpy=29 ms
result(dst unalign, src aligned): memcpy_fast=49ms memcpy=30 ms
result(dst unalign, src unalign): memcpy_fast=49ms memcpy=29 ms
benchmark(size=512 bytes, times=8388608):
result(dst aligned, src aligned): memcpy_fast=64ms memcpy=56 ms
result(dst aligned, src unalign): memcpy_fast=64ms memcpy=51 ms
result(dst unalign, src aligned): memcpy_fast=66ms memcpy=56 ms
result(dst unalign, src unalign): memcpy_fast=66ms memcpy=52 ms
benchmark(size=1024 bytes, times=4194304):
result(dst aligned, src aligned): memcpy_fast=43ms memcpy=41 ms
result(dst aligned, src unalign): memcpy_fast=44ms memcpy=43 ms
result(dst unalign, src aligned): memcpy_fast=44ms memcpy=44 ms
result(dst unalign, src unalign): memcpy_fast=44ms memcpy=44 ms
benchmark(size=4096 bytes, times=524288):
result(dst aligned, src aligned): memcpy_fast=20ms memcpy=19 ms
result(dst aligned, src unalign): memcpy_fast=22ms memcpy=21 ms
result(dst unalign, src aligned): memcpy_fast=21ms memcpy=21 ms
result(dst unalign, src unalign): memcpy_fast=21ms memcpy=21 ms
benchmark(size=8192 bytes, times=262144):
result(dst aligned, src aligned): memcpy_fast=21ms memcpy=23 ms
result(dst aligned, src unalign): memcpy_fast=22ms memcpy=23 ms
result(dst unalign, src aligned): memcpy_fast=22ms memcpy=34 ms
result(dst unalign, src unalign): memcpy_fast=22ms memcpy=33 ms
benchmark(size=1048576 bytes, times=2048):
result(dst aligned, src aligned): memcpy_fast=90ms memcpy=45 ms
result(dst aligned, src unalign): memcpy_fast=90ms memcpy=45 ms
result(dst unalign, src aligned): memcpy_fast=89ms memcpy=48 ms
result(dst unalign, src unalign): memcpy_fast=88ms memcpy=48 ms
benchmark(size=4194304 bytes, times=512):
result(dst aligned, src aligned): memcpy_fast=88ms memcpy=72 ms
result(dst aligned, src unalign): memcpy_fast=92ms memcpy=79 ms
result(dst unalign, src aligned): memcpy_fast=88ms memcpy=76 ms
result(dst unalign, src unalign): memcpy_fast=87ms memcpy=77 ms
benchmark(size=8388608 bytes, times=256):
result(dst aligned, src aligned): memcpy_fast=95ms memcpy=91 ms
result(dst aligned, src unalign): memcpy_fast=98ms memcpy=92 ms
result(dst unalign, src aligned): memcpy_fast=94ms memcpy=91 ms
result(dst unalign, src unalign): memcpy_fast=95ms memcpy=95 ms
benchmark random access:
memcpy_fast=796ms memcpy=687ms