Skip to content

Commit 7e91d34

Browse files
Anjian Wen authored and RealFYang committed
8365732: RISC-V: implement AES CTR intrinsics
Reviewed-by: fyang, mli
1 parent 15f2538 commit 7e91d34

File tree

2 files changed

+238
-9
lines changed

2 files changed

+238
-9
lines changed

src/hotspot/cpu/riscv/stubGenerator_riscv.cpp

Lines changed: 225 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2493,8 +2493,8 @@ class StubGenerator: public StubCodeGenerator {
24932493
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
24942494
__ vle32_v(res, from);
24952495

2496-
__ mv(t2, 52);
2497-
__ blt(keylen, t2, L_aes128);
2496+
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
2497+
__ bltu(keylen, t2, L_aes128);
24982498
__ beq(keylen, t2, L_aes192);
24992499
// Else we fallthrough to the biggest case (256-bit key size)
25002500

@@ -2572,8 +2572,8 @@ class StubGenerator: public StubCodeGenerator {
25722572
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
25732573
__ vle32_v(res, from);
25742574

2575-
__ mv(t2, 52);
2576-
__ blt(keylen, t2, L_aes128);
2575+
__ mv(t2, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
2576+
__ bltu(keylen, t2, L_aes128);
25772577
__ beq(keylen, t2, L_aes192);
25782578
// Else we fallthrough to the biggest case (256-bit key size)
25792579

@@ -2606,6 +2606,223 @@ class StubGenerator: public StubCodeGenerator {
26062606
return start;
26072607
}
26082608

2609+
// Load big-endian 128-bit from memory.
2610+
void be_load_counter_128(Register counter_hi, Register counter_lo, Register counter) {
2611+
__ ld(counter_lo, Address(counter, 8)); // Load 128-bits from counter
2612+
__ ld(counter_hi, Address(counter));
2613+
__ rev8(counter_lo, counter_lo); // Convert big-endian to little-endian
2614+
__ rev8(counter_hi, counter_hi);
2615+
}
2616+
2617+
// Little-endian 128-bit increment: add 1 to counter_lo and carry into counter_hi.
2618+
void add_counter_128(Register counter_hi, Register counter_lo) {
2619+
assert_different_registers(counter_hi, counter_lo, t0);
2620+
__ addi(counter_lo, counter_lo, 1);
2621+
__ seqz(t0, counter_lo); // Check for result overflow
2622+
__ add(counter_hi, counter_hi, t0); // Add 1 if overflow otherwise 0
2623+
}
2624+
2625+
// Store big-endian 128-bit to memory.
2626+
void be_store_counter_128(Register counter_hi, Register counter_lo, Register counter) {
2627+
assert_different_registers(counter_hi, counter_lo, t0, t1);
2628+
__ rev8(t0, counter_lo); // Convert little-endian to big-endian
2629+
__ rev8(t1, counter_hi);
2630+
__ sd(t0, Address(counter, 8)); // Store 128-bits to counter
2631+
__ sd(t1, Address(counter));
2632+
}
2633+
2634+
void counterMode_AESCrypt(int round, Register in, Register out, Register key, Register counter,
2635+
Register input_len, Register saved_encrypted_ctr, Register used_ptr) {
2636+
// Algorithm:
2637+
//
2638+
// generate_aes_loadkeys();
2639+
// be_load_counter_128(counter_hi, counter_lo, counter);
2640+
//
2641+
// L_next:
2642+
// if (used >= BLOCK_SIZE) goto L_main_loop;
2643+
//
2644+
// L_encrypt_next:
2645+
// *out = *in ^ saved_encrypted_ctr[used];
2646+
// out++; in++; used++; len--;
2647+
// if (len == 0) goto L_exit;
2648+
// goto L_next;
2649+
//
2650+
// L_main_loop:
2651+
// if (len == 0) goto L_exit;
2652+
// saved_encrypted_ctr = generate_aes_encrypt(counter);
2653+
//
2654+
// add_counter_128(counter_hi, counter_lo);
2655+
// be_store_counter_128(counter_hi, counter_lo, counter);
2656+
// used = 0;
2657+
//
2658+
// if (len < BLOCK_SIZE) goto L_encrypt_next;
2659+
//
2660+
// v_in = load_16Byte(in);
2661+
// v_saved_encrypted_ctr = load_16Byte(saved_encrypted_ctr);
2662+
// v_out = v_in ^ v_saved_encrypted_ctr;
2663+
// store_16Byte(out, v_out);
2664+
// out += BLOCK_SIZE;
2665+
// in += BLOCK_SIZE;
2666+
// len -= BLOCK_SIZE;
2667+
// used = BLOCK_SIZE;
2668+
// goto L_main_loop;
2669+
//
2670+
//
2671+
// L_exit:
2672+
// store(used);
2673+
// result = input_len;
2674+
// return result;
2675+
2676+
const Register used = x28;
2677+
const Register len = x29;
2678+
const Register counter_hi = x30;
2679+
const Register counter_lo = x31;
2680+
const Register block_size = t2;
2681+
2682+
const unsigned int BLOCK_SIZE = 16;
2683+
2684+
VectorRegister working_vregs[] = {
2685+
v1, v2, v3, v4, v5, v6, v7, v8,
2686+
v9, v10, v11, v12, v13, v14, v15
2687+
};
2688+
2689+
__ vsetivli(x0, 4, Assembler::e32, Assembler::m1);
2690+
2691+
__ lwu(used, Address(used_ptr));
2692+
__ mv(len, input_len);
2693+
__ mv(block_size, BLOCK_SIZE);
2694+
2695+
// load keys to working_vregs according to round
2696+
generate_aes_loadkeys(key, working_vregs, round);
2697+
2698+
// 128-bit big-endian load
2699+
be_load_counter_128(counter_hi, counter_lo, counter);
2700+
2701+
Label L_next, L_encrypt_next, L_main_loop, L_exit;
2702+
// Check the last saved_encrypted_ctr used value, we fall through
2703+
// to L_encrypt_next when the used value is lower than block_size
2704+
__ bind(L_next);
2705+
__ bgeu(used, block_size, L_main_loop);
2706+
2707+
// Either saved_encrypted_ctr still has unused bytes (used < block_size) or fewer
2708+
// than block_size input bytes are left after L_main_loop; encrypt them one by one.
2709+
__ bind(L_encrypt_next);
2710+
__ add(t0, saved_encrypted_ctr, used);
2711+
__ lbu(t1, Address(t0));
2712+
__ lbu(t0, Address(in));
2713+
__ xorr(t1, t1, t0);
2714+
__ sb(t1, Address(out));
2715+
__ addi(in, in, 1);
2716+
__ addi(out, out, 1);
2717+
__ addi(used, used, 1);
2718+
__ subi(len, len, 1);
2719+
__ beqz(len, L_exit);
2720+
__ j(L_next);
2721+
2722+
// We will calculate the next saved_encrypted_ctr and encrypt the blocks of data
2723+
// one by one until less than a full block remains, as long as len is not zero
2724+
__ bind(L_main_loop);
2725+
__ beqz(len, L_exit);
2726+
__ vle32_v(v16, counter);
2727+
2728+
// encrypt counter according to round
2729+
generate_aes_encrypt(v16, working_vregs, round);
2730+
2731+
__ vse32_v(v16, saved_encrypted_ctr);
2732+
2733+
// 128-bit little-endian increment
2734+
add_counter_128(counter_hi, counter_lo);
2735+
// 128-bit big-endian store
2736+
be_store_counter_128(counter_hi, counter_lo, counter);
2737+
2738+
__ mv(used, 0);
2739+
// Check if we have a full block_size
2740+
__ bltu(len, block_size, L_encrypt_next);
2741+
2742+
// We have one full block to encrypt at least
2743+
__ vle32_v(v17, in);
2744+
__ vxor_vv(v16, v16, v17);
2745+
__ vse32_v(v16, out);
2746+
__ add(out, out, block_size);
2747+
__ add(in, in, block_size);
2748+
__ sub(len, len, block_size);
2749+
__ mv(used, block_size);
2750+
__ j(L_main_loop);
2751+
2752+
__ bind(L_exit);
2753+
__ sw(used, Address(used_ptr));
2754+
__ mv(x10, input_len);
2755+
__ leave();
2756+
__ ret();
2757+
};
2758+
2759+
// CTR AES crypt.
2760+
// Arguments:
2761+
//
2762+
// Inputs:
2763+
// c_rarg0 - source byte array address
2764+
// c_rarg1 - destination byte array address
2765+
// c_rarg2 - K (key) in little endian int array
2766+
// c_rarg3 - counter vector byte array address
2767+
// c_rarg4 - input length
2768+
// c_rarg5 - saved encryptedCounter start
2769+
// c_rarg6 - saved used length
2770+
//
2771+
// Output:
2772+
// x10 - input length
2773+
//
2774+
address generate_counterMode_AESCrypt() {
2775+
assert(UseZvkn, "need AES instructions (Zvkned extension) support");
2776+
assert(UseAESCTRIntrinsics, "need AES instructions (Zvkned extension) support");
2777+
assert(UseZbb, "need basic bit manipulation (Zbb extension) support");
2778+
2779+
__ align(CodeEntryAlignment);
2780+
StubId stub_id = StubId::stubgen_counterMode_AESCrypt_id;
2781+
StubCodeMark mark(this, stub_id);
2782+
2783+
const Register in = c_rarg0;
2784+
const Register out = c_rarg1;
2785+
const Register key = c_rarg2;
2786+
const Register counter = c_rarg3;
2787+
const Register input_len = c_rarg4;
2788+
const Register saved_encrypted_ctr = c_rarg5;
2789+
const Register used_len_ptr = c_rarg6;
2790+
2791+
const Register keylen = c_rarg7; // temporary register
2792+
2793+
const address start = __ pc();
2794+
__ enter();
2795+
2796+
Label L_exit;
2797+
__ beqz(input_len, L_exit);
2798+
2799+
Label L_aes128, L_aes192;
2800+
// Compute #rounds for AES based on the length of the key array
2801+
__ lwu(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
2802+
__ mv(t0, 52); // key length could be only {11, 13, 15} * 4 = {44, 52, 60}
2803+
__ bltu(keylen, t0, L_aes128);
2804+
__ beq(keylen, t0, L_aes192);
2805+
// Else we fallthrough to the biggest case (256-bit key size)
2806+
2807+
// Note: the following function performs crypt with key += 15*16
2808+
counterMode_AESCrypt(15, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
2809+
2810+
// Note: the following function performs crypt with key += 13*16
2811+
__ bind(L_aes192);
2812+
counterMode_AESCrypt(13, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
2813+
2814+
// Note: the following function performs crypt with key += 11*16
2815+
__ bind(L_aes128);
2816+
counterMode_AESCrypt(11, in, out, key, counter, input_len, saved_encrypted_ctr, used_len_ptr);
2817+
2818+
__ bind(L_exit);
2819+
__ mv(x10, input_len);
2820+
__ leave();
2821+
__ ret();
2822+
2823+
return start;
2824+
}
2825+
26092826
// code for comparing 8 characters of strings with Latin1 and Utf16 encoding
26102827
void compare_string_8_x_LU(Register tmpL, Register tmpU,
26112828
Register strL, Register strU, Label& DIFF) {
@@ -6826,6 +7043,10 @@ static const int64_t right_3_bits = right_n_bits(3);
68267043
StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
68277044
}
68287045

7046+
if (UseAESCTRIntrinsics) {
7047+
StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt();
7048+
}
7049+
68297050
if (UsePoly1305Intrinsics) {
68307051
StubRoutines::_poly1305_processBlocks = generate_poly1305_processBlocks();
68317052
}

src/hotspot/cpu/riscv/vm_version_riscv.cpp

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -434,6 +434,15 @@ void VM_Version::c2_initialize() {
434434
warning("UseAESIntrinsics enabled, but UseAES not, enabling");
435435
UseAES = true;
436436
}
437+
438+
if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics) && UseZbb) {
439+
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
440+
}
441+
442+
if (UseAESCTRIntrinsics && !UseZbb) {
443+
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZbb support.");
444+
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
445+
}
437446
} else {
438447
if (UseAES) {
439448
warning("AES instructions are not available on this CPU");
@@ -443,11 +452,10 @@ void VM_Version::c2_initialize() {
443452
warning("AES intrinsics are not available on this CPU");
444453
FLAG_SET_DEFAULT(UseAESIntrinsics, false);
445454
}
446-
}
447-
448-
if (UseAESCTRIntrinsics) {
449-
warning("AES/CTR intrinsics are not available on this CPU");
450-
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
455+
if (UseAESCTRIntrinsics) {
456+
warning("Cannot enable UseAESCTRIntrinsics on cpu without UseZvkn support.");
457+
FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
458+
}
451459
}
452460
}
453461

0 commit comments

Comments
 (0)