744 lines
18 KiB
ArmAsm
744 lines
18 KiB
ArmAsm
/* blowfish-arm.S - ARM assembly implementation of Blowfish cipher
|
|
*
|
|
* Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
|
|
*
|
|
* This file is part of Libgcrypt.
|
|
*
|
|
* Libgcrypt is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License as
|
|
* published by the Free Software Foundation; either version 2.1 of
|
|
* the License, or (at your option) any later version.
|
|
*
|
|
* Libgcrypt is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this program; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#if defined(__ARMEL__)
|
|
#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
|
|
|
|
/* Everything below is emitted into the text section. */
.text

/* Unified assembler syntax, ARM (not Thumb) instruction encoding. */
.syntax unified
.arm
|
|
|
|
/*
 * Byte offsets into the crypto context: four S-boxes of 256 four-byte
 * entries each (s0..s3), immediately followed by the round-key array p.
 */
#define s0	0
#define s1	(s0 + 1 * 256 * 4)
#define s2	(s0 + 2 * 256 * 4)
#define s3	(s0 + 3 * 256 * 4)
#define p	(s3 + 1 * 256 * 4)
|
|
|
|
/*
 * Register roles.
 *
 * Note the aliasing: CTXs1/CTXs3 occupy the same registers (r9/r10) as
 * RL1/RR1.  The 1-way F() macro addresses each S-box through its own
 * base register, whereas the 2-way F2() macro only uses CTXs0 and CTXs2
 * as bases, which leaves r9/r10 free to carry the second block.
 */
#define CTXs0	r0	/* context base, S-box 0 */
#define CTXs1	r9	/* S-box 1 base */
#define CTXs2	r8	/* S-box 2 base; round keys are addressed from it */
#define CTXs3	r10	/* S-box 3 base */
#define RMASK	lr	/* holds (0xff << 2) while rounds are running */
#define RKEYL	r2	/* round key xored into the left half */
#define RKEYR	ip	/* round key xored into the right half */

/* first block: left and right half */
#define RL0	r3
#define RR0	r4

/* second block: left and right half */
#define RL1	r9
#define RR1	r10

/* temporaries */
#define RT0	r11
#define RT1	r7
#define RT2	r5
#define RT3	r6
|
|
|
|
/* helper macros */
|
|
/*
 * Assemble a 32-bit value in rdest from four single-byte loads at
 * [rbase + disp .. rbase + disp + 3]; the byte at the lowest address
 * becomes the least significant byte.  rscratch is clobbered.
 */
#define ldr_unaligned_le(rdest, rbase, disp, rscratch) \
	ldrb rdest, [rbase, #((disp) + 0)]; \
	ldrb rscratch, [rbase, #((disp) + 1)]; \
	orr rdest, rdest, rscratch, lsl #8; \
	ldrb rscratch, [rbase, #((disp) + 2)]; \
	orr rdest, rdest, rscratch, lsl #16; \
	ldrb rscratch, [rbase, #((disp) + 3)]; \
	orr rdest, rdest, rscratch, lsl #24;
|
|
|
|
/*
 * Store the 32-bit value in rval to [rbase + disp .. rbase + disp + 3]
 * with four single-byte stores; the least significant byte goes to the
 * lowest address.  rval is left intact, rtmpa and rtmpb are clobbered.
 */
#define str_unaligned_le(rval, rbase, disp, rtmpa, rtmpb) \
	mov rtmpa, rval, lsr #8; \
	strb rval, [rbase, #((disp) + 0)]; \
	mov rtmpb, rval, lsr #16; \
	strb rtmpa, [rbase, #((disp) + 1)]; \
	mov rtmpa, rval, lsr #24; \
	strb rtmpb, [rbase, #((disp) + 2)]; \
	strb rtmpa, [rbase, #((disp) + 3)];
|
|
|
|
/*
 * Assemble a 32-bit value in rdest from four single-byte loads at
 * [rbase + disp .. rbase + disp + 3]; the byte at the lowest address
 * becomes the most significant byte.  rscratch is clobbered.
 */
#define ldr_unaligned_be(rdest, rbase, disp, rscratch) \
	ldrb rdest, [rbase, #((disp) + 3)]; \
	ldrb rscratch, [rbase, #((disp) + 2)]; \
	orr rdest, rdest, rscratch, lsl #8; \
	ldrb rscratch, [rbase, #((disp) + 1)]; \
	orr rdest, rdest, rscratch, lsl #16; \
	ldrb rscratch, [rbase, #((disp) + 0)]; \
	orr rdest, rdest, rscratch, lsl #24;
|
|
|
|
/*
 * Store the 32-bit value in rval to [rbase + disp .. rbase + disp + 3]
 * with four single-byte stores; the least significant byte goes to the
 * highest address.  rval is left intact, rtmpa and rtmpb are clobbered.
 */
#define str_unaligned_be(rval, rbase, disp, rtmpa, rtmpb) \
	mov rtmpa, rval, lsr #8; \
	strb rval, [rbase, #((disp) + 3)]; \
	mov rtmpb, rval, lsr #16; \
	strb rtmpa, [rbase, #((disp) + 2)]; \
	mov rtmpa, rval, lsr #24; \
	strb rtmpb, [rbase, #((disp) + 1)]; \
	strb rtmpa, [rbase, #((disp) + 0)];
|
|
|
|
#ifdef __ARMEL__
/* Little-endian host: host-order byte accessors are the _le variants. */
#define ldr_unaligned_host ldr_unaligned_le
#define str_unaligned_host str_unaligned_le

/*
 * On a little-endian host both conversions are the same 32-bit byte
 * swap of reg.  rtmp is only needed by the pre-ARMv6 sequence.
 */
#ifdef HAVE_ARM_ARCH_V6
#define host_to_be(reg, rtmp) rev reg, reg;
#define be_to_host(reg, rtmp) rev reg, reg;
#else
/* No rev instruction available: swap bytes with eor/ror arithmetic. */
#define host_to_be(reg, rtmp) \
	eor rtmp, reg, reg, ror #16; \
	mov rtmp, rtmp, lsr #8; \
	bic rtmp, rtmp, #0xff00; \
	eor reg, rtmp, reg, ror #8;
#define be_to_host(reg, rtmp) \
	host_to_be(reg, rtmp)
#endif
#else
/* Big-endian host: host-order byte accessors are the _be variants. */
#define ldr_unaligned_host ldr_unaligned_be
#define str_unaligned_host str_unaligned_be

/* Host order already is big-endian, so the conversions expand to nothing. */
#define host_to_be(reg, rtmp) /*_*/
#define be_to_host(reg, rtmp) /*_*/
#endif
|
|
|
|
/* Identity conversion: lets the block read/write macros skip byte swapping. */
#define host_to_host(reg, rtmp) /* nothing */
|
|
|
|
/***********************************************************************
|
|
* 1-way blowfish
|
|
***********************************************************************/
|
|
/*
 * One Feistel step:
 *   xr ^= ((S0[b3] + S1[b2]) ^ S2[b1]) + S3[b0]
 * where b3..b0 are the bytes of xl from most to least significant.
 * Each byte is extracted already scaled by 4 (RMASK = 0xff << 2) so it
 * can be used directly as a word offset.  Loads are interleaved with
 * the index computations.  Clobbers RT0-RT3.
 */
#define F(xl, xr) \
	and RT0, RMASK, xl, lsr#(24 - 2); \
	and RT1, RMASK, xl, lsr#(16 - 2); \
	ldr RT0, [CTXs0, RT0]; \
	and RT2, RMASK, xl, lsr#(8 - 2); \
	ldr RT1, [CTXs1, RT1]; \
	and RT3, RMASK, xl, lsl#2; \
	ldr RT2, [CTXs2, RT2]; \
	add RT0, RT0, RT1; \
	ldr RT3, [CTXs3, RT3]; \
	eor RT0, RT0, RT2; \
	add RT0, RT0, RT3; \
	eor xr, xr, RT0;
|
|
|
|
/* RKEYL = p[n], RKEYR = p[n + 1]; p is addressed relative to CTXs2. */
#define load_roundkey_enc(n) \
	ldr RKEYL, [CTXs2, #((p - s2) + 4 * (n))]; \
	ldr RKEYR, [CTXs2, #((p - s2) + 4 * ((n) + 1))];

/* Xor the currently loaded key pair into the block halves. */
#define add_roundkey_enc() \
	eor RL0, RL0, RKEYL; \
	eor RR0, RR0, RKEYR;

/*
 * Two encryption rounds: apply the key pair that is already loaded,
 * fetch the pair starting at p[n] for the next invocation, then run
 * F in both directions.
 */
#define round_enc(n) \
	add_roundkey_enc(); \
	load_roundkey_enc(n); \
	\
	F(RL0, RR0); \
	F(RR0, RL0);
|
|
|
|
/* RKEYL = p[n], RKEYR = p[n - 1]; p is addressed relative to CTXs2. */
#define load_roundkey_dec(n) \
	ldr RKEYL, [CTXs2, #((p - s2) + 4 * (n))]; \
	ldr RKEYR, [CTXs2, #((p - s2) + 4 * ((n) - 1))];

/* Xor the currently loaded key pair into the block halves. */
#define add_roundkey_dec() \
	eor RL0, RL0, RKEYL; \
	eor RR0, RR0, RKEYR;

/*
 * Two decryption rounds: apply the key pair that is already loaded,
 * fetch the pair p[n], p[n - 1] for the next invocation, then run F in
 * both directions.
 */
#define round_dec(n) \
	add_roundkey_dec(); \
	load_roundkey_dec(n); \
	\
	F(RL0, RR0); \
	F(RR0, RL0);
|
|
|
|
/*
 * Load two words from [rsrc + disp] and [rsrc + disp + 4] into xl and
 * xr using word loads, then run the conversion macro `convert' on each
 * of them with rtmp as its scratch register.
 */
#define read_block_aligned(rsrc, disp, xl, xr, convert, rtmp) \
	ldr xl, [rsrc, #((disp) + 0)]; \
	ldr xr, [rsrc, #((disp) + 4)]; \
	convert(xl, rtmp); \
	convert(xr, rtmp);
|
|
|
|
/*
 * Run the conversion macro `convert' on xl and xr (rtmp is its scratch
 * register; xl and xr are modified in place), then store them with word
 * stores to [rdst + disp] and [rdst + disp + 4].
 */
#define write_block_aligned(rdst, disp, xl, xr, convert, rtmp) \
	convert(xl, rtmp); \
	convert(xr, rtmp); \
	str xl, [rdst, #((disp) + 0)]; \
	str xr, [rdst, #((disp) + 4)];
|
|
|
|
/*
 * One-block (8 byte) load/store helpers.
 *
 *   read_block / write_block            big-endian data <-> registers
 *   read_block_host / write_block_host  host-order data <-> registers
 *
 * All take a base register, a byte offset, the two half-block registers
 * and scratch register(s).
 */
#ifdef __ARM_FEATURE_UNALIGNED
/* unaligned word accesses allowed: always use the word-sized path */
#define read_block(rin, offs, xl, xr, rtmp0) \
	read_block_aligned(rin, offs, xl, xr, host_to_be, rtmp0)

#define write_block(rout, offs, xl, xr, rtmp0, rtmp1) \
	write_block_aligned(rout, offs, xl, xr, be_to_host, rtmp0)

#define read_block_host(rin, offs, xl, xr, rtmp0) \
	read_block_aligned(rin, offs, xl, xr, host_to_host, rtmp0)

#define write_block_host(rout, offs, xl, xr, rtmp0, rtmp1) \
	write_block_aligned(rout, offs, xl, xr, host_to_host, rtmp0)
#else
/*
 * Unaligned word accesses are not available: test the low two bits of
 * the base register and fall back to byte accesses when it is not
 * word aligned.  These variants clobber the condition flags and use
 * the local labels 1 and 2.
 */
#define read_block(rin, offs, xl, xr, rtmp0) \
	tst rin, #3; \
	beq 1f; \
		ldr_unaligned_be(xl, rin, (offs) + 0, rtmp0); \
		ldr_unaligned_be(xr, rin, (offs) + 4, rtmp0); \
		b 2f; \
	1:;\
		read_block_aligned(rin, offs, xl, xr, host_to_be, rtmp0); \
	2:;

#define write_block(rout, offs, xl, xr, rtmp0, rtmp1) \
	tst rout, #3; \
	beq 1f; \
		str_unaligned_be(xl, rout, (offs) + 0, rtmp0, rtmp1); \
		str_unaligned_be(xr, rout, (offs) + 4, rtmp0, rtmp1); \
		b 2f; \
	1:;\
		write_block_aligned(rout, offs, xl, xr, be_to_host, rtmp0); \
	2:;

#define read_block_host(rin, offs, xl, xr, rtmp0) \
	tst rin, #3; \
	beq 1f; \
		ldr_unaligned_host(xl, rin, (offs) + 0, rtmp0); \
		ldr_unaligned_host(xr, rin, (offs) + 4, rtmp0); \
		b 2f; \
	1:;\
		read_block_aligned(rin, offs, xl, xr, host_to_host, rtmp0); \
	2:;

/*
 * The aligned path passes rtmp0 as the scratch argument:
 * write_block_aligned takes six arguments, and omitting the last one
 * is a preprocessor error as soon as this macro is expanded.
 */
#define write_block_host(rout, offs, xl, xr, rtmp0, rtmp1) \
	tst rout, #3; \
	beq 1f; \
		str_unaligned_host(xl, rout, (offs) + 0, rtmp0, rtmp1); \
		str_unaligned_host(xr, rout, (offs) + 4, rtmp0, rtmp1); \
		b 2f; \
	1:;\
		write_block_aligned(rout, offs, xl, xr, host_to_host, rtmp0); \
	2:;
#endif
|
|
|
|
/*
 * __blowfish_enc_blk1 - encrypt one block held in registers.
 *
 * input:
 *	CTXs0 (r0): crypto context
 *	[RL0, RR0]: source block
 * output:
 *	[RR0, RL0]: encrypted block (halves swapped)
 * clobbers:
 *	CTXs1, CTXs2, CTXs3, RKEYL, RKEYR, RT0-RT3
 *	(lr is saved on entry because it doubles as RMASK)
 */
.align 3
.type  __blowfish_enc_blk1,%function;

__blowfish_enc_blk1:
	push {lr};

	/* per-S-box base pointers and the byte mask used by F() */
	mov RMASK, #(0xff << 2); /* byte mask */
	add CTXs1, CTXs0, #(s1 - s0);
	add CTXs2, CTXs0, #(s2 - s0);
	add CTXs3, CTXs1, #(s3 - s1);

	/* each round_enc(n) applies the pending key pair and loads p[n], p[n+1] */
	load_roundkey_enc(0);
	round_enc(2);
	round_enc(4);
	round_enc(6);
	round_enc(8);
	round_enc(10);
	round_enc(12);
	round_enc(14);
	round_enc(16);
	add_roundkey_enc();	/* final key pair p[16], p[17] */

	pop {pc};
.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
|
|
|
|
/*
 * _gcry_blowfish_arm_do_encrypt - encrypt a block given as two words
 * in host byte order, in place.
 *
 * input:
 *	r0: ctx, CTX
 *	r1: u32 *ret_xl
 *	r2: u32 *ret_xr
 *
 * The two words are replaced by the encrypted halves.
 *
 * Alignment note: on ARM the .align operand is a power of two, so the
 * former ".align 8" requested 256-byte alignment.  Use the same 8-byte
 * alignment (.align 3) as every other function in this file.
 */
.align 3
.globl _gcry_blowfish_arm_do_encrypt
.type  _gcry_blowfish_arm_do_encrypt,%function;

_gcry_blowfish_arm_do_encrypt:
	/* r2 is saved separately: it is reused as RKEYL by the cipher core */
	push {r2, r4-r11, ip, lr};

	ldr RL0, [r1];
	ldr RR0, [r2];

	bl __blowfish_enc_blk1;

	/* restore ret_xr; the core returns the halves swapped: [RR0, RL0] */
	pop {r2};
	str RR0, [r1];
	str RL0, [r2];

	pop {r4-r11, ip, pc};
.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt;
|
|
|
|
/*
 * _gcry_blowfish_arm_encrypt_block - encrypt one 8-byte block.
 *
 * input:
 *	r0: ctx, CTX
 *	r1: dst
 *	r2: src
 *
 * src is read and dst is written as big-endian words; either may be
 * unaligned.
 */
.align 3
.globl _gcry_blowfish_arm_encrypt_block
.type  _gcry_blowfish_arm_encrypt_block,%function;

_gcry_blowfish_arm_encrypt_block:
	push {r4-r11, ip, lr};

	/* fetch the block before the core reuses r2 as RKEYL */
	read_block(r2, 0, RL0, RR0, RT0);

	bl __blowfish_enc_blk1;

	/* the core returns the halves swapped: [RR0, RL0] */
	write_block(r1, 0, RR0, RL0, RT0, RT1);

	pop {r4-r11, ip, pc};
.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block;
|
|
|
|
/*
 * _gcry_blowfish_arm_decrypt_block - decrypt one 8-byte block.
 *
 * input:
 *	r0: ctx, CTX
 *	r1: dst
 *	r2: src
 *
 * src is read and dst is written as big-endian words; either may be
 * unaligned.  The rounds are inlined here and walk the round keys from
 * p[17] down to p[0].
 */
.align 3
.globl _gcry_blowfish_arm_decrypt_block
.type  _gcry_blowfish_arm_decrypt_block,%function;

_gcry_blowfish_arm_decrypt_block:
	push {r4-r11, ip, lr};

	/* per-S-box base pointers and the byte mask used by F() */
	add CTXs1, CTXs0, #(s1 - s0);
	add CTXs2, CTXs0, #(s2 - s0);
	mov RMASK, #(0xff << 2); /* byte mask */
	add CTXs3, CTXs1, #(s3 - s1);

	/* fetch the block before r2 is reused as RKEYL */
	read_block(r2, 0, RL0, RR0, RT0);

	/* each round_dec(n) applies the pending key pair and loads p[n], p[n-1] */
	load_roundkey_dec(17);
	round_dec(15);
	round_dec(13);
	round_dec(11);
	round_dec(9);
	round_dec(7);
	round_dec(5);
	round_dec(3);
	round_dec(1);
	add_roundkey_dec();	/* final key pair p[1], p[0] */

	/* result halves are swapped: [RR0, RL0] */
	write_block(r1, 0, RR0, RL0, RT0, RT1);

	pop {r4-r11, ip, pc};
.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block;
|
|
|
|
/***********************************************************************
|
|
* 2-way blowfish
|
|
***********************************************************************/
|
|
/*
 * Two Blowfish rounds on two blocks at once, interleaved to hide load
 * latency.  In order it computes
 *
 *   xr0 ^= F(xl0);  xr1 ^= F(xl1);  xl0 ^= F(xr0);  xl1 ^= F(xr1);
 *
 * then xors RKEYL into xl0/xl1 and RKEYR into xr0/xr1, and finally
 * invokes set_nextk to fetch the next key pair:
 *   dec == 0:  RKEYL = p[n],     RKEYR = p[n + 1]
 *   dec == 1:  RKEYL = p[n + 1], RKEYR = p[n]
 *
 * Only CTXs0 and CTXs2 are used as table bases; indices into S-box 1
 * and S-box 3 are biased by (s1 - s0) and (s3 - s2) instead.
 * Clobbers RT0-RT3.  The instruction order is deliberate; do not
 * reorder without re-checking every register's live range.
 */
#define F2(n, xl0, xr0, xl1, xr1, set_nextk, dec) \
	/* byte offsets for F(xl0) */ \
	and RT0, RMASK, xl0, lsr#(24 - 2); \
	and RT1, RMASK, xl0, lsr#(16 - 2); \
	and RT2, RMASK, xl0, lsr#(8 - 2); \
	add RT1, #(s1 - s0); \
	\
	ldr RT0, [CTXs0, RT0]; \
	and RT3, RMASK, xl0, lsl#2; \
	ldr RT1, [CTXs0, RT1]; \
	add RT3, #(s3 - s2); \
	ldr RT2, [CTXs2, RT2]; \
	add RT0, RT1; \
	ldr RT3, [CTXs2, RT3]; \
	\
	/* finish F(xl0) into xr0 while starting F(xl1) */ \
	and RT1, RMASK, xl1, lsr#(24 - 2); \
	eor RT0, RT2; \
	and RT2, RMASK, xl1, lsr#(16 - 2); \
	add RT0, RT3; \
	add RT2, #(s1 - s0); \
	and RT3, RMASK, xl1, lsr#(8 - 2); \
	eor xr0, RT0; \
	\
	ldr RT1, [CTXs0, RT1]; \
	and RT0, RMASK, xl1, lsl#2; \
	ldr RT2, [CTXs0, RT2]; \
	add RT0, #(s3 - s2); \
	ldr RT3, [CTXs2, RT3]; \
	add RT1, RT2; \
	ldr RT0, [CTXs2, RT0]; \
	\
	/* finish F(xl1) into xr1 while starting F(xr0) */ \
	and RT2, RMASK, xr0, lsr#(24 - 2); \
	eor RT1, RT3; \
	and RT3, RMASK, xr0, lsr#(16 - 2); \
	add RT1, RT0; \
	add RT3, #(s1 - s0); \
	and RT0, RMASK, xr0, lsr#(8 - 2); \
	eor xr1, RT1; \
	\
	ldr RT2, [CTXs0, RT2]; \
	and RT1, RMASK, xr0, lsl#2; \
	ldr RT3, [CTXs0, RT3]; \
	add RT1, #(s3 - s2); \
	ldr RT0, [CTXs2, RT0]; \
	add RT2, RT3; \
	ldr RT1, [CTXs2, RT1]; \
	\
	/* finish F(xr0) into xl0 while starting F(xr1) */ \
	and RT3, RMASK, xr1, lsr#(24 - 2); \
	eor RT2, RT0; \
	and RT0, RMASK, xr1, lsr#(16 - 2); \
	add RT2, RT1; \
	add RT0, #(s1 - s0); \
	and RT1, RMASK, xr1, lsr#(8 - 2); \
	eor xl0, RT2; \
	\
	ldr RT3, [CTXs0, RT3]; \
	and RT2, RMASK, xr1, lsl#2; \
	ldr RT0, [CTXs0, RT0]; \
	add RT2, #(s3 - s2); \
	ldr RT1, [CTXs2, RT1]; \
	eor xl1, RKEYL; \
	ldr RT2, [CTXs2, RT2]; \
	\
	/* finish F(xr1) into xl1, apply round keys, fetch the next pair */ \
	eor xr0, RKEYR; \
	add RT3, RT0; \
	eor xr1, RKEYR; \
	eor RT3, RT1; \
	eor xl0, RKEYL; \
	add RT3, RT2; \
	set_nextk(RKEYL, (p - s2) + (4 * (n) + ((dec) * 4))); \
	eor xl1, RT3; \
	set_nextk(RKEYR, (p - s2) + (4 * (n) + (!(dec) * 4)));
|
|
|
|
/*
 * 2-way encryption prologue: xor p[n], p[n + 1] into both blocks, then
 * leave p[n + 2], p[n + 3] in RKEYL/RKEYR for the first F2 invocation.
 */
#define load_n_add_roundkey_enc2(n) \
	load_roundkey_enc(n); \
	eor RL0, RL0, RKEYL; \
	eor RR0, RR0, RKEYR; \
	eor RL1, RL1, RKEYL; \
	eor RR1, RR1, RKEYR; \
	load_roundkey_enc((n) + 2);
|
|
|
|
/* set_nextk implementation for F2: load one round key at byte offset offs from CTXs2. */
#define next_key(reg, offs) \
	ldr reg, [CTXs2, #(offs)];

/* set_nextk implementation for the last F2 invocation: no further key to load. */
#define dummy(reg, offs) /* do nothing */

/*
 * Two encryption rounds on both blocks; load_next_key (next_key or
 * dummy) decides whether p[n + 2], p[n + 3] are fetched afterwards.
 */
#define round_enc2(n, load_next_key) \
	F2((n) + 2, RL0, RR0, RL1, RR1, load_next_key, 0);
|
|
|
|
/*
 * 2-way decryption prologue: xor p[n], p[n - 1] into both blocks, then
 * leave p[n - 2], p[n - 3] in RKEYL/RKEYR for the first F2 invocation.
 */
#define load_n_add_roundkey_dec2(n) \
	load_roundkey_dec(n); \
	eor RL0, RL0, RKEYL; \
	eor RR0, RR0, RKEYR; \
	eor RL1, RL1, RKEYL; \
	eor RR1, RR1, RKEYR; \
	load_roundkey_dec((n) - 2);

/*
 * Two decryption rounds on both blocks; load_next_key (next_key or
 * dummy) decides whether p[n - 2], p[n - 3] are fetched afterwards.
 */
#define round_dec2(n, load_next_key) \
	F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1);
|
|
|
|
/*
 * Load four consecutive words from [rsrc] .. [rsrc + 12] into xl0, xr0,
 * xl1, xr1 with word loads and run the conversion macro `convert' on
 * each (rtmp is its scratch register).  Loads and conversions are
 * interleaved.
 */
#define read_block2_aligned(rsrc, xl0, xr0, xl1, xr1, convert, rtmp) \
	ldr xl0, [rsrc, #(0)]; \
	ldr xr0, [rsrc, #(4)]; \
	convert(xl0, rtmp); \
	ldr xl1, [rsrc, #(8)]; \
	convert(xr0, rtmp); \
	ldr xr1, [rsrc, #(12)]; \
	convert(xl1, rtmp); \
	convert(xr1, rtmp);
|
|
|
|
/*
 * Run the conversion macro `convert' on xl0, xr0, xl1, xr1 (in place,
 * rtmp is its scratch register) and store them with word stores to
 * [rdst] .. [rdst + 12].  Conversions and stores are interleaved.
 */
#define write_block2_aligned(rdst, xl0, xr0, xl1, xr1, convert, rtmp) \
	convert(xl0, rtmp); \
	convert(xr0, rtmp); \
	convert(xl1, rtmp); \
	str xl0, [rdst, #(0)]; \
	convert(xr1, rtmp); \
	str xr0, [rdst, #(4)]; \
	str xl1, [rdst, #(8)]; \
	str xr1, [rdst, #(12)];
|
|
|
|
/*
 * Two-block (16 byte) load/store helpers.
 *
 *   read_block2 / write_block2            big-endian data <-> registers
 *   read_block2_host / write_block2_host  host-order data <-> registers
 */
#ifdef __ARM_FEATURE_UNALIGNED
/* unaligned word accesses allowed: always use the word-sized path */
#define read_block2(rsrc, xl0, xr0, xl1, xr1, rtmp0) \
	read_block2_aligned(rsrc, xl0, xr0, xl1, xr1, host_to_be, rtmp0)

#define write_block2(rdst, xl0, xr0, xl1, xr1, rtmp0, rtmp1) \
	write_block2_aligned(rdst, xl0, xr0, xl1, xr1, be_to_host, rtmp0)

#define read_block2_host(rsrc, xl0, xr0, xl1, xr1, rtmp0) \
	read_block2_aligned(rsrc, xl0, xr0, xl1, xr1, host_to_host, rtmp0)

#define write_block2_host(rdst, xl0, xr0, xl1, xr1, rtmp0, rtmp1) \
	write_block2_aligned(rdst, xl0, xr0, xl1, xr1, host_to_host, rtmp0)
#else
/*
 * Unaligned word accesses are not available: test the low two bits of
 * the base register and fall back to byte accesses when it is not
 * word aligned.  These variants clobber the condition flags and use
 * the local labels 1 and 2.
 */
#define read_block2(rsrc, xl0, xr0, xl1, xr1, rtmp0) \
	tst rsrc, #3; \
	beq 1f; \
		ldr_unaligned_be(xl0, rsrc, 0, rtmp0); \
		ldr_unaligned_be(xr0, rsrc, 4, rtmp0); \
		ldr_unaligned_be(xl1, rsrc, 8, rtmp0); \
		ldr_unaligned_be(xr1, rsrc, 12, rtmp0); \
		b 2f; \
	1:;\
		read_block2_aligned(rsrc, xl0, xr0, xl1, xr1, host_to_be, rtmp0); \
	2:;

#define write_block2(rdst, xl0, xr0, xl1, xr1, rtmp0, rtmp1) \
	tst rdst, #3; \
	beq 1f; \
		str_unaligned_be(xl0, rdst, 0, rtmp0, rtmp1); \
		str_unaligned_be(xr0, rdst, 4, rtmp0, rtmp1); \
		str_unaligned_be(xl1, rdst, 8, rtmp0, rtmp1); \
		str_unaligned_be(xr1, rdst, 12, rtmp0, rtmp1); \
		b 2f; \
	1:;\
		write_block2_aligned(rdst, xl0, xr0, xl1, xr1, be_to_host, rtmp0); \
	2:;

#define read_block2_host(rsrc, xl0, xr0, xl1, xr1, rtmp0) \
	tst rsrc, #3; \
	beq 1f; \
		ldr_unaligned_host(xl0, rsrc, 0, rtmp0); \
		ldr_unaligned_host(xr0, rsrc, 4, rtmp0); \
		ldr_unaligned_host(xl1, rsrc, 8, rtmp0); \
		ldr_unaligned_host(xr1, rsrc, 12, rtmp0); \
		b 2f; \
	1:;\
		read_block2_aligned(rsrc, xl0, xr0, xl1, xr1, host_to_host, rtmp0); \
	2:;

#define write_block2_host(rdst, xl0, xr0, xl1, xr1, rtmp0, rtmp1) \
	tst rdst, #3; \
	beq 1f; \
		str_unaligned_host(xl0, rdst, 0, rtmp0, rtmp1); \
		str_unaligned_host(xr0, rdst, 4, rtmp0, rtmp1); \
		str_unaligned_host(xl1, rdst, 8, rtmp0, rtmp1); \
		str_unaligned_host(xr1, rdst, 12, rtmp0, rtmp1); \
		b 2f; \
	1:;\
		write_block2_aligned(rdst, xl0, xr0, xl1, xr1, host_to_host, rtmp0); \
	2:;
#endif
|
|
|
|
/*
 * _gcry_blowfish_arm_enc_blk2 - encrypt two blocks held in registers.
 *
 * input:
 *	CTXs0 (r0): crypto context
 *	[RL0, RR0], [RL1, RR1]: source blocks
 * output:
 *	[RR0, RL0], [RR1, RL1]: encrypted blocks (halves swapped), each
 *	word passed through host_to_be so callers can combine it with data
 *	loaded in host byte order
 * clobbers:
 *	CTXs2, RKEYL, RKEYR, RT1-RT3 (RT0 and lr are saved and restored)
 */
.align 3
.type  _gcry_blowfish_arm_enc_blk2,%function;

_gcry_blowfish_arm_enc_blk2:
	push {RT0,lr};

	add CTXs2, CTXs0, #(s2 - s0);
	mov RMASK, #(0xff << 2); /* byte mask */

	/* 16 rounds, two per F2 invocation; the last one loads no further key */
	load_n_add_roundkey_enc2(0);
	round_enc2(2, next_key);
	round_enc2(4, next_key);
	round_enc2(6, next_key);
	round_enc2(8, next_key);
	round_enc2(10, next_key);
	round_enc2(12, next_key);
	round_enc2(14, next_key);
	round_enc2(16, dummy);

	host_to_be(RR0, RT0);
	host_to_be(RL0, RT0);
	host_to_be(RR1, RT0);
	host_to_be(RL1, RT0);

	pop {RT0,pc};
.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2;
|
|
|
|
/*
 * _gcry_blowfish_arm_cfb_dec - CFB-decrypt two blocks.
 *
 * input:
 *	r0: CTX
 *	r1: dst (2 blocks)
 *	r2: src (2 blocks)
 *	r3: iv (64bit)
 *
 * Encrypts iv and src[0], xors the results with src[0] and src[1] to
 * produce dst, and replaces iv with src[1].
 */
.align 3
.globl _gcry_blowfish_arm_cfb_dec;
.type  _gcry_blowfish_arm_cfb_dec,%function;

_gcry_blowfish_arm_cfb_dec:
	/* src pointer is pushed separately so it can be popped into r0 later */
	push {r2, r4-r11, ip, lr};

	/* keep the iv pointer: r3 is about to be overwritten as RL0 */
	mov lr, r3;

	/* Load input (iv/r3 is aligned, src/r2 might not be) */
	ldm r3, {RL0, RR0};
	host_to_be(RL0, RT0);
	host_to_be(RR0, RT0);
	read_block(r2, 0, RL1, RR1, RT0);

	/* Update IV, load src[1] and save to iv[0] */
	read_block_host(r2, 8, r5, r6, RT0);
	stm lr, {r5, r6};

	bl _gcry_blowfish_arm_enc_blk2;
	/* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */

	/* r1: dst, r0: src */
	pop {r0};

	/* dst = src ^ result */
	read_block2_host(r0, r5, r6, r7, r8, lr);
	eor r5, r5, r4;
	eor r6, r6, r3;
	eor r7, r7, r10;
	eor r8, r8, r9;
	write_block2_host(r1, r5, r6, r7, r8, r9, r10);

	pop {r4-r11, ip, pc};
.ltorg
.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec;
|
|
|
|
/*
 * _gcry_blowfish_arm_ctr_enc - CTR-mode encrypt two blocks.
 *
 * input:
 *	r0: CTX
 *	r1: dst (2 blocks)
 *	r2: src (2 blocks)
 *	r3: iv (64bit, big-endian)
 *
 * Encrypts the counter values iv and iv + 1, xors the results with src
 * to produce dst, and stores iv + 2 back as the new counter.
 */
.align 3
.globl _gcry_blowfish_arm_ctr_enc;
.type  _gcry_blowfish_arm_ctr_enc,%function;

_gcry_blowfish_arm_ctr_enc:
	/* src pointer is pushed separately so it can be popped into r0 later */
	push {r2, r4-r11, ip, lr};

	/* keep the iv pointer: r3 is about to be overwritten as RL0 */
	mov lr, r3;

	/* Load IV (big => host endian) */
	read_block_aligned(lr, 0, RL0, RR0, be_to_host, RT0);

	/* Construct IVs: 64-bit increments, carry from low (R) into high (L) word */
	adds RR1, RR0, #1; /* +1 */
	adc  RL1, RL0, #0;
	adds r6, RR1, #1; /* +2 */
	adc  r5, RL1, #0;

	/* Store new IV (host => big-endian) */
	write_block_aligned(lr, 0, r5, r6, host_to_be, RT0);

	bl _gcry_blowfish_arm_enc_blk2;
	/* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */

	/* r1: dst, r0: src */
	pop {r0};

	/* XOR key-stream with plaintext */
	read_block2_host(r0, r5, r6, r7, r8, lr);
	eor r5, r5, r4;
	eor r6, r6, r3;
	eor r7, r7, r10;
	eor r8, r8, r9;
	write_block2_host(r1, r5, r6, r7, r8, r9, r10);

	pop {r4-r11, ip, pc};
.ltorg
.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc;
|
|
|
|
/*
 * _gcry_blowfish_arm_dec_blk2 - decrypt two blocks held in registers.
 *
 * input:
 *	CTXs0 (r0): crypto context
 *	[RL0, RR0], [RL1, RR1]: source blocks
 * output:
 *	[RR0, RL0], [RR1, RL1]: decrypted blocks (halves swapped), each
 *	word passed through host_to_be
 *
 * This is not a callable function: it is entered by a plain branch from
 * _gcry_blowfish_arm_cbc_dec and leaves by branching to .Ldec_cbc_tail.
 * lr is overwritten (RMASK) and nothing is saved here.
 */
.align 3
.type  _gcry_blowfish_arm_dec_blk2,%function;

_gcry_blowfish_arm_dec_blk2:
	add CTXs2, CTXs0, #(s2 - s0);
	mov RMASK, #(0xff << 2); /* byte mask */

	/* 16 rounds, two per F2 invocation; the last one loads no further key */
	load_n_add_roundkey_dec2(17);
	round_dec2(15, next_key);
	round_dec2(13, next_key);
	round_dec2(11, next_key);
	round_dec2(9, next_key);
	round_dec2(7, next_key);
	round_dec2(5, next_key);
	round_dec2(3, next_key);
	round_dec2(1, dummy);

	host_to_be(RR0, RT0);
	host_to_be(RL0, RT0);
	host_to_be(RR1, RT0);
	host_to_be(RL1, RT0);

	/* continue in the CBC epilogue */
	b .Ldec_cbc_tail;
.ltorg
.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2;
|
|
|
|
/*
 * _gcry_blowfish_arm_cbc_dec - CBC-decrypt two blocks.
 *
 * input:
 *	r0: CTX
 *	r1: dst (2 blocks)
 *	r2: src (2 blocks)
 *	r3: iv (64bit)
 *
 * Decrypts src[0] and src[1], xors them with iv and src[0] respectively
 * to produce dst, and replaces iv with src[1].
 */
.align 3
.globl _gcry_blowfish_arm_cbc_dec;
.type  _gcry_blowfish_arm_cbc_dec,%function;

_gcry_blowfish_arm_cbc_dec:
	/* r2 (src) and r3 (iv) are pushed too; they are popped at the tail */
	push {r2-r11, ip, lr};

	read_block2(r2, RL0, RR0, RL1, RR1, RT0);

	/* dec_blk2 is only used by cbc_dec, jump directly in/out instead
	 * of function call. */
	b _gcry_blowfish_arm_dec_blk2;
.Ldec_cbc_tail:
	/* result in RR0:RL0, RR1:RL1 = r4:r3, r10:r9 */

	/* r0: src, r1: dst, r2: iv */
	pop {r0, r2};

	/* load IV+1 (src[0]) to r7:r8. Might be unaligned. */
	read_block_host(r0, 0, r7, r8, r5);
	/* load IV (iv[0]) to r5:r6. 'iv' is aligned. */
	ldm r2, {r5, r6};

	/* out[1] ^= IV+1 */
	eor r10, r10, r7;
	eor r9, r9, r8;
	/* out[0] ^= IV */
	eor r4, r4, r5;
	eor r3, r3, r6;

	/* load IV+2 (src[1]) to r7:r8. Might be unaligned. */
	read_block_host(r0, 8, r7, r8, r5);
	/* store IV+2 to iv[0] (aligned). */
	stm r2, {r7, r8};

	/* store result to dst[0-3]. Might be unaligned. */
	write_block2_host(r1, r4, r3, r10, r9, r5, r6);

	pop {r4-r11, ip, pc};
.ltorg
.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec;
|
|
|
|
#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
|
|
#endif /*__ARMEL__*/
|