## RC6 Block cipher

### Introduction

RC6 is a 128-bit block cipher that supports 128-, 192- and 256-bit keys. It was designed by Ron Rivest, Matthew Robshaw, Ray Sidney and Yiqun Lisa Yin, and was published in 1998. RC6 has a simple structure consisting of two Feistel networks whose data are mixed via data-dependent rotations. It’s a more advanced version of RC5 that operates on four 32-bit words instead of two, and it includes mitigations against attacks that are possible on RC5. The security of RC6 relies on the strength of data-dependent rotations and on the mixed use of exclusive-or operations and modular additions. As of February 13th 2019, the patent has expired.

### Compact code

The following is not optimized for performance. It’s a simplified version intended to be used as a reference: it performs encryption only and always uses a 256-bit key.

```c
#define R(v,n)(((v)<<(n))|((v)>>(32-(n))))
#define F(n)for(i=0;i<n;i++)
// 32-bit working word; the code below assumes unsigned int is exactly 32 bits wide.
typedef unsigned int W;

// Rotate v left by the low five bits of n.
// Masking both shift counts keeps the expression well-defined for every n,
// including n == 0 and n >= 32, which occur with data-dependent rotation counts.
static W rc6_rotl(W v, W n) {
  n &= 31;
  return (v << n) | (v >> ((32 - n) & 31));
}

// Read a 32-bit word stored least-significant byte first.
static W rc6_load(const unsigned char *b) {
  return (W)b[0] | ((W)b[1] << 8) | ((W)b[2] << 16) | ((W)b[3] << 24);
}

// Write a 32-bit word least-significant byte first.
static void rc6_store(unsigned char *b, W v) {
  b[0] = (unsigned char)(v & 255);
  b[1] = (unsigned char)((v >> 8) & 255);
  b[2] = (unsigned char)((v >> 16) & 255);
  b[3] = (unsigned char)((v >> 24) & 255);
}

// Encrypt one 128-bit block in place with RC6 (32-bit words, 20 rounds).
//
//   mk : 32 bytes of key material (256-bit key), read only
//   p  : 16 bytes of plaintext, overwritten with the ciphertext
//
// Both buffers are accessed byte by byte in little-endian order, so they may
// have any alignment and the result does not depend on the host byte order.
// The key schedule is recomputed on every call.
void rc6(void *mk, void *p) {
  enum { ROUNDS = 20, SUBKEYS = 2 * ROUNDS + 4, KEYWORDS = 8 };

  const unsigned char *key = mk;
  unsigned char *blk = p;
  W S[SUBKEYS], L[KEYWORDS];
  W A, B, C, D, X, Y, T, i;
  const W *k = S;

  // copy 256-bit key
  for (i = 0; i < KEYWORDS; i++) {
    L[i] = rc6_load(key + 4 * i);
  }

  // initialize state: S[i] = 0xB7E15163 + i * 0x9E3779B9
  A = 0xB7E15163;
  for (i = 0; i < SUBKEYS; i++) {
    S[i] = A;
    A += 0x9E3779B9;
  }

  // create subkeys: three passes over S, cycling through L
  A = B = 0;
  for (i = 0; i < SUBKEYS * 3; i++) {
    A = S[i % SUBKEYS] = rc6_rotl(S[i % SUBKEYS] + A + B, 3);
    // the rotation count uses the freshly updated A and the previous B
    B = L[i % KEYWORDS] = rc6_rotl(L[i % KEYWORDS] + A + B, A + B);
  }

  // load the block and apply pre-whitening to B and D
  A = rc6_load(blk);
  B = rc6_load(blk + 4);
  C = rc6_load(blk + 8);
  D = rc6_load(blk + 12);
  B += *k++;
  D += *k++;

  // apply 20 rounds of encryption
  for (i = 0; i < ROUNDS; i++) {
    X = rc6_rotl(B * (2 * B + 1), 5);
    Y = rc6_rotl(D * (2 * D + 1), 5);
    A = rc6_rotl(A ^ X, Y) + *k++;
    C = rc6_rotl(C ^ Y, X) + *k++;
    // (A, B, C, D) = (B, C, D, A)
    T = A; A = B; B = C; C = D; D = T;
  }

  // post-whitening of A and C, then write the ciphertext back
  A += *k++;
  C += *k++;
  rc6_store(blk, A);
  rc6_store(blk + 4, B);
  rc6_store(blk + 8, C);
  rc6_store(blk + 12, D);
}
```

### x86 assembly

```nasm
; -----------------------------------------------
; RC6 block cipher in x86 assembly (encryption only)
;
; https://people.csail.mit.edu/rivest/pubs/RRSY98.pdf
;
; size: 170 bytes
;
; global calls use cdecl convention
;
; -----------------------------------------------

; NASM directive: generate code for 32-bit mode
bits 32

; Export the entry point under both names (plain and underscore-prefixed)
; unless BIN is defined.
; NOTE(review): BIN presumably selects a flat-binary build - confirm against the build script.
%ifndef BIN
global rc6
global _rc6
%endif

; Number of encryption rounds.
%define RC6_ROUNDS 20
; Number of 32-bit sub key words: 2*(20+2) = 44.
; Used below both as the sub key count and (times 4) as the sub key table size in bytes.
%define RC6_KR     (2*(RC6_ROUNDS+2))

; Register aliases for the four 32-bit words of the block during the rounds.
; The same registers are used for other purposes earlier in the routine
; (esi/ebx as pointers, edx/ebp as counters during sub key creation).
%define A esi
%define B ebx
%define C edx
%define D ebp

rc6:
_rc6:
mov    esi, [esp+32+4]     ; edi = key / L
mov    ebx, [esp+32+8]     ; esi = data
xor    ecx, ecx            ; ecx = 0
mov    cl, RC6_KR*4+32     ; allocate space for key and sub keys
sub    esp, ecx            ; esp = S
; copy 256-bit key to local buffer
mov    edi, esp            ; edi = L
mov    cl, 32
rep    movsb
; initialize S / sub keys
push   edi                 ; save S
mov    eax, 0xB7E15163     ; eax = RC6_P
mov    cl, RC6_KR
init_subkeys:
stosd                      ; S[i] = A
add    eax, 0x9E3779B9     ; A += RC6_Q
loop   init_subkeys
pop    edi                 ; restore S
mov    esi, ebx            ; esi = data
mul    ecx                 ; eax = 0, edx = 0
xor    ebx, ebx            ; ebx = 0
set_idx:
xor    ebp, ebp            ; i % RC6_KR
init_key_loop:
cmp    ebp, RC6_KR
je     set_idx

; A = S[i%RC6_KR] = ROTL32(S[i%RC6_KR] + A+B, 3);
add    eax, ebx            ; A += B
add    eax, [edi+ebp*4]    ; A += S[i%RC6_KR]
rol    eax, 3              ; A  = ROTL32(A, 3)
mov    [edi+ebp*4], eax    ; S[i%RC6_KR] = A

; B = L[i%4] = ROTL32(L[i%4] + A+B, A+B);
add    ebx, eax            ; B += A
mov    ecx, ebx            ; save A+B in ecx
push   edx                 ; save i
and    dl, 7               ; %= 8
add    ebx, [edi+edx*4-32] ; B += L[i%8]
rol    ebx, cl             ; B = ROTL32(B, A+B)
mov    [edi+edx*4-32], ebx ; L[i%8] = B
pop    edx                 ; restore i
inc    ebp
inc    edx                 ; i++
cmp    dl, RC6_KR*3        ; i<RC6_KR*3
jnz    init_key_loop

push   esi               ; save ptr to data
lodsd
push   eax               ; save A
lodsd
xchg   eax, B            ; load B
lodsd
xchg   eax, C            ; load C
lodsd
xchg   eax, D            ; load D
pop    A                 ; restore A

push   20                ; ecx = RC6_ROUNDS
pop    ecx
; B += *k; k++;
scasd
; D += *k; k++;
scasd
r6c_l3:
push   ecx
; T0 = ROTL32(B * (2 * B + 1), 5);
lea    eax, [B+B+1]
imul   eax, B
rol    eax, 5
; T1 = ROTL32(D * (2 * D + 1), 5);
lea    ecx, [D+D+1]
imul   ecx, D
rol    ecx, 5
; A = ROTL32(A ^ T0, T1) + *k; k++;
xor    A, eax
rol    A, cl       ; T1 should be ecx
add    A, [edi]    ; += *k;
scasd              ; k++;
; C = ROTL32(C ^ T1, T0) + *k; k++;
xor    C, ecx      ; C ^= T1
xchg   eax, ecx    ;
rol    C, cl       ; rotate by T0
scasd
; swap
xchg   D, eax
xchg   C, eax
xchg   B, eax
xchg   A, eax
xchg   D, eax
; decrease counter
pop    ecx
loop   r6c_l3

; A += *k; k++;
scasd
; C += *k; k++;
scasd
; save ciphertext
pop    esp         ; esp = data
xchg   esp, edi    ; esp = fixed stack, edi = data
xchg   eax, A
stosd              ; save A
xchg   eax, B
stosd              ; save B
xchg   eax, C
stosd              ; save C
xchg   eax, D
stosd              ; save D