Asmcodes: Speck Block Cipher

Introduction

Speck is a family of lightweight block ciphers publicly released by the National Security Agency (NSA) in June 2013. It’s an ARX (add-rotate-xor) design optimized for performance in software implementations and has been suggested for use on resource constrained devices.

Speck supports a variety of block and key sizes. A block is always two words, but the words may be 16, 24, 32, 48 or 64 bits in size. The corresponding key is 2, 3 or 4 words. The round function consists of two rotations, adding the right word to the left word, xoring the key into the left word, and then xoring the left word into the right word. The number of rounds depends on the parameters selected.

There’s some interesting speculation on the reasons for publishing Simon and Speck in a post to a crypto mailing list: Speculation on the origin of Speck and Simon.

There are two implementations provided here. The first uses a 64-bit block size, 128-bit key and 27 rounds, since this all fits easily into the 32-bit mode of the x86 architecture. The second uses a 128-bit block size, 256-bit key and 34 rounds, since this all fits easily into the x86-64 mode of the x86 architecture.

Key schedule

void speck_setkey(void *in, void *out)
{
  const uint32_t *key = (const uint32_t*)in;
  uint32_t       *rk  = (uint32_t*)out;
  uint32_t       round, carry;

  // working copy of the 128-bit key: a is the word emitted as the
  // round key, l0..l2 are the remaining words mixed in one per round
  uint32_t a  = key[0];
  uint32_t l0 = key[1];
  uint32_t l1 = key[2];
  uint32_t l2 = key[3];

  // produce SPECK_RNDS 32-bit round keys
  for (round = 0; round < SPECK_RNDS; round++)
  {
    rk[round] = a;

    // ARX step keyed with the round number
    l0 = (ROTR32(l0, 8) + a) ^ round;
    a  = ROTL32(a, 3) ^ l0;

    // rotate (l0, l1, l2) left by one word: the freshly mixed
    // word moves to the back of the queue
    carry = l0;
    l0    = l1;
    l1    = l2;
    l2    = carry;
  }
}
; number of rounds (and round keys) for the 64-bit block / 128-bit key
; variant implemented by the 32-bit routines
%define SPECK_RNDS 27
    
; register allocation for the four 32-bit key words
%define k0 eax    
%define k1 ebx    
%define k2 ebp    
%define k3 edx
    
; speck_setkeyx(in, ks)
;
; Expands the four 32-bit key words at `in` into SPECK_RNDS 32-bit round
; keys stored consecutively at `ks`.
;
; In:    1st stack argument = in (key), 2nd stack argument = ks (output)
; Out:   nothing in registers; pushad/popad restore every general register
; Notes: lodsd/stosd advance esi/edi by 4 only when the direction flag is
;        clear, which this code takes for granted.
speck_setkeyx:
_speck_setkeyx:
    pushad
    mov    esi, [esp+32+4]   ; esi = in  (pushad added 32 bytes above the return address)
    mov    edi, [esp+32+8]   ; edi = ks
    lodsd
    xchg   eax, k3           ; park key[0] in k3: eax is still needed as the lodsd target
    lodsd
    xchg   eax, k1           ; k1 = key[1]
    lodsd
    xchg   eax, k2           ; k2 = key[2]
    lodsd
    xchg   eax, k3           ; k3 = key[3] and k0 (eax) gets key[0] back
    xor    ecx, ecx          ; i = 0
spk_sk:
    ; ((uint32_t*)ks)[i] = k0;
    stosd                    ; k0 is eax, so stosd writes it and steps edi to the next slot
    ; k1 = (ROTR32(k1, 8) + k0) ^ i;
    ror    k1, 8
    add    k1, k0
    xor    k1, ecx
    ; k0 = ROTL32(k0, 3) ^ k1;
    rol    k0, 3
    xor    k0, k1
    ; rotate (k1, k2, k3) left by one 32-bit word:
    ; k1 = old k2, k2 = old k3, k3 = new k1
    xchg   k3, k2
    xchg   k3, k1
    ; i++
    inc    ecx
    cmp    cl, SPECK_RNDS    ; byte compare is enough: ecx starts at 0 and SPECK_RNDS < 256
    jnz    spk_sk   
    popad
    ret

Encryption/Decryption

void speck_encrypt(void *in, void *keys, int enc)
{
  uint32_t       *blk = (uint32_t*)in;
  const uint32_t *rk  = (const uint32_t*)keys;
  uint8_t        n;

  // load the two 32-bit halves of the block
  uint32_t left  = blk[0];
  uint32_t right = blk[1];

  if (enc == SPECK_DECRYPT)
  {
    // inverse rounds, consuming the round keys from last to first
    for (n = 0; n < SPECK_RNDS; n++)
    {
      right = ROTR32(left ^ right, 3);
      left  = ROTL32((left ^ rk[SPECK_RNDS-1-n]) - right, 8);
    }
  }
  else
  {
    // forward rounds, consuming the round keys in order
    for (n = 0; n < SPECK_RNDS; n++)
    {
      left  = (ROTR32(left, 8) + right) ^ rk[n];
      right = ROTL32(right, 3) ^ left;
    }
  }

  // write the processed block back in place
  blk[0] = left;
  blk[1] = right;
}
; register allocation for the two 32-bit halves of the block
%define x0 eax    
%define x1 ebx
    
; speck_encryptx(ks, enc, in)
;
; Encrypts (enc == 0) or decrypts (enc != 0) the 64-bit block at `in`
; in place, using the SPECK_RNDS round keys at `ks`.
;
; In:    stack arguments, in the order they are read below: ks, enc, in.
;        This order differs from the C reference speck_encrypt(in, keys, enc).
; Out:   nothing in registers; pushad/popad restore every general register
; Notes: lodsd/stosd/scasd rely on the direction flag being clear.
;        NOTE(review): only cl is loaded with the round count, so the upper
;        24 bits of enc stay in ecx and would inflate the `loop` count;
;        enc is assumed to be a small value such as 0 or 1 - confirm callers.
speck_encryptx:
_speck_encryptx:
    pushad
    lea    esi, [esp+32+4]   ; esi -> first stack argument (pushad added 32 bytes)
    lodsd
    xchg   edi, eax          ; edi = ks
    lodsd
    xchg   eax, ecx          ; ecx = enc
    lodsd
    xchg   eax, esi          ; esi = in
    push   esi               ; keep `in` for the write-back at spk_end
    lodsd    
    xchg   eax, x1           ; park in[0] in x1 while eax receives the next word
    lodsd
    xchg   eax, x1           ; x0 (eax) = in[0], x1 = in[1]
    test   ecx, ecx          ; ZF = (enc == 0)
    mov    cl, SPECK_RNDS    ; loop counter; mov leaves the flags from test intact
    jz     spk_e0
spk_d0:
    ; x1 = ROTR32(x1 ^ x0, 3);
    xor    x1, x0
    ror    x1, 3
    ; x0 = ROTL32((x0 ^ ks[SPECK_RNDS-1-i]) - x1, 8);
    xor    x0, [edi+4*ecx-4] ; ecx counts SPECK_RNDS..1, so this walks ks from last to first
    sub    x0, x1
    rol    x0, 8
    loop   spk_d0
    jmp    spk_end    
spk_e0:
    ; x0 = (ROTR32(x0, 8) + x1) ^ ks[i];
    ror    x0, 8
    add    x0, x1
    xor    x0, [edi]
    scasd                    ; used only to advance edi by 4; the comparison result is ignored
    ; x1 = ROTL32(x1, 3) ^ x0;
    rol    x1, 3
    xor    x1, x0
    loop   spk_e0
spk_end:
    pop    edi               ; edi = in
    ; ((uint32_t*)in)[0] = x0;
    stosd
    xchg   eax, x1
    ; ((uint32_t*)in)[1] = x1;
    stosd    
    popad
    ret

Just encryption

Block ciphers are usually insecure when used by themselves. Counter Mode (CTR) is recommended for turning a block cipher into a stream cipher, which then only requires encryption. Here’s the function with key scheduling and encryption integrated.

; Register allocation for the combined key-schedule + encrypt routine.
; eax is the round counter and the lodsd/stosd data register, so neither
; the block halves nor the key words may live in it, and x0/x1 must not
; share registers with k0/k1 as they would under the earlier definitions.
%define k0 edi
%define k1 ebp
%define k2 ecx
%define k3 esi

%define x0 ebx
%define x1 edx

; speck64_encryptx(key, data)
;
; Encrypts the 64-bit block at `data` in place with the 128-bit key at
; `key`, generating each round key on the fly instead of storing a
; schedule. Runs SPECK_RNDS rounds.
;
; In:    1st stack argument = key (4 dwords), 2nd stack argument = data (2 dwords)
; Out:   nothing in registers; pushad/popad restore every general register
; Notes: lodsd/stosd rely on the direction flag being clear.
speck64_encryptx:
_speck64_encryptx:
    pushad
    mov    esi, [esp+32+8]   ; esi = data (2nd argument; pushad added 32 bytes)
    push   esi               ; save data pointer for the write-back

    lodsd
    xchg   eax, x0           ; x0 = data[0]
    lodsd
    xchg   eax, x1           ; x1 = data[1]

    ; the push above lowered esp by 4, so the 1st argument now sits at
    ; esp + 4 (saved esi) + 32 (pushad) + 4 (return address)
    mov    esi, [esp+4+32+4] ; esi = key
    lodsd
    xchg   eax, k0           ; k0 = key[0]
    lodsd
    xchg   eax, k1           ; k1 = key[1]
    lodsd
    xchg   eax, k2           ; k2 = key[2]
    lodsd
    xchg   eax, k3           ; k3 = key[3] (esi is free: all key words are loaded)
    xor    eax, eax          ; i = 0
.round:
    ; x0 = (ROTR32(x0, 8) + x1) ^ k0;
    ror    x0, 8
    add    x0, x1
    xor    x0, k0
    ; x1 = ROTL32(x1, 3) ^ x0;
    rol    x1, 3
    xor    x1, x0
    ; derive the next round key
    ; k1 = (ROTR32(k1, 8) + k0) ^ i;
    ror    k1, 8
    add    k1, k0
    xor    k1, eax
    ; k0 = ROTL32(k0, 3) ^ k1;
    rol    k0, 3
    xor    k0, k1
    ; rotate (k1, k2, k3) left by one word
    xchg   k3, k2
    xchg   k3, k1
    ; i++
    inc    eax
    cmp    al, SPECK_RNDS
    jnz    .round

    pop    edi               ; edi = data (overwrites k0, which is no longer needed)
    xchg   eax, x0
    stosd                    ; data[0] = x0
    xchg   eax, x1
    stosd                    ; data[1] = x1
    popad
    ret

The x86-64 version, which supports 256-bit keys and 128-bit blocks, is only 24 bytes larger.

;
; speck128/256 encryption in 88 bytes
;
; speck128_encryptx(key, data)
;
; Encrypts the 128-bit block at `data` in place with the 256-bit key at
; `key`, generating each round key on the fly.
;
; In:    rcx = key (4 qwords), rdx = data (2 qwords); these are the first
;        two integer argument registers of the Microsoft x64 convention
; Out:   nothing in registers
; Saves: rbp, rbx, rdi, rsi are pushed and restored; rax, rcx, rdx are clobbered
;
%ifndef BIN
    global speck128_encryptx
%endif

; Speck128/256 uses 34 rounds. A dedicated constant is used so the
; 27-round value that SPECK_RNDS holds for the 64-bit-block routines is
; neither picked up here by accident nor overwritten.
%define SPECK128_RNDS 34

%define k0 rdi
%define k1 rbp
%define k2 rsi
%define k3 rcx

%define x0 rbx
%define x1 rdx

speck128_encryptx:
    push   rbp
    push   rbx
    push   rdi
    push   rsi

    mov    k0, [rcx   ]      ; k0 = key[0]
    mov    k1, [rcx+ 8]      ; k1 = key[1]
    mov    k2, [rcx+16]      ; k2 = key[2]
    mov    k3, [rcx+24]      ; k3 = key[3] (loaded last: k3 is rcx, the key pointer)

    push   rdx               ; save data pointer; rdx is about to become x1
    mov    x0, [rdx  ]       ; x0 = in[0]
    mov    x1, [rdx+8]       ; x1 = in[1]

    xor    eax, eax          ; i = 0 (also clears the upper half of rax)
.round:
    ; x1 = (ROTR64(x1, 8) + x0) ^ k0;
    ror    x1, 8
    add    x1, x0
    xor    x1, k0
    ; x0 =  ROTL64(x0, 3) ^ x1;
    rol    x0, 3
    xor    x0, x1
    ; derive the next round key
    ; k1 = (ROTR64(k1, 8) + k0) ^ i;
    ror    k1, 8
    add    k1, k0
    xor    k1, rax
    ; k0 = ROTL64(k0, 3) ^ k1;
    rol    k0, 3
    xor    k0, k1
    ; rotate (k1, k2, k3) left by one word
    xchg   k3, k2
    xchg   k3, k1
    ; i++
    add    al, 1
    cmp    al, SPECK128_RNDS
    jnz    .round

    pop    rax               ; rax = data pointer
    mov    [rax  ], x0
    mov    [rax+8], x1

    pop    rsi
    pop    rdi
    pop    rbx
    pop    rbp
    ret

Summary

instruction set setkey + encrypt + decrypt (bytes) setkey + encrypt (bytes)
x86 105 64
x86-64 132 88

MSVC generated code is 139 bytes using /O2 /Os flags. x86 assembly is 105 bytes for enc+dec or 64 bytes for just enc. Nice algorithm that would be fun for those new to cryptography.

See sources here

Advertisements
This entry was posted in assembly, cryptography, encryption, programming, security and tagged , , , , . Bookmark the permalink.

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s