SPECK Block Cipher

Introduction

SPECK is a family of lightweight block ciphers designed and published by the National Security Agency (NSA) in June 2013. It uses an ARX (Add-Rotate-Xor) structure optimized for performance in software implementations and has been suggested for use on resource-constrained devices and the Internet of Things (IoT). SPECK supports a variety of block and key sizes. A block is always two words, but the words may be 16, 24, 32, 48 or 64 bits in size. The corresponding key is 2, 3 or 4 words. The round function consists of two rotations, adding the right word to the left word, XORing the round key into the left word, and XORing the left word into the right word. The number of rounds depends on the parameters selected. Two variants are implemented here, in C and in three different assembly languages: SPECK-64/128 written in x86, ARM32 and ARM64 assembly, and SPECK-128/256 written in AMD64 and ARM64 assembly. SPECK-64/128 uses 27 rounds of encryption and fits well in both the legacy mode (x86) and the long mode (x64) of x86 CPUs. SPECK-128/256, however, operates on 64-bit words and is therefore only suitable for 64-bit architectures.

Key schedule

/*
 * Expand a 128-bit SPECK-64/128 key into 27 32-bit round keys.
 *
 *   in  : four 32-bit key words
 *   out : receives 27 32-bit round keys
 */
void speck64_setkey(const void *in, void *out)
{
    const uint32_t *key = (const uint32_t*)in;
    uint32_t *rk = (uint32_t*)out;
    uint32_t round, t;

    // working copy of the key: k0 is the current round key,
    // k1..k3 are the remaining key words
    uint32_t k0 = key[0], k1 = key[1];
    uint32_t k2 = key[2], k3 = key[3];

    for (round = 0; round < 27; round++)
    {
      // emit the round key for this round
      rk[round] = k0;

      // mix the oldest key word with the round key and round number
      t  = (ROTR32(k1, 8) + k0) ^ round;
      k0 = ROTL32(k0, 3) ^ t;

      // shift the key words down one slot; the new word goes last
      k1 = k2;
      k2 = k3;
      k3 = t;
    }
}

x86 assembly

; -----------------------------------------------
; speck_setkeyx(in, ks)
;
; Expands four 32-bit key words into SPECK_RNDS 32-bit round keys.
;
; Arguments are read from the stack:
;   [esp+4] = in : source key words
;   [esp+8] = ks : destination for the round keys
;
; All general-purpose registers are saved by pushad and restored by
; popad. lodsd/stosd are used for the memory accesses, so the
; direction flag is presumably expected to be clear on entry.
; -----------------------------------------------
%define SPECK_RNDS 27
    
; register roles: k0 is the current round key, k1..k3 the other key words
%define k0 eax    
%define k1 ebx    
%define k2 ebp    
%define k3 edx
    
speck_setkeyx:
_speck_setkeyx:
    pushad
    ; pushad stored 32 bytes, so the arguments sit 32 bytes further up
    mov    esi, [esp+32+4]   ; esi = in
    mov    edi, [esp+32+8]   ; edi = ks
    ; lodsd always loads into eax (= k0), so in[0] is parked in k3
    ; until the last word has been read
    lodsd
    xchg   eax, k3           ; k3 = in[0] (temporarily)
    lodsd
    xchg   eax, k1           ; k1 = in[1]
    lodsd
    xchg   eax, k2           ; k2 = in[2]
    lodsd
    xchg   eax, k3           ; k0 = in[0], k3 = in[3]
    xor    ecx, ecx          ; i = 0
spk_sk:
    ; ((uint32_t*)ks)[i] = k0;
    stosd
    ; k1 = (ROTR32(k1, 8) + k0) ^ i;
    ror    k1, 8
    add    k1, k0
    xor    k1, ecx
    ; k0 = ROTL32(k0, 3) ^ k1;
    rol    k0, 3
    xor    k0, k1
    ; rotate left 32-bits
    ; (k1 = old k2, k2 = old k3, k3 = freshly computed k1)
    xchg   k3, k2
    xchg   k3, k1
    ; i++
    inc    ecx
    ; ecx only ever counts 0..SPECK_RNDS, so comparing cl is sufficient
    cmp    cl, SPECK_RNDS    
    jnz    spk_sk   
    popad
    ret

Encryption/Decryption

/*
 * Encrypt or decrypt one 64-bit block in place with SPECK-64/128.
 *
 *   keys : 27 32-bit round keys
 *   enc  : SPECK_DECRYPT selects decryption, any other value encrypts
 *   data : two 32-bit words, overwritten with the result
 */
void speck64_encrypt(const void *keys, int enc, void *data)
{
    const uint32_t *rk = (const uint32_t*)keys;
    uint32_t *blk = (uint32_t*)data;
    uint32_t hi = blk[0], lo = blk[1];
    int r;

    if (enc == SPECK_DECRYPT)
    {
      // undo the rounds, walking the round keys from last to first
      for (r = 27 - 1; r >= 0; r--)
      {
        lo = ROTR32(lo ^ hi, 3);
        hi = ROTL32((hi ^ rk[r]) - lo, 8);
      }
    }
    else
    {
      // apply the rounds, walking the round keys from first to last
      for (r = 0; r < 27; r++)
      {
        hi = (ROTR32(hi, 8) + lo) ^ rk[r];
        lo = ROTL32(lo, 3) ^ hi;
      }
    }

    // write the block back
    blk[0] = hi;
    blk[1] = lo;
}

x86 assembly

; -----------------------------------------------
; speck_encryptx(ks, enc, in)
;
; Encrypts or decrypts one 64-bit block in place using SPECK_RNDS
; precomputed 32-bit round keys.
;
; Arguments are read from the stack:
;   [esp+ 4] = ks  : round keys
;   [esp+ 8] = enc : zero = encrypt, non-zero = decrypt
;   [esp+12] = in  : two 32-bit words, overwritten with the result
;
; All general-purpose registers are saved by pushad and restored by
; popad. String instructions are used for memory access, so the
; direction flag is presumably expected to be clear on entry.
;
; NOTE(review): only cl is loaded with the round count, so the upper
; 24 bits of enc remain in ecx; this appears to assume enc < 256.
; -----------------------------------------------
%define x0 eax    
%define x1 ebx
    
speck_encryptx:
_speck_encryptx:
    pushad
    ; walk the argument list with lodsd (32 = bytes pushed by pushad)
    lea    esi, [esp+32+4]
    lodsd
    xchg   edi, eax          ; edi = ks
    lodsd
    xchg   eax, ecx          ; ecx = enc
    lodsd
    xchg   eax, esi          ; esi = in
    push   esi               ; keep the block pointer for the final store
    ; lodsd loads into eax (= x0), so in[0] is parked in x1 first
    lodsd    
    xchg   eax, x1           ; x1 = in[0] (temporarily)
    lodsd
    xchg   eax, x1           ; x0 = in[0], x1 = in[1]
    test   ecx, ecx          ; ZF = (enc == 0)
    ; mov leaves the flags alone, so jz below still tests enc
    mov    cl, SPECK_RNDS
    jz     spk_e0
spk_d0:
    ; x1 = ROTR32(x1 ^ x0, 3);
    xor    x1, x0
    ror    x1, 3
    ; x0 = ROTL32((x0 ^ ks[SPECK_RNDS-1-i]) - x1, 8);
    ; ecx counts down from SPECK_RNDS to 1, giving ks[ecx-1]
    xor    x0, [edi+4*ecx-4]
    sub    x0, x1
    rol    x0, 8
    loop   spk_d0            ; ecx--, repeat while ecx != 0
    jmp    spk_end    
spk_e0:
    ; x0 = (ROTR32(x0, 8) + x1) ^ ks[i];
    ror    x0, 8
    add    x0, x1
    xor    x0, [edi]
    ; scasd is used only to advance edi to the next round key;
    ; its comparison result is not used
    scasd
    ; x1 = ROTL32(x1, 3) ^ x0;
    rol    x1, 3
    xor    x1, x0
    loop   spk_e0            ; ecx--, repeat while ecx != 0
spk_end:
    pop    edi               ; edi = in (pushed above)
    ; ((uint32_t*)in)[0] = x0;
    stosd
    xchg   eax, x1
    ; ((uint32_t*)in)[1] = x1;
    stosd    
    popad
    ret

SPECK-64/128 in C

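Block ciphers are often used in Counter (CTR) mode, which turns a block cipher into a stream cipher. CTR mode only ever calls the cipher in the encryption direction, so a decryption routine is not needed. Here’s a function with key scheduling and encryption combined.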

#define R(v,n)(((v)>>(n))|((v)<<(32-(n))))
#define F(n)for(i=0;i<n;i++)
typedef unsigned int W;

/*
 * SPECK-64/128 encryption with the key schedule computed on the fly.
 *
 *   mk : four 32-bit key words (read only)
 *   p  : two 32-bit data words, encrypted in place
 *
 * A rotate right by 29 is the same as a rotate left by 3 on 32 bits.
 */
void speck64(void*mk,void*p){
  W *key = mk, *blk = p;
  W rk = key[0], l0 = key[1], l1 = key[2], l2 = key[3];
  W hi = blk[0], lo = blk[1];
  W i, next;

  F(27) {
    // encrypt one round with the current round key
    hi = (R(hi, 8) + lo) ^ rk;
    lo = R(lo, 29) ^ hi;

    // derive the next round key
    next = (R(l0, 8) + rk) ^ i;
    rk   = R(rk, 29) ^ next;

    // shift the remaining key words, newest goes last
    l0 = l1;
    l1 = l2;
    l2 = next;
  }

  blk[0] = hi;
  blk[1] = lo;
}

SPECK-64/128 in x86 assembly

; -----------------------------------------------
; SPECK-64/128 Block Cipher in x86 assembly (Encryption only)
;
; size: 64 bytes 
;
; global calls use cdecl convention
;
; speck(key, in)
;   [esp+4] = key : four 32-bit key words (read only)
;   [esp+8] = in  : two 32-bit data words, encrypted in place
;
; The round keys are generated on the fly inside the encryption loop.
; All general-purpose registers are saved by pushad and restored by
; popad. lodsd/stosd are used for memory access, so the direction
; flag is presumably expected to be clear on entry.
; -----------------------------------------------

    bits 32

%define SPECK_RNDS 27
    
; NOTE: these four aliases are redefined further down before any
; instruction uses them; the later definitions are the ones in effect.
%define k0 eax    
%define k1 ebx    
%define k2 ebp    
%define k3 edx

;
; speck64/128 encryption in 64 bytes
;
%ifndef BIN
    global speck
    global _speck
%endif

; register roles used by the code below
; k0 = current round key, k1..k3 = remaining key words
%define k0 edi    
%define k1 ebp    
%define k2 ecx    
%define k3 esi

; w0/w1 = the two data words
%define w0 ebx    
%define w1 edx

speck:
_speck:    
    pushad    
    ; 32 = bytes pushed by pushad; +8 selects the second argument
    mov    esi, [esp+32+8]   ; esi = in
    push   esi               ; save
    
    lodsd
    xchg   eax, w0           ; w0 = in[0]
    lodsd
    xchg   eax, w1           ; w1 = in[1]
    
    ; same displacement as above, but the push moved esp down by 4,
    ; so this now addresses the first argument
    mov    esi, [esp+32+8]   ; esi = key
    lodsd
    xchg   eax, k0           ; k0 = key[0] 
    lodsd
    xchg   eax, k1           ; k1 = key[1]
    lodsd
    xchg   eax, k2           ; k2 = key[2]
    lodsd 
    xchg   eax, k3           ; k3 = key[3]    
    xor    eax, eax          ; i = 0
spk_el:
    ; w0 = (ROTR32(w0, 8) + w1) ^ k0;
    ror    w0, 8
    add    w0, w1
    xor    w0, k0
    ; w1 = ROTR32(w1, 29) ^ w0;
    ror    w1, 29
    xor    w1, w0
    ; k1 = (ROTR32(k1, 8) + k0) ^ i;
    ror    k1, 8
    add    k1, k0
    xor    k1, eax
    ; k0 = ROTR32(k0, 29) ^ k1;
    ror    k0, 29
    xor    k0, k1    
    ; permute key: k1 = old k2, k2 = old k3, k3 = freshly computed k1
    xchg   k3, k2
    xchg   k3, k1
    ; i++
    inc    eax
    ; eax only ever counts 0..SPECK_RNDS, so comparing al is sufficient
    cmp    al, SPECK_RNDS    
    jnz    spk_el
    
    pop    edi               ; edi = in (saved above)
    xchg   eax, w0           ; eax = w0
    stosd                    ; in[0] = w0
    xchg   eax, w1           ; eax = w1
    stosd                    ; in[1] = w1
    popad
    ret

SPECK-64/128 in ARM / AArch32 assembly

  .arm
  .arch armv7
  .text
  
  .global speck

// key
// k0 is the current round key, k1..k3 the remaining key words
k0 .req r2
k1 .req r3
k2 .req r4
k3 .req r5

// plaintext
x0 .req r6
x1 .req r7

// parameters
// k and i both name r0: the key pointer is only needed for the
// initial load, after which r0 is reused as the round counter
k  .req r0
x  .req r1
i  .req r0
t  .req r8

  // speck(void *key, void *data);
  //
  // SPECK-64/128 encryption of the two 32-bit words at data, in place.
  // The key words at key are only read; round keys are generated
  // inside the loop. r0-r12 are saved on entry and restored on exit.
speck:
  // save registers
  push   {r0-r12, lr}
  
  // load 128-bit key
  // k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
  ldm    k, {k0, k1, k2, k3}
  // load 64-bit plain text
  ldm    x, {x0, x1}          // x0 = x[0]; x1 = x[1];
  mov    i, #0                // i=0 (overwrites the key pointer in r0)
speck_loop:
  add    x0, x1, x0, ror #8   // x0 = (ROTR32(x0, 8) + x1) ^ k0;
  eor    x0, k0               //
  eor    x1, x0, x1, ror #29  // x1 = ROTL32(x1, 3) ^ x0;
  mov    t, k3                // backup k3
  add    k3, k0, k1, ror #8   // k3 = (ROTR32(k1, 8) + k0) ^ i;
  eor    k3, i                //
  eor    k0, k3, k0, ror #29  // k0 = ROTL32(k0, 3) ^ k3;
  mov    k1, k2               // k1 = k2;
  mov    k2, t                // k2 = t;
  add    i, #1                // i++;
  cmp    i, #27               // i<27;
  bne    speck_loop
  
  // save result
  stm    x, {x0, x1}          // x[0] = x0; x[1] = x1;
  
  // restore registers
  // (loading the saved lr into pc returns to the caller)
  pop    {r0-r12, pc}

SPECK-128/256 in C

#define R(v,n)(((v)>>(n))|((v)<<(64-(n))))
#define F(n)for(i=0;i<n;i++)
typedef unsigned long long W;

/*
 * SPECK-128/256 encryption with the key schedule computed on the fly.
 *
 *   mk : four 64-bit key words (read only)
 *   p  : two 64-bit data words, encrypted in place
 *
 * Word p[1] is the one rotated by 8 and mixed with the round key.
 * A rotate right by 61 is the same as a rotate left by 3 on 64 bits.
 */
void speck128(void*mk,void*p){
  W *key = mk, *blk = p;
  W rk = key[0], l0 = key[1], l1 = key[2], l2 = key[3];
  W lo = blk[0], hi = blk[1];
  W i, next;

  F(34) {
    // encrypt one round with the current round key
    hi = (R(hi, 8) + lo) ^ rk;
    lo = R(lo, 61) ^ hi;

    // derive the next round key
    next = (R(l0, 8) + rk) ^ i;
    rk   = R(rk, 61) ^ next;

    // shift the remaining key words, newest goes last
    l0 = l1;
    l1 = l2;
    l2 = next;
  }

  blk[0] = lo;
  blk[1] = hi;
}

SPECK-128/256 in AMD64 assembly

; -----------------------------------------------
; SPECK-128/256 block cipher in AMD64 assembly
;
; size: 83 bytes
;
; speck128(mk, data)
;   rdi = mk   : four 64-bit key words (read only)
;   rsi = data : two 64-bit data words, encrypted in place
;
; NOTE(review): this header used to say "microsoft fastcall
; convention", but the code reads its arguments from rdi and rsi,
; which are the System V AMD64 argument registers (Microsoft x64
; passes the first two arguments in rcx and rdx). Only rbx and rbp
; are saved; rax, rcx, rdx, rsi and rdi are overwritten.
;
; -----------------------------------------------

    %ifndef BIN
        global speck128
    %endif

    ; k0 = current round key, k1..k3 = remaining key words
    %define k0 rbx   
    %define k1 rcx    
    %define k2 rdx    
    %define k3 rdi

    ; x0/x1 = the two data words
    %define x0 rbp    
    %define x1 rsi

speck128:   
    push   rbp
    push   rbx
    
    ; keep the data pointer: rsi is about to be reused as x1
    push   rsi
    mov    x0, [rsi  ]       ; x0 = data[0]
    mov    x1, [rsi+8]       ; x1 = data[1] 
    
    ; F(4)k[i]=((W*)mk)[i];
    ; k3 is rdi itself, so it must be loaded last
    mov    k0, [rdi   ]      ; k0 = mk[0]
    mov    k1, [rdi+ 8]      ; k1 = mk[1]
    mov    k2, [rdi+16]      ; k2 = mk[2]
    mov    k3, [rdi+24]      ; k3 = mk[3]
    
    xor    eax, eax          ; i = 0
spk_L0:
    ; x[1] = (R(x[1], 8) + x[0]) ^ k[0];
    ror    x1, 8
    add    x1, x0
    xor    x1, k0
    ; x[0] = R(x[0], 61) ^ x[1];
    ror    x0, 61
    xor    x0, x1
    ; k[1] = (R(k[1], 8) + k[0]) ^ i;
    ror    k1, 8
    add    k1, k0
    ; cl is the low byte of k1 (rcx); i stays below 34, so xoring
    ; just the low byte is enough
    xor    cl, al            ; k1 ^= i
    ; k[0] = R(k[0], 61) ^ k[1];
    ror    k0, 61
    xor    k0, k1
    ; t = k[1], k[1] = k[2], k[2] = k[3], k[3] = t;
    xchg   k1, k2
    xchg   k2, k3
    ; i++
    inc    al
    cmp    al, 34    
    jnz    spk_L0
    
    pop    rax               ; rax = data pointer saved above
    ; save 128-bit result
    mov    [rax  ], x0
    mov    [rax+8], x1
    pop    rbx
    pop    rbp
    ret   

SPECK-128/256 in ARM64 / AArch64 assembly

// SPECK128/256 in ARM64 assembly
// 80 bytes
//
// Register usage (the names x0/x1 in the C-style comments below refer
// to the data words, not to the machine registers x0/x1):
//   x0      = mk   : pointer to four 64-bit key words (read only)
//   x1      = data : pointer to two 64-bit data words, encrypted in place
//   x2, x4  = data[0], data[1]
//   x5..x8  = key words k0..k3 (x5 is the current round key)
//   x3      = round counter i
//   x9      = temporary used while permuting the key words

    .arch armv8-a  
    .text
    
    .global speck128

    // speck128(void*mk, void*data);
speck128:
    // load 256-bit key
    // k0 = k[0]; k1 = k[1]; k2 = k[2]; k3 = k[3];
    ldp    x5, x6, [x0]
    ldp    x7, x8, [x0, 16] 
    // load 128-bit plain text
    ldp    x2, x4, [x1]         // x2 = data[0]; x4 = data[1];
    mov    x3, xzr              // i=0
L0:
    ror    x4, x4, 8
    add    x4, x4, x2           // x4 = (R(x4, 8) + x2) ^ k0;
    eor    x4, x4, x5           //
    eor    x2, x4, x2, ror 61   // x2 = R(x2, 61) ^ x4;
    mov    x9, x8               // backup k3
    ror    x6, x6, 8
    add    x8, x5, x6           // k3 = (R(k1, 8) + k0) ^ i;
    eor    x8, x8, x3           //
    eor    x5, x8, x5, ror 61   // k0 = R(k0, 61) ^ k3;
    mov    x6, x7               // k1 = k2;
    mov    x7, x9               // k2 = t;
    add    x3, x3, 1            // i++;
    cmp    x3, 34               // i < 34;
    bne    L0 
    
    // save result
    stp    x2, x4, [x1]         // data[0] = x2; data[1] = x4;
    ret 

Sources here.

This entry was posted in assembly, cryptography, encryption, programming, security and tagged , , , , . Bookmark the permalink.

1 Response to SPECK Block Cipher

  1. Pingback: Shellcode: Encryption Algorithms in ARM Assembly | modexp

Leave a comment