CHAM Block Cipher

Introduction

CHAM: A Family of Lightweight Block Ciphers for Resource-Constrained Devices was published in December 2017 at the 20th Annual International Conference on Information Security and Cryptology held in South Korea. CHAM consists of three ciphers, CHAM-64/128 for 16-bit architectures, CHAM-128/128 and CHAM-128/256 for 32-bit architectures. This post will only focus on CHAM-128/128 that operates on four branches of 32 bits each. The only operations used are 32-bit modular addition, rotation and exclusive-OR (ARX). The design of CHAM draws inspiration from the SPECK and SIMON block ciphers published by the NSA.

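The following are the parameters for all three variants. N is the block length in bits, K is the key length in bits, R is the number of rounds, W is the width of a word in bits and K/W is the number of words per key.

  Cipher          N     K     R     W     K/W
  CHAM-64/128     64    128   80    16    8
  CHAM-128/128    128   128   80    32    4
  CHAM-128/256    128   256   96    32    8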

Key schedule

Eight round keys are generated from the 128-bit master key. Each round key is 32 bits in length.

/* Expand the KW-word master key at `in` into 2*KW round-key words at `out`.
 *
 * For every key word k[i] two round keys are written:
 *   rk[i]            = k[i] ^ ROTL32(k[i], 1) ^ ROTL32(k[i], 8)
 *   rk[(i + KW) ^ 1] = k[i] ^ ROTL32(k[i], 1) ^ ROTL32(k[i], 11)
 */
void cham128_setkey(void *in, void *out)
{
    const uint32_t *src = (const uint32_t*)in;
    uint32_t *dst = (uint32_t*)out;
    int idx = 0;
    int mirror;

    while (idx < KW) {
      /* slot in the upper half; the ^1 swaps neighbouring pairs */
      mirror = (idx + KW) ^ 1;

      dst[idx]    = src[idx] ^ ROTL32(src[idx], 1) ^ ROTL32(src[idx], 8);
      dst[mirror] = src[idx] ^ ROTL32(src[idx], 1) ^ ROTL32(src[idx], 11);

      idx++;
    }
}

x86 assembly

%define K 128   ; key length
%define N 128   ; block length
%define R 80    ; number of rounds
%define W 32    ; word length
%define KW K/W  ; number of words per key

; -----------------------------------------------
; void cham128_setkeyx(void *in, void *out)
;
; 32-bit code, arguments on the stack.
; in  : KW 32-bit master key words
; out : receives 2*KW 32-bit round keys
;
; Every general purpose register is saved with
; pushad and restored with popad before return.
; -----------------------------------------------
cham128_setkeyx:
_cham128_setkeyx:
    pushad
    mov    esi, [esp+32+4]      ; esi = in  (master key)
    mov    edi, [esp+32+8]      ; edi = out (round keys)
    xor    ecx, ecx             ; ecx = i = 0
.next_word:
    mov    eax, [esi+ecx*4]     ; eax = k[i]
    mov    edx, eax
    rol    edx, 1
    xor    edx, eax             ; edx = k[i] ^ ROTL32(k[i], 1)
    mov    ebx, eax
    rol    ebx, 8
    xor    ebx, edx             ; ebx = edx ^ ROTL32(k[i], 8)
    mov    [edi+ecx*4], ebx     ; rk[i] = ebx
    rol    eax, 11
    xor    eax, edx             ; eax = edx ^ ROTL32(k[i], 11)
    lea    ebp, [ecx+KW]
    xor    ebp, 1               ; ebp = (i + KW) ^ 1
    mov    [edi+ebp*4], eax     ; rk[(i + KW) ^ 1] = eax
    inc    ecx                  ; i++
    cmp    ecx, KW
    jne    .next_word           ; until i == KW
    popad
    ret

Encryption

There are 80 rounds in total; this is significantly more than the 32 used for SPECK-128/128.

/* Encrypt one four-word block in place.
 *
 * keys : round keys, indexed as rk[i & 7]
 * data : four 32-bit words, overwritten with the result
 *
 * Each of the R rounds mixes x0 with a rotated copy of x1 and one round
 * key, then rotates the four branches by one position. Even and odd
 * rounds use swapped rotation amounts (1,8 versus 8,1).
 */
void cham128_encrypt(void *keys, void *data)
{
    uint32_t *rk = (uint32_t*)keys;
    uint32_t *x  = (uint32_t*)data;
    uint32_t x0 = x[0], x1 = x[1], x2 = x[2], x3 = x[3];
    uint32_t t;      /* kept from the original; presumably scratch for XCHG */
    uint32_t a, k;
    int i;

    for (i = 0; i < R; i++)
    {
      a = x0 ^ i;        /* branch 0 with the round counter folded in */
      k = rk[i & 7];     /* round key for this round */

      if (i & 1) {
        /* odd round */
        x0 = ROTL32(a + (ROTL32(x1, 8) ^ k), 1);
      } else {
        /* even round */
        x0 = ROTL32(a + (ROTL32(x1, 1) ^ k), 8);
      }

      /* rotate the branches: (x0,x1,x2,x3) <- (x1,x2,x3,x0) */
      XCHG(x0, x1);
      XCHG(x1, x2);
      XCHG(x2, x3);
    }

    x[0] = x0; x[1] = x1;
    x[2] = x2; x[3] = x3;
}

Compact code

#define R(v,n)(((v)>>(n))|((v)<<(32-(n))))
#define F(n)for(i=0;i<n;i++)
typedef unsigned int W;

/* CHAM-128/128: derive the round keys from the four-word master key mk
 * and encrypt the four-word block p in place.
 *
 * R() rotates right, so 31/24/21 are left rotations by 1/8/11.
 */
void cham(void*mk,void*p){
    W rk[8], *blk = p, *key = mk, i, odd, keep, sum;

    /* key schedule: two round keys per master key word */
    F(4) {
      sum = key[i] ^ R(key[i], 31);
      rk[i] = sum ^ R(key[i], 24);
      rk[(i + 4) ^ 1] = sum ^ R(key[i], 21);
    }

    /* 80 rounds; even and odd rounds swap the two rotation amounts */
    F(80) {
      odd  = i & 1;
      keep = blk[3];
      sum  = (blk[0] ^ i) + (rk[i & 7] ^ R(blk[1], odd ? 24 : 31));

      /* shift the branches down and append the new word */
      blk[0] = blk[1];
      blk[1] = blk[2];
      blk[2] = keep;
      blk[3] = R(sum, odd ? 31 : 24);
    }
}

x86 assembly

Only encryption is implemented here: if the cipher is used in CTR mode, decryption uses the same encryption routine, so a separate decryption function isn't necessary.

; -----------------------------------------------
; CHAM-128/128 block cipher in x86 assembly
;
; void cham(void *key, void *data)
;
;   key  : four 32-bit master key words (read only)
;   data : four 32-bit words, encrypted in place
;
; size: ~127 bytes (NOTE: not re-measured after the
;       two stack fixes described below)
;
; global calls use cdecl convention
;
; Stack layout inside the function (low -> high):
;   [saved data ptr] [rk: 2*KW dwords] [pushad frame]
;   [return address] [key] [data]
;
; All general purpose registers are preserved.
;
; Fixes relative to the earlier listing:
;  - the round-key pointer is reloaded from the
;    stack (lea edi,[esp+4]); ebp cannot be used
;    because it holds x0 during encryption
;  - the round-key area reserved by the second
;    pushad is released before the final popad
;
; -----------------------------------------------

      bits 32
     
      %ifndef BIN
        global cham
        global _cham
      %endif
      
%define K 128   ; key length
%define N 128   ; block length
%define R 80    ; number of rounds
%define W 32    ; word length
%define KW K/W  ; number of words per key
     
%define x0 ebp
%define x1 ebx
%define x2 edx
%define x3 esi
%define rk edi

cham:
_cham:
    pushad                  ; save all registers (32 bytes)
    mov    esi, [esp+32+4]  ; k = key
    mov    ebp, [esp+32+8]  ; x = data
    pushad                  ; reserve 32 bytes = 2*KW round keys
    mov    edi, esp         ; edi = rk    
    xor    eax, eax         ; i  = 0
sk_l0:
    mov    ebx, [esi+eax*4] ; ebx = k[i]
    mov    ecx, ebx         ; ecx = k[i]
    mov    edx, ebx         ; edx = k[i]    
    rol    ebx, 1           ; ROTL32(k[i], 1)
    rol    ecx, 8           ; ROTL32(k[i], 8)    
    xor    edx, ebx         ; k[i] ^ ROTL32(k[i], 1) 
    xor    edx, ecx  
    mov    [edi+eax*4], edx ; rk[i] = edx
    xor    edx, ecx         ; back to k[i] ^ ROTL32(k[i], 1)
    rol    ecx, 3           ; 8 + 3 = ROTL32(k[i], 11)
    xor    edx, ecx    
    lea    ebx, [eax+KW]
    xor    ebx, 1
    mov    [edi+ebx*4], edx ; rk[(i + KW) ^ 1] = edx
    inc    eax
    cmp    al, KW
    jnz    sk_l0    
    
    mov    esi, ebp         ; esi = data
    push   esi              ; keep data pointer for the write-back
                            ; from here on rk lives at [esp+4]
    lodsd
    xchg   eax, x0          ; x0 = x[0]
    lodsd
    xchg   eax, x1          ; x1 = x[1]
    lodsd
    xchg   eax, x2          ; x2 = x[2]
    lodsd
    xchg   eax, x3          ; x3 = x[3] (esi stops being a pointer)
    xor    eax, eax         ; i = 0
    ; rewind the round key pointer (i = 0 and every 8th round)
enc_l0: 
    lea    edi, [esp+4]     ; rk = first round key, above saved data ptr
    jmp    enc_lx    
enc_l1:
    test   al, 7            ; i & 7
    jz     enc_l0
enc_lx:    
    push   eax              ; save i
    mov    cx, 0x0108       ; ch = 1, cl = 8 (odd round order)
    test   al, 1            ; if ((i & 1)==0)
    jnz    enc_l2
    
    xchg   cl, ch           ; even round: cl = 1, ch = 8
enc_l2:
    xor    x0, eax          ; x0 ^= i
    mov    eax, x1
    rol    eax, cl          ; ROTL32(x1, even ? 1 : 8)
    xor    eax, [edi]       ; ^ *rk
    scasd                   ; rk++ (only used to add 4 to edi)
    add    x0, eax
    xchg   cl, ch           ; select the other rotation amount
    rol    x0, cl           ; ROTL32(x0, even ? 8 : 1)
    
    xchg   x0, x1           ; XCHG(x0, x1);
    xchg   x1, x2           ; XCHG(x1, x2);
    xchg   x2, x3           ; XCHG(x2, x3);
    
    pop    eax              ; restore i
    inc    eax              ; i++
    cmp    al, R            ; i<R
    jnz    enc_l1
    
    pop    edi              ; edi = data
    xchg   eax, x0
    stosd                   ; x[0] = x0;
    xchg   eax, x1
    stosd                   ; x[1] = x1;
    xchg   eax, x2
    stosd                   ; x[2] = x2;
    xchg   eax, x3
    stosd                   ; x[3] = x3;
    popad                   ; release the round key area (values discarded)
    popad                   ; restore the caller's registers
    ret

ARM / AArch32 assembly

  .arm
  .arch armv7-a  
  .text
  
  .global cham

// void cham(void *key, void *data)
//
// r0 = four 32-bit master key words
// r1 = four 32-bit data words, encrypted in place
//
// r0-r12 are saved on entry and restored on exit.
// Eight round keys are kept in 32 bytes of stack.

// arguments
mkey  .req r0                  // only needed by the key schedule
blk   .req r1

// cipher state (reuses r0 once the key has been consumed)
st0   .req r0
st1   .req r2
st2   .req r3
st3   .req r4

// scratch
ta    .req r5
tb    .req r6
tc    .req r7

// round key table and loop counter
rkeys .req sp
rnd   .req r12

cham:
  push   {r0-r12,lr}
  sub    sp, sp, #32                 // room for 8 round keys

  // ---- key schedule -------------------------------------------
  mov    rnd, #0                     // i = 0
cham_keys:
  ldr    ta, [mkey, rnd, lsl #2]     // ta = k[i]
  eor    tb, ta, ta, ror #31         // tb = k[i] ^ ROTL32(k[i], 1)
  eor    tc, tb, ta, ror #24         // tc = tb ^ ROTL32(k[i], 8)
  str    tc, [rkeys, rnd, lsl #2]    // rk[i] = tc
  eor    tb, tb, ta, ror #21         // tb ^= ROTL32(k[i], 11)
  add    tc, rnd, #4
  eor    tc, tc, #1                  // tc = (i + 4) ^ 1
  str    tb, [rkeys, tc, lsl #2]     // rk[(i + 4) ^ 1] = tb
  add    rnd, rnd, #1
  cmp    rnd, #4
  bne    cham_keys

  // ---- encryption ---------------------------------------------
  ldm    blk, {st0, st1, st2, st3}   // load the 128-bit block

  mov    rnd, #0                     // i = 0
cham_round:
  and    tb, rnd, #7
  ldr    ta, [rkeys, tb, lsl #2]     // ta = rk[i & 7]
  eor    st0, st0, rnd               // st0 ^= i

  tst    rnd, #1                     // Z set on even rounds
  eoreq  ta, ta, st1, ror #31        // even: ta ^= ROTL32(st1, 1)
  eorne  ta, ta, st1, ror #24        // odd : ta ^= ROTL32(st1, 8)
  add    ta, ta, st0                 // ta += st0
  roreq  ta, ta, #24                 // even: ta = ROTL32(ta, 8)
  rorne  ta, ta, #31                 // odd : ta = ROTL32(ta, 1)

  // rotate the branches and append the new word
  mov    st0, st1
  mov    st1, st2
  mov    st2, st3
  mov    st3, ta

  add    rnd, rnd, #1
  cmp    rnd, #80                    // 80 rounds
  bne    cham_round

  stm    blk, {st0, st1, st2, st3}   // store the encrypted block

  add    sp, sp, #32                 // drop the round keys
  pop    {r0-r12, pc}                // restore registers and return

ARM64 / AArch64 assembly

// CHAM 128/128 in ARM64 assembly
//
// void cham(void *mk, void *p);
//   x0 = mk : four 32-bit master key words
//   x1 = p  : four 32-bit data words, encrypted in place
//
// Uses x0-x8 as scratch and 32 bytes of stack for the
// eight round keys.

    .arch armv8-a
    .text
    .global cham

cham:
    sub    sp, sp, 32             // rk[8]
    mov    x8, x1                 // x8 = p (x1 is reused for the state)
    mov    w4, wzr                // i = 0

    // ---- key schedule -----------------------------------------
.Lcham_keys:
    ldr    w5, [x0, x4, lsl 2]    // w5 = k[i]
    eor    w6, w5, w5, ror 31     // t = k[i] ^ R(k[i],31)
    eor    w7, w6, w5, ror 24     // rk[i] = t ^ R(k[i],24)
    str    w7, [sp, x4, lsl 2]
    eor    w6, w6, w5, ror 21     // t ^ R(k[i],21)
    add    w7, w4, 4
    eor    w7, w7, 1              // (i+4)^1
    str    w6, [sp, x7, lsl 2]    // rk[(i+4)^1]
    add    w4, w4, 1              // i++
    cmp    w4, 4
    b.ne   .Lcham_keys

    // ---- encryption -------------------------------------------
    ldp    w0, w1, [x8]           // w[0], w[1]
    ldp    w2, w3, [x8, 8]        // w[2], w[3]
    mov    w4, wzr                // i = 0

.Lcham_round:
    and    w5, w4, 7
    ldr    w5, [sp, x5, lsl 2]    // w5 = rk[i & 7]
    eor    w0, w0, w4             // w[0] ^= i

    // w5 ^= R(w[1], (i & 1) ? 24 : 31)
    eor    w6, w5, w1, ror 31     // even-round value
    eor    w7, w5, w1, ror 24     // odd-round value
    tst    w4, 1
    csel   w5, w7, w6, ne

    add    w5, w5, w0             // w5 += w[0]

    // w5 = R(w5, (i & 1) ? 31 : 24); flags from tst are still valid
    ror    w6, w5, 24             // even-round value
    ror    w7, w5, 31             // odd-round value
    csel   w5, w7, w6, ne

    // rotate the branches and append the new word
    mov    w0, w1
    mov    w1, w2
    mov    w2, w3
    mov    w3, w5

    add    w4, w4, 1              // i++
    cmp    w4, 80
    b.ne   .Lcham_round

    stp    w0, w1, [x8]           // write the block back
    stp    w2, w3, [x8, 8]
    add    sp, sp, 32
    ret

Sources here.