DES Block cipher

Introduction

DES was originally designed and written by a team of computer scientists working at IBM along with the NSA and published by NIST in January 1977.

Although obsolete due to AES, DES continues to be used for various reasons and will not disappear anytime soon. From encryption protocols to password algorithms, DES is still part of everyday life.

I want to be clear, I’m not encouraging the use of this algorithm for protection of data, merely implementing it in assembly which is optimized for size.

The cipher itself isn’t considered secure anymore but triple DES or 3DES is still used for TLS/SSL despite the availability of AES for 15 years.

I searched for small versions of DES and found 1 by Christopher Hertel and another by Daniel Otte, the latter of which was modified for the C/assembly version presented here.

The assembly code is currently 1,045 bytes and was joint effort between Peter Ferrie and myself.

Permutation Function

The bulk of DES ROM is taken up by permutation tables which are used a lot in both encryption and key scheduling. This is most used function for the entire algorithm.

```void permute (uint8_t ptbl[], void *input, des_blk *out)
{
uint8_t i, j, x, t, ob;
des_blk *in=(des_blk*)input;
uint8_t *p=(uint8_t*)ptbl;

// we always expect out to absorb 8 bytes
memset (out->v8, 0, 8);

// load destination buffer in bytes
ob = *p++;

// for i=0 to output bytes
for (i=0; i<ob; i++)
{
t=0;
// permutate 8-bits
for (j=0; j<8; j++)
{
x = *p++;
t <<= 1;
if ((in->v8[x / 8]) & (0x80 >> (x & 7)) ){
t |= 0x01;
}
}
out->v8[i]=t;
}
}
```
```; esi = permutation table
; ebx = input
; edi = output
_permutex:
permutex:

xor    eax, eax
push   edi
stosd
stosd
pop    edi

; ob=*p++;
lodsb
xchg   ecx, eax
p_l1:
push   ecx
mov    cl, 8
p_l2:
push   ecx
; x = *p++;
lodsb
; replace (0x80 >> (x % 8)))
; with 1 << (7 - (x % 8))
mov    cl, 7
sub    cl, al
and    cl, 7
; in[x/8];
shr    al, 3
xlatb
; if (in[x/8] & (0x80 >> (x % 8)))
bt     eax, ecx
; t <<= 1;
pop    ecx
loop   p_l2
xchg   eax, edx
; out->v8[i]=t;
stosb
pop    ecx
loop   p_l1
ret
```

Key Expansion

This function creates 16 sub keys which are the same for encryption/decryption. Hertel and Otte expand the key during encryption/decryption which is fine but there’s a separate function created here to ease load on registers and processing.

```void des_setkey (des_ctx *ctx, void *input)
{
int     rnd;
des_blk *k;
des_blk k1, k2;

permute (pc1_permtab, input, &k1);

for (rnd=0; rnd<DES_ROUNDS; rnd++)
{
permute (shiftkey_permtab, &k1, &k2);
k=&k2;
if (ROTTABLE & (1 << rnd)) {
permute (shiftkey_permtab, &k2, &k1);
k=&k1;
}
permute (pc2_permtab, k, &ctx->keys[rnd]);
memcpy (k1.v8, k->v8, DES_BLK_LEN);
}
}
```
```_des_setkeyx:
des_setkey:
mov    ebx, [esp+32+8]  ; input

; alloc space for k1, k2
sub    esp, 16

mov    edx, permutex

; permute (pc1_permtab, input, &k1);
mov    esi, pc1_permtab
mov    edi, esp
call   edx               ; permutex
xor    ecx, ecx
mov    edi, [esp+32+16+4]  ; ctx
sk_l1:
; permute (shiftkey_permtab, &k1, &k2);
mov    ebx, esp          ; ebx=k1
push   edi
lea    edi, [ebx+8]      ; edi=k2
;mov    esi, shiftkey_permtab
call   edx               ; permutex
push   1
pop    eax
shl    eax, cl
test   eax, 07EFCh
xchg   ebx, edi          ; ebx=k2, edi=k1
jz     sk_l2
; permute (shiftkey_permtab, &k2, &k1);
call   edx               ; permutex
mov    ebx, edi          ; ebx=k1
sk_l2:
; permute (pc2_permtab, k, &ctx->keys[rnd]);
;mov    esi, pc2_permtab
pop    edi
call   edx               ; permutex
; memcpy (k1.v8, k->v8, DES_BLK_LEN);
fild   qword [ebx]
fistp  qword [esp]
scasd
scasd
sub    esi, (shiftkey_permtab - pc1_permtab) +
(pc2_permtab - shiftkey_permtab)
; rnd++
inc    ecx
cmp    ecx, 16
jnz    sk_l1
; free stack
ret
```

The F round Function

This is the main function which provides diffusion by mixing input with key and subsituting bytes with sbox tables.

```uint32_t des_f (uint32_t *x, des_blk *key) {
uint8_t  i, x0, x1;
uint32_t t=0;
uint8_t  *sbp;
des_blk  t0, t1;

// permute 1 half of data
permute (e_permtab, x, &t0);

// mix key with data
for (i=0; i<7; i++) {
t0.v8[i] ^= key->v8[i];
}

permute (splitin6bitword_permtab, &t0, &t1);
sbp=sbox;

for (i=0; i<8; ++i)
{
x0=t1.v8[i];
x1 = sbp[x0 >> 1];
x1 = (x0 & 1) ? x1 & 0x0F : x1 >> 4;
t <<= 4;
t |= x1;
sbp += 32;
}
t=SWAP32(t);

permute (p_permtab, &t, &t0);
return t0.v32[0];
}
```
```; eax = input
; esi = key
; return result in eax
_des_fx:
des_fx:
pushfd
cld

; put x in ebx
xchg   ebx, eax
; put key in eax
xchg   eax, esi

; allocate 16 bytes
sub    esp, 16

mov    ebp, permutex

; permute (e_permtab, x, &t0);
mov    edi, esp
mov    esi, e_permtab
call   ebp

; put key in esi
xchg   eax, esi
lodsd
xor    [edi], eax
lodsd
xor    [edi+4], eax

; permute (splitin6bitword_permtab, &t0, &t1);
mov    esi, splitin6bitword_permtab
mov    ebx, esp
scasd
scasd
call   ebp

mov    esi, edi
mov    ebx, sbox
push   8
pop    ecx
df_l2:
lodsb
shr    al, 1
pushfd
xlatb
aam    16
popfd
jb     df_l3
mov    al, ah
df_l3:
shl    edx, 4
or     dl, al
loop   df_l2

bswap  edx

; permute (p_permtab, &t, &t0);
mov    ebx, edi
mov    esi, p_permtab
mov    edi, esp
mov    [ebx], edx
call   ebp
mov    eax, [edi]
mov    [esp+_eax], eax
popfd
ret
```

This function performs encryption and decryption depending on enc variable.

If we’re performing decryption, the key is advanced by 15 and ofs (offset) is set to -1 which will decrease key by 1 instead of advancing by 1 during encryption.

```void des_enc (des_ctx *ctx, void *in, void *out, int enc)
{
int      rnd, ofs=1;
des_blk  t0;
uint32_t L, R, T;

des_blk *key=&ctx->keys[0];

if (enc==DES_DECRYPT) {
ofs = -1;
key += 15;
}
// apply inital permuation to input
permute (ip_permtab, in, &t0);

L=t0.v32[0];
R=t0.v32[1];

for (rnd=0; rnd<DES_ROUNDS; rnd++)
{
L ^= des_f (&R, key);
// swap
T=L;
L=R;
R=T;
key+=ofs;
}
t0.v32[0]=R;
t0.v32[1]=L;

// apply inverse permuation
permute (inv_ip_permtab, &t0, out);
}
```
```%define L ebp
%define R edx

_des_encx:
des_encx:
mov    ebp, esp

mov    ebx, [ebp+32+ 8] ; in
mov    ecx, [ebp+32+16] ; enc

; permute (ip_permtab, in, &t0);
push   ecx
push   ecx
mov    edi, esp
mov    esi, ip_permtab
call   permutex

mov    esi, [ebp+32+ 4] ; esi=ctx
mov    ebx, esp

mov    L, [edi]
mov    R, [edi+4]

jecxz  de_l1
; if decrypt, advance key and set direction
std
de_l1:
mov    cl, 16

de_l2:
; L ^= des_f (&R, key);
push   R
mov    eax, esp
call   des_fx

xor    L, eax
pop    eax
; swap
xchg   L, R
; key += ofs;
lodsd
lodsd
loop   de_l2
cld

; permute (inv_ip_permtab, &t0, out);
mov    dword[ebx], R
mov    dword[ebx+4], L
mov    edi, [esp+32+12+8] ; edi=out
mov    esi, inv_ip_permtab
call   permutex
pop    eax
pop    eax
ret
```

Although not entirely necessary, the following function can be used for creating password algorithms like MS lanman hashes/MS-CHAPv1/MS-CHAPv2 or MS kerberos authentications.

It was originally written by Svend Olaf Mikkelsen, the author of much code used for cracking DES in 1998

```void des_str2key (void *str, des_blk* key) {
uint32_t x1, r1, *p1;
des_blk *s=(des_blk*)str;
int i, j;

for (i=0; i<2; i++) {
p1=(uint32_t*)&s->v8[i*3];
x1=SWAP32(*p1);
if (i==1) {
x1=ROTL32 (x1, 4);
}
r1=0;
for (j=0; j<4; j++) {
r1 = ROTL32((r1 | (x1 & 0xFE000000)), 8);
x1 <<= 7;
}
key->v32[i] = SWAP32(r1);
}
}
```
```_des_str2keyx:
des_str2keyx:
mov    esi, [esp+32+4] ; str
mov    edi, [esp+32+8] ; key
push   2
pop    ecx
s2k_l1:
push   ecx
lodsd
dec    esi
bswap  eax
dec    ecx
mov    cl, 4
jnz    s2k_l3
rol    eax, cl
s2k_l3:
shld   edx, eax, 7
shl    eax, 7
loop   s2k_l3

xchg   eax, edx
bswap  eax
stosd
pop    ecx
loop   s2k_l1
ret
```

The following is simple demonstration of generating MS Lanman hash using above code.

```// generate Lanman hash
void lanman (uint8_t *lmhash, uint8_t *pwd)
{
uint8_t lmpwd[32];
uint8_t i;
des_blk key1, key2;
size_t  len=strlen(pwd);
des_ctx ctx;

memset (lmhash, 0, 16);

// MS Lanman passwords don't exceed 14 characters
len=(len>14) ? 14 : len;

for (i=0; i<14; i++) {
lmpwd[i]=(i<len) ? toupper (pwd[i]) : 0;
}

des_str2key (&lmpwd[0], &key1);
des_str2key (&lmpwd[7], &key2);

des_setkey (&ctx, &key1);
des_enc (&ctx, "KGS!@#\$%", (des_blk*)&lmhash[0], DES_ENCRYPT);

des_setkey (&ctx, &key2);
des_enc (&ctx, "KGS!@#\$%", (des_blk*)&lmhash[8], DES_ENCRYPT);
}
```

Results

architecture size in bytes
x86 1,045
x64 n/a

Full source here and here