; Assembly implementation of BlowFish F function.
;
; Original algorithm by Bruce Schneier
; First assembly implementation by John Lots and Walter van Holst
; Assembly rewrite by Jeroen Pluimers
;
; This code is hereby donated to the public domain
;
; Version history of rewritten code:
;   19941230 1.00.00 jwp - implemented assembly code from scratch
;
; Original Pascal code:
;   Type TSBox = array[1..4,0..255] of Longint;
;   function F(Input: Longint; var SBox: TSBox): Longint; near;
;   var
;     Bytes: array[0..3] of Byte absolute Input;
;   begin
;     F:=(
;          ( SBox[1,Bytes[0]] +
;            SBox[2,Bytes[1]]
;          ) XOR
;          SBox[3,Bytes[2]]
;        ) +
;        SBox[4,Bytes[3]];
;   end;

Ideal                   ; use TASM Ideal-mode syntax for the rest of the file
P386                    ; enable 80386 instructions; F uses 32-bit registers
Model   TPascal         ; Turbo Pascal model, so F follows the Pascal calling convention

CodeSeg                 ; start of the code segment


  ; Layout of the four BlowFish S-boxes handed to F: four consecutive
  ; tables of 256 doublewords each, mirroring the Pascal type
  ;   TSBox = array[1..4,0..255] of Longint
  ; F uses the field names as byte offsets from the start of the SBox.
  Struc TSBox
    box1          dd      256 dup(?)    ; offset    0, indexed by Bytes[0]
    box2          dd      256 dup(?)    ; offset 1024, indexed by Bytes[1]
    box3          dd      256 dup(?)    ; offset 2048, indexed by Bytes[2]
    box4          dd      256 dup(?)    ; offset 3072, indexed by Bytes[3]
  EndS

  Proc    F Near Input: DWord, SBox: DWord
  Public  F
  ; BlowFish F function, callable from Turbo Pascal as
  ;   function F(Input: Longint; var SBox: TSBox): Longint; near;
  ;
  ; In:   Input - 32-bit value; its four bytes b0..b3 (low to high) each
  ;               select one entry from box1..box4 respectively
  ;       SBox  - far pointer to a TSBox
  ; Out:  DX:AX = ((box1[b0] + box2[b1]) xor box3[b2]) + box4[b3]
  ;       (the complete 32-bit result is also left in EAX)
  ; Uses: EAX, BX, EDX, SI and the flags; DS is saved and restored
  ;
  ; This is the Pascal calling convention, so there is no RETURNS keyword.
  ; The routine is NEAR and can therefore only be called from within the
  ; same unit. The assembler generates the stack frame on entry and the
  ; stack cleanup at the RET.

          push    ds

          lds     si,  [SBox]           ; ds:si -> start of the S-boxes
          mov     edx, [Input]          ; dl = b0, dh = b1, upper word = b3:b2

          ; Each step below zero-extends one input byte into bx, scales it
          ; by 4 (the size of one table entry) and rebases it on the start
          ; of the SBox; the TSBox field offset then selects the table.

          ; eax = box1[b0]  (TSBox.box1 is offset 0, so no displacement)
          movzx   bx,  dl
          shl     bx,  2
          add     bx,  si
          mov     eax, [DWord ptr ds:bx]

          ; eax = eax + box2[b1]
          movzx   bx,  dh
          shl     bx,  2
          add     bx,  si
          add     eax, [DWord ptr ds:bx + TSBox.box2]

          shr     edx, 16               ; now dl = b2, dh = b3

          ; eax = eax xor box3[b2]
          movzx   bx,  dl
          shl     bx,  2
          add     bx,  si
          xor     eax, [DWord ptr ds:bx + TSBox.box3]

          ; eax = eax + box4[b3]
          movzx   bx,  dh
          shl     bx,  2
          add     bx,  si
          add     eax, [DWord ptr ds:bx + TSBox.box4]

          ; split the 32-bit result over DX:AX for the Pascal caller
          mov     edx, eax
          shr     edx, 16               ; dx = high word; ax already holds the low word

          pop     ds

          ret

  EndP ; F

EndS  ; matches the CodeSeg directive above

End   ; end of the source file
