Adler-32 checksum in AVR Assembler
Your AVR bootloader should check the application's integrity before attempting to start it. The Adler-32 checksum offers a good trade-off among performance, size and robustness.
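This is a straightforward implementation for AVR Assembler 2. It is very fast because it never calculates a division remainder (modulo): both checksum components are always kept below 65521, so after each addition the sum is less than twice the modulus, and a single compare-and-subtract is enough to reduce it again: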
#define R1_ZERO r1
#define R22_LOW__MOD_ADLER32 r22
#define R23_HIGH_MOD_ADLER32 r23
#define R18_LOW__ADLER_A r18
#define R19_HIGH_ADLER_A r19
#define R20_LOW__ADLER_B r20
#define R21_HIGH_ADLER_B r21
; ----------------------------
; accumulate_adler32: feeds one data byte into a running Adler-32 checksum.
;
; Inputs:
;   r24             = new data byte to add to the Adler-32 checksum.
;   r23:r22         = must be set to MOD_ADLER32 upon entry, they are not changed.
;   r21:r20:r19:r18 = the previous Adler-32 checksum
;                     (B component in r21:r20, A component in r19:r18).
;                     Each component must already be below MOD_ADLER32,
;                     because only one subtraction is performed per component.
;   r1              = must be zero upon entry.
;
; Outputs:
;   r21:r20:r19:r18 = the updated Adler-32 checksum.
;
; Register r0 and the status flags are overwritten.
.equ MOD_ADLER32 = 65521
accumulate_adler32:
; This routine processes one byte at a time. If you already
; have a bigger data chunk available, there are faster ways,
; see the zlib implementation for details, file adler32.c,
; function adler32().
; Add the new byte to the A component.
; r0 collects the carry, so r0:A forms a 17-bit sum.
clr r0
add R18_LOW__ADLER_A, r24
adc R19_HIGH_ADLER_A, R1_ZERO
adc r0, R1_ZERO
; Have we reached the MOD_ADLER32 value?
cp R18_LOW__ADLER_A, R22_LOW__MOD_ADLER32
cpc R19_HIGH_ADLER_A, R23_HIGH_MOD_ADLER32
cpc r0, R1_ZERO
brlo accumulate_adler32_no_overflow_a
; If so, subtract MOD_ADLER32.
sub R18_LOW__ADLER_A, R22_LOW__MOD_ADLER32
sbc R19_HIGH_ADLER_A, R23_HIGH_MOD_ADLER32
; We do not need the following: we know the end
; r0 value will always be zero.
; sbc r0, R1_ZERO
accumulate_adler32_no_overflow_a:
; Update the B component with the (already reduced) A component.
; Again, r0 collects the carry, so r0:B forms a 17-bit sum.
clr r0
add R20_LOW__ADLER_B, R18_LOW__ADLER_A
adc R21_HIGH_ADLER_B, R19_HIGH_ADLER_A
adc r0, R1_ZERO
; Have we reached the MOD_ADLER32 value?
cp R20_LOW__ADLER_B, R22_LOW__MOD_ADLER32
cpc R21_HIGH_ADLER_B, R23_HIGH_MOD_ADLER32
cpc r0, R1_ZERO
brlo accumulate_adler32_no_overflow_b
; If so, subtract MOD_ADLER32.
sub R20_LOW__ADLER_B, R22_LOW__MOD_ADLER32
sbc R21_HIGH_ADLER_B, R23_HIGH_MOD_ADLER32
; We do not need the following: we know the end
; r0 value will always be zero.
; sbc r0, R1_ZERO
accumulate_adler32_no_overflow_b:
ret
Caller example:
; The Adler-32 initial value is 1 (0x00000001),
; that is, B component = 0 and A component = 1.
clr R21_HIGH_ADLER_B
clr R20_LOW__ADLER_B
clr R19_HIGH_ADLER_A
ldi R18_LOW__ADLER_A, 0x01
; The routine expects the modulus in r23:r22 and leaves it untouched,
; so it only needs to be loaded once before the first call.
ldi R22_LOW__MOD_ADLER32, low ( MOD_ADLER32 )
ldi R23_HIGH_MOD_ADLER32, high( MOD_ADLER32 )
; The routine requires r1 to be zero.
clr r1
ldi r24, 10 ; First byte
rcall accumulate_adler32
ldi r24, 20 ; Second byte
rcall accumulate_adler32
; ... etc ...
Equivalent in C. Note that this routine does calculate division remainders (modulo), which is rather slow on AVR processors:
/*
 * Feeds one byte into a running Adler-32 checksum (portable C version).
 *
 * a          In/out: the A component; the new byte is added to it.
 * b          In/out: the B component; the updated A component is added to it.
 * data_byte  The next input byte.
 *
 * Both components are reduced modulo 65521 after each addition.
 */
void adler32_update_c ( uint16_t * __restrict__ a,
                        uint16_t * __restrict__ b,
                        uint8_t data_byte )
{
  const uint16_t MOD_ADLER = 65521;

  /* Widen to 32 bits before adding, so that the intermediate
     sum cannot wrap around at 16 bits. */
  const uint32_t wide_a = (uint32_t) *a + data_byte;
  const uint16_t next_a = (uint16_t) ( wide_a % MOD_ADLER );
  *a = next_a;

  /* The B component accumulates the already-reduced A component. */
  const uint32_t wide_b = (uint32_t) *b + next_a;
  *b = (uint16_t) ( wide_b % MOD_ADLER );
}
This is the same implementation ported to the GCC inline assembler, so that the compiler can optimise the register usage based on the code around it:
/*
 * Feeds one byte into a running Adler-32 checksum, using AVR inline
 * assembly so that no division remainder (modulo) has to be calculated.
 *
 * a          In/out: the A component; the new byte is added to it.
 * b          In/out: the B component; the updated A component is added to it.
 * data_byte  The next input byte.
 *
 * Each component is reduced with a single compare-and-subtract against
 * 65521. That is only a full reduction if both components are already
 * below 65521 on entry.
 */
void adler32_update ( uint16_t * __restrict__ a,
                      uint16_t * __restrict__ b,
                      uint8_t data_byte )
{
  // This routine processes one byte at a time. If you already
  // have a bigger data chunk available, there are faster ways,
  // see the zlib implementation for details, file adler32.c,
  // function adler32().
  const uint16_t MOD_ADLER = 65521;

  uint16_t adler_a = *a;
  uint16_t adler_b = *b;

  // The branch targets carry a "%=" suffix, which the compiler replaces
  // with a number unique to each instance of this asm statement.
  // Fixed label names would trigger duplicate-symbol assembler errors
  // as soon as the compiler inlines or clones this function more than once.

  asm  // No "asm volatile", so that the compiler can optimize
       // the whole lot away if possible.
  (
    "; Add the new Byte to the A component             \n\t"
    "clr  r0                                           \n\t"
    "add  %A[adler_a], %[data_byte]                    \n\t"
    "adc  %B[adler_a], __zero_reg__                    \n\t"
    "adc  r0, __zero_reg__                             \n\t"
    "                                                  \n\t"
    "; Have we reached the MOD_ADLER32 value?          \n\t"
    "cp   %A[adler_a], %A[mod_adler]                   \n\t"
    "cpc  %B[adler_a], %B[mod_adler]                   \n\t"
    "cpc  r0, __zero_reg__                             \n\t"
    "brlo .Laccumulate_adler32_no_overflow_a_%=        \n\t"
    "                                                  \n\t"
    "; If so, substract MOD_ADLER32.                   \n\t"
    "sub  %A[adler_a], %A[mod_adler]                   \n\t"
    "sbc  %B[adler_a], %B[mod_adler]                   \n\t"
    "; We do not need the following: we know the end   \n\t"
    "; r0 value will always be zero.                   \n\t"
    "; sbc r0, __zero_reg__                            \n\t"
    "                                                  \n\t"
    ".Laccumulate_adler32_no_overflow_a_%=:            \n\t"
    "                                                  \n\t"
    "; Update the B component.                         \n\t"
    "clr  r0                                           \n\t"
    "add  %A[adler_b], %A[adler_a]                     \n\t"
    "adc  %B[adler_b], %B[adler_a]                     \n\t"
    "adc  r0, __zero_reg__                             \n\t"
    "                                                  \n\t"
    "; Have we reached the MOD_ADLER32 value?          \n\t"
    "cp   %A[adler_b], %A[mod_adler]                   \n\t"
    "cpc  %B[adler_b], %B[mod_adler]                   \n\t"
    "cpc  r0, __zero_reg__                             \n\t"
    "brlo .Laccumulate_adler32_no_overflow_b_%=        \n\t"
    "                                                  \n\t"
    "; If so, substract MOD_ADLER32.                   \n\t"
    "sub  %A[adler_b], %A[mod_adler]                   \n\t"
    "sbc  %B[adler_b], %B[mod_adler]                   \n\t"
    "; We do not need the following: we know the end   \n\t"
    "; r0 value will always be zero.                   \n\t"
    "; sbc r0, __zero_reg__                            \n\t"
    "                                                  \n\t"
    ".Laccumulate_adler32_no_overflow_b_%=:            \n\t"

    : // output operands (read and written)
      [adler_a] "+r" (adler_a),
      [adler_b] "+r" (adler_b)
    : // input operands
      [data_byte] "r" (data_byte),
      [mod_adler] "r" (MOD_ADLER)
    : // clobbered registers: r0 holds the carry of the 17-bit sums,
      // and the arithmetic/compare instructions change the status flags.
      "r0", "cc"
  );

  *a = adler_a;
  *b = adler_b;
}
Author: R. Diez
|