Hi Guys,

for fun I've written md5sum using IMCC, and have attached my first cut.

It will need some further testing, but for the most part it works well. There are some limitations:

 * 200K file limit imposed when reading in the file
 * 512MB(?) limit imposed by shortcut in algorithm
 * Haven't tested on 64 bit processors
 * Haven't tested on big endian

It would be nice to make sure that it works on all architectures as it would fit well into the parrot test suite; it would be good for both benchmarking and functionality testing.

The only problems have been:

* Using macros which call one another seemed to hang if argument names were reused
(There have been discussions on the future of macros, I know)
* /Possible/ IMCC register allocation bug which I need to reinvestigate (see earlier post)
(May actually have been the following JIT problem?)
* Results from i386 JIT can be corrupt and needs investigating (lots of FFs)
* Seem to get ICU/string errors on some binary files
* Some checksum discrepancies which I need to investigate


Cheers,

Nick
# Parrot md5sum; Nick Glencross <[EMAIL PROTECTED]>
#
# Based on md5.c, from md5sum
#           written by Ulrich Drepper <[EMAIL PROTECTED]>, 1995.

=head1 NAME

md5sum.imc - calculate MD5 checksums

=head1 SYNOPSIS

=head1 DESCRIPTION

This is a pure Parrot MD5 hash routine.

=head1 SUBROUTINES

=head2 _md5sum

Pass in a string, returns a PMC Array with the result

=head1 BUGS

Only tested so far on i386.

=over 4

=item * Might work on 64 bit platforms

=item * Might not work on big endian systems

=back

=cut

###########################################################################

# Main Harness to show that it works

# _main: command-line driver.
#
# Takes the argument array from P5; element 0 is printed as the program
# name in the usage message and every following element is treated as a
# filename.  For each file, up to `maxsize` bytes are read, the MD5 sum is
# computed with _md5sum and printed as "<digest>\t<filename>\n".
# Problems with an individual file are reported on stderr, prefixed with
# the filename, and processing continues with the next argument.
.sub _main

     .local Array args
     .local int maxsize

     args = P5

     # Number of filename arguments (element 0 is not a filename)
     $I0 = args
     $I0 = $I0 - 1

     if $I0 > 0 goto has_args

     # No filenames given: print a usage line and stop
     $S0 = args[0]
     printerr "(parrot) "
     printerr $S0
     printerr " filename [filename ...]\n"

     end

has_args:

     # Upper bound on the number of bytes read from each file
     maxsize = 200000

     # $I1 indexes the current argument, starting after the program name
     $I1 = 1

next_iter:

     if $I1 > $I0 goto iter_done

     $S0 = args[$I1]

     open $P0, $S0, "<"
     defined $I2, $P0
     if $I2 goto found
     printerr $S0
     printerr ": Cannot find\n"
     goto iter_cont
found:
     read $S1, $P0, maxsize
     close $P0

     # A read that fills the whole window cannot be told apart from a
     # truncated one, so anything that reaches maxsize is rejected.
     $I2 = length $S1
     if $I2 < maxsize goto size_ok

     # Name the offending file, as the "Cannot find" path does
     printerr $S0
     printerr ": Too large\n"
     goto iter_cont

size_ok:

     $P0 = _md5sum ($S1)

     _md5_print ($P0)
     print "\t"
     print $S0
     print "\n"

iter_cont:

     $I1 = $I1 + 1
     goto next_iter

iter_done:

     end

.end

###########################################################################
# Low-level macros used in MD5

# A parrot rol instruction might be good (as it can often be JIT'd)
# rol: rotate the low 32 bits of .x left by .n bits and leave the 32-bit
# result in .out.  .x itself is not modified.
#
# The input is masked to 32 bits before shifting and the result is masked
# again afterwards, so the rotation stays correct when Parrot integers are
# wider than 32 bits (where bits shifted past bit 31 would otherwise be
# kept, and stray high bits of .x would leak into the right shift).
# On a 32-bit integer both masks leave the value unchanged.
#
# Clobbers: $I1000, $I1001
.macro rol (x,n,  out)
     $I1001 = .x & 0xffffffff
     shl .out, $I1001, .n
     $I1000 = 32-.n
     $I1000 = $I1001 >>> $I1000
     bor .out, $I1000
     .out = .out & 0xffffffff
.endm

# FF: leaves  d ^ (b & (c ^ d))  in the caller's `tmp` variable.
# Bit by bit this picks c where b is set and d where b is clear.
# The expanding sub must declare an int named `tmp`.
.macro FF (b,c,d)
     bxor tmp, .c, .d
     band tmp, .b, tmp
     bxor tmp, .d, tmp
.endm

# FH: leaves  b ^ c ^ d  in the caller's `tmp` variable.
# The expanding sub must declare an int named `tmp`.
.macro FH (b,c,d)
     bxor tmp, .b, .c
     bxor tmp, tmp, .d
.endm

# FI: leaves  c ^ (b | ~d)  in the caller's `tmp` variable.
# The expanding sub must declare an int named `tmp`.
.macro FI (b,c,d)
     bnot tmp, .d
     bor  tmp, .b, tmp
     bxor tmp, .c, tmp
.endm

###########################################################################
# Higher level MD5 operations

# common: the part of an MD5 step shared by all four rounds.
#
# On entry the caller's `tmp` holds the value of the round function.
# The macro computes
#      a = b + rol (a + tmp + T + buffer[k], s)
# where `buffer` and `tmp` are variables of the expanding sub.
# `tmp` is overwritten along the way.
.macro common (a, b, k, s, T)
     add .a, tmp
     add .a, .T
     tmp = buffer[.k]
     add .a, tmp
     .rol (.a, .s, tmp)
     add .a, .b, tmp
.endm

# OP1: one step as used in "Round 1" of _md5_process_buffer.
# Evaluates FF on (bb, cc, dd) into `tmp`, then lets `common` update aa
# from bb, message word kk, rotation ss and additive constant TT.
.macro OP1 (aa,bb,cc,dd, kk, ss, TT)
     .FF (.bb,.cc,.dd)
     .common (.aa, .bb, .kk, .ss, .TT)
.endm

# OP2: one step as used in "Round 2" of _md5_process_buffer.
# Reuses FF with its operands rotated to (dd, bb, cc) rather than having a
# macro of its own, then lets `common` update aa from bb, message word kk,
# rotation ss and additive constant TT.
.macro OP2 (aa,bb,cc,dd, kk, ss, TT)
     .FF (.dd,.bb,.cc)
     .common (.aa, .bb, .kk, .ss, .TT)
.endm

# OP3: one step as used in "Round 3" of _md5_process_buffer.
# Evaluates FH on (bb, cc, dd) into `tmp`, then lets `common` update aa
# from bb, message word kk, rotation ss and additive constant TT.
.macro OP3 (aa,bb,cc,dd, kk, ss, TT)
     .FH (.bb,.cc,.dd)
     .common (.aa, .bb, .kk, .ss, .TT)
.endm

# OP4: one step as used in "Round 4" of _md5_process_buffer.
# Evaluates FI on (bb, cc, dd) into `tmp`, then lets `common` update aa
# from bb, message word kk, rotation ss and additive constant TT.
.macro OP4 (aa,bb,cc,dd, kk, ss, TT)
     .FI (.bb,.cc,.dd)
     .common (.aa, .bb, .kk, .ss, .TT)
.endm

###########################################################################

# swap: reverse the order of the four low bytes of .w, in place.
# Each byte is isolated with a mask and moved to its mirrored position;
# the pieces are then OR-ed back together.
#
# Clobbers: $I10, $I11, $I12, $I13
.macro swap (w)

     # byte 0 -> byte 3
     $I10 = .w & 0x000000ff
     $I10 = $I10 <<  24

     # byte 1 -> byte 2
     $I11 = .w & 0x0000ff00
     $I11 = $I11 <<  8

     # byte 2 -> byte 1
     $I12 = .w & 0x00ff0000
     $I12 = $I12 >>> 8

     # byte 3 -> byte 0
     $I13 = .w & 0xff000000
     $I13 = $I13 >>> 24

     bor $I10, $I11
     bor $I10, $I12
     bor .w, $I10, $I13

     # Keep only the low 32 bits (matters when integers are wider)
     .w   = .w & 0xffffffff

.endm

###########################################################################

 .include "library/config.imc"

# _md5sum: compute the MD5 state for a string.
#
#   str      the data to hash
#   returns  an Array sized to 7 elements; _md5_init and
#            _md5_process_buffer fill elements 0..3
#
# Warnings are written to stderr when the configuration reports an
# "intsize" other than 4 or a true "bigendian" value; the computation is
# carried out regardless.
.sub _md5sum
     .param string str

     .local pmc config
     .local int flag

     config = _config()

     # Warn if integers are not four bytes wide
     flag = config["intsize"]
     if flag == 4 goto md5sum_intsize_checked

     printerr "This doesn't seem to be a 32 bit processor: "
     printerr "Please verify the MD5 checksum\n"

md5sum_intsize_checked:

     # Warn on big endian hosts; the flag is also handed on to
     # _md5_create_buffer below
     flag = config["bigendian"]

     unless flag goto md5sum_endian_checked

     printerr "This appears to be a big endian processor: "
     printerr "Please verify the MD5 checksum\n"

md5sum_endian_checked:

     .local Array buffer
     buffer = new Array

     .local pmc context
     context = new Array
     context = 7

     # Turn the string into padded 32-bit words
     _md5_create_buffer (str, buffer, flag)

     # _print_buffer (buffer, 8)

     # Seed the state, then run it over the words
     _md5_init (context)
     _md5_process_buffer (context, buffer)

     .pcc_begin_return
      .return context
     .pcc_end_return
.end


###########################################################################

# _md5_init: store the four initial MD5 state words in context[0..3].
#
#   context  an indexable PMC with room for at least four elements;
#            it is modified in place
.sub _md5_init
     .param pmc context

     # Initial MD5 constants
     $I0 = 0x67452301
     $I1 = 0xefcdab89
     $I2 = 0x98badcfe
     $I3 = 0x10325476

     context[0] = $I0
     context[1] = $I1
     context[2] = $I2
     context[3] = $I3

.end

###########################################################################

# _md5_create_buffer: convert a string into the padded word array that
# _md5_process_buffer consumes.
#
#   str     the data to hash
#   buffer  Array to fill; it is resized here and modified in place
#   endian  when false, every assembled data word is byte-swapped with
#           the `swap` macro; when true the words are stored as assembled
#
# Layout produced:
#   * the bytes of str, four per word
#   * a single 0x80 pad byte directly after the last data byte
#   * the bit length of str in the last two words: low 32 bits in
#     buffer[words-2], the bits above that in buffer[words-1]
# The total size is the smallest multiple of 16 words (64 bytes) that
# holds the data, the pad byte and the 8 length bytes.  Words between the
# padding and the length are never written by this sub.
#
# NOTE(review): the words are assembled arithmetically rather than read
# from memory, so the byte swap looks independent of the host's byte
# order; skipping it when `endian` is true may be why big endian hosts are
# listed as unverified -- confirm against RFC 1321 before relying on it.
.sub _md5_create_buffer
     .param string str
     .param Array  buffer
     .param int endian

     .local int counter
     .local int subcounter
     .local int slow_counter

     .local int word

     # $I0 = length in bytes, $I1 = index of the last data byte
     $I0 = length str

     $I1 = $I0 - 1

     # Round (length + 1 pad byte + 8 length bytes) up to a multiple of
     # 64 bytes, then convert to a count of 4-byte words
     .local int words
     words = $I0 + 8
     words = words | 63
     words = words + 1
     words = words / 4

     buffer = words

     word = 0

     # counter:      byte offset of the word being assembled
     # subcounter:   bytes collected for that word so far (0..3)
     # slow_counter: index of that word in buffer
     counter      = 0
     subcounter   = 0
     slow_counter = 0

md5_create_buffer_loop:

     $I5 = counter + subcounter

     # Stop once the data bytes and the pad byte have been consumed
     if $I5 > $I0 goto md5_create_buffer_break

     # MD5 pad character
     $I4 = 0x80

     # Past the last data byte the pad character is used instead
     if $I5 > $I1 goto string_char
     $S0 = str[$I5]
     $I4 = ord $S0

string_char:

     # Append the byte at the low end of the word
     word = word << 8
     word = word | $I4

     subcounter = subcounter + 1

     if subcounter != 4 goto md5_create_buffer_loop

     if endian goto endian_ok

     .swap (word)

endian_ok:


     buffer[slow_counter] = word

     word         = 0
     counter      = counter + 4
     subcounter   = 0
     slow_counter = slow_counter + 1

     goto md5_create_buffer_loop

md5_create_buffer_break:

     # Check for a partial word

     if subcounter == 0 goto complete

     # Shift the bytes collected so far to the top of the word, leaving
     # zero bytes in the positions that were never filled
     subcounter = 4 - subcounter

     .local int shift
     shift = 8*subcounter

     word = word << shift

     if endian goto endian_ok2

     .swap (word)

endian_ok2:

     buffer[slow_counter] = word

complete:

     # High word of the bit length: the byte-length bits that << 3 would
     # push out of a 32-bit word
     $I1 = $I0 >>> 29
     words = words - 1
     buffer[words] = $I1

     # Low word of the bit length.  The mask must keep all 32 bits: a
     # 24-bit mask would corrupt the length of any input of 2 MiB or more.
     $I0 = $I0 << 3
     $I0 = $I0 & 0xffffffff
     words = words - 1
     buffer[words] = $I0

.end

###########################################################################

# _md5_process_buffer: run the MD5 rounds over a word buffer.
#
#   context  indexable PMC; elements 0..3 supply the starting A, B, C, D
#            and receive the final values (modified in place)
#   buffer   Array of message words; it is consumed 16 words at a time,
#            so it is empty when this sub finishes
#
# The OP1..OP4 macros expand to code that reads and writes the local
# `tmp` and indexes the local `buffer`, which is why both names must
# exist in this sub exactly as spelled.
.sub _md5_process_buffer
     .param pmc    context
     .param Array  buffer

     .local int A
     .local int B
     .local int C
     .local int D

     .local int A_save
     .local int B_save
     .local int C_save
     .local int D_save

     # Scratch value shared with the macros
     .local int tmp

     # Never given any elements; used as the replacement list for the
     # splice at the bottom of the loop
     $P0 = new Array

     A = context[0]
     B = context[1]
     C = context[2]
     D = context[3]

md5_loop:

     # Remember the state at the start of this 16-word block; it is added
     # back in after the four rounds
     A_save = A
     B_save = B 
     C_save = C
     D_save = D

     # Each line below is one step.  Arguments: the four state variables
     # (the first one is the one updated), the index of the message word
     # within the current block, the rotate count, and an additive
     # constant.

     # Round 1.
     .OP1 (A, B, C, D, 0,  7, 0xd76aa478)
     .OP1 (D, A, B, C, 1, 12, 0xe8c7b756)
     .OP1 (C, D, A, B, 2, 17, 0x242070db)
     .OP1 (B, C, D, A, 3, 22, 0xc1bdceee)
     .OP1 (A, B, C, D, 4,  7, 0xf57c0faf)
     .OP1 (D, A, B, C, 5, 12, 0x4787c62a)
     .OP1 (C, D, A, B, 6, 17, 0xa8304613)
     .OP1 (B, C, D, A, 7, 22, 0xfd469501)
     .OP1 (A, B, C, D, 8,  7, 0x698098d8)
     .OP1 (D, A, B, C, 9, 12, 0x8b44f7af)
     .OP1 (C, D, A, B, 10,17, 0xffff5bb1)
     .OP1 (B, C, D, A, 11,22, 0x895cd7be)
     .OP1 (A, B, C, D, 12, 7, 0x6b901122)
     .OP1 (D, A, B, C, 13,12, 0xfd987193)
     .OP1 (C, D, A, B, 14,17, 0xa679438e)
     .OP1 (B, C, D, A, 15,22, 0x49b40821)

     # Round 2.
     .OP2 (A, B, C, D,  1,  5, 0xf61e2562)
     .OP2 (D, A, B, C,  6,  9, 0xc040b340)
     .OP2 (C, D, A, B, 11, 14, 0x265e5a51)
     .OP2 (B, C, D, A,  0, 20, 0xe9b6c7aa)
     .OP2 (A, B, C, D,  5,  5, 0xd62f105d)
     .OP2 (D, A, B, C, 10,  9, 0x02441453)
     .OP2 (C, D, A, B, 15, 14, 0xd8a1e681)
     .OP2 (B, C, D, A,  4, 20, 0xe7d3fbc8)
     .OP2 (A, B, C, D,  9,  5, 0x21e1cde6)
     .OP2 (D, A, B, C, 14,  9, 0xc33707d6)
     .OP2 (C, D, A, B,  3, 14, 0xf4d50d87)
     .OP2 (B, C, D, A,  8, 20, 0x455a14ed)
     .OP2 (A, B, C, D, 13,  5, 0xa9e3e905)
     .OP2 (D, A, B, C,  2,  9, 0xfcefa3f8)
     .OP2 (C, D, A, B,  7, 14, 0x676f02d9)
     .OP2 (B, C, D, A, 12, 20, 0x8d2a4c8a)

     # Round 3.
     .OP3 (A, B, C, D,  5,  4, 0xfffa3942)
     .OP3 (D, A, B, C,  8, 11, 0x8771f681)
     .OP3 (C, D, A, B, 11, 16, 0x6d9d6122)
     .OP3 (B, C, D, A, 14, 23, 0xfde5380c)
     .OP3 (A, B, C, D,  1,  4, 0xa4beea44)
     .OP3 (D, A, B, C,  4, 11, 0x4bdecfa9)
     .OP3 (C, D, A, B,  7, 16, 0xf6bb4b60)
     .OP3 (B, C, D, A, 10, 23, 0xbebfbc70)
     .OP3 (A, B, C, D, 13,  4, 0x289b7ec6)
     .OP3 (D, A, B, C,  0, 11, 0xeaa127fa)
     .OP3 (C, D, A, B,  3, 16, 0xd4ef3085)
     .OP3 (B, C, D, A,  6, 23, 0x04881d05)
     .OP3 (A, B, C, D,  9,  4, 0xd9d4d039)
     .OP3 (D, A, B, C, 12, 11, 0xe6db99e5)
     .OP3 (C, D, A, B, 15, 16, 0x1fa27cf8)
     .OP3 (B, C, D, A,  2, 23, 0xc4ac5665)

     # Round 4.
     .OP4 (A, B, C, D,  0,  6, 0xf4292244)
     .OP4 (D, A, B, C,  7, 10, 0x432aff97)
     .OP4 (C, D, A, B, 14, 15, 0xab9423a7)
     .OP4 (B, C, D, A,  5, 21, 0xfc93a039)
     .OP4 (A, B, C, D, 12,  6, 0x655b59c3)
     .OP4 (D, A, B, C,  3, 10, 0x8f0ccc92)
     .OP4 (C, D, A, B, 10, 15, 0xffeff47d)
     .OP4 (B, C, D, A,  1, 21, 0x85845dd1)
     .OP4 (A, B, C, D,  8,  6, 0x6fa87e4f)
     .OP4 (D, A, B, C, 15, 10, 0xfe2ce6e0)
     .OP4 (C, D, A, B,  6, 15, 0xa3014314)
     .OP4 (B, C, D, A, 13, 21, 0x4e0811a1)
     .OP4 (A, B, C, D,  4,  6, 0xf7537e82)
     .OP4 (D, A, B, C, 11, 10, 0xbd3af235)
     .OP4 (C, D, A, B,  2, 15, 0x2ad7d2bb)
     .OP4 (B, C, D, A,  9, 21, 0xeb86d391)

     # Fold the block's starting state back in
     A += A_save
     B += B_save
     C += C_save
     D += D_save

     # Replace the 16 words just processed with the contents of the empty
     # $P0, so the next block (if any) starts at index 0 again
     splice buffer, $P0, 0, 16
     $I0 = buffer

     # Keep going while words remain
     if $I0 > 0 goto md5_loop

     context[0] = A
     context[1] = B
     context[2] = C
     context[3] = D
 
     # _print_vals (A,B,C,D)
     # print "\n"

.end

###########################################################################

# _number_as_hex: format an integer as zero-padded lower-case hex.
#
#   number     the value to format
#   word_size  minimum field width, in hex digits
#   returns    the formatted string
#
# The format "%0<word_size>lx" is assembled at run time and applied with
# sprintf.
.sub _number_as_hex
     .param int number
     .param int word_size

      .local pmc    values
      .local string format
      .local string text

      # sprintf reads its arguments from an array
      values = new Array
      values = 1
      values[0] = number

      # Width as text, spliced between "%0" and "lx"
      text   = word_size
      format = "%0"
      concat format, text
      concat format, "lx"

      sprintf text, format, values

     .pcc_begin_return
      .return text
     .pcc_end_return
.end


###########################################################################

# _print_vals: print four integers to stdout as 8-digit, zero-padded,
# lower-case hex numbers separated by single spaces.  No newline is
# written.
.sub _print_vals
     .param int A
     .param int B
     .param int C
     .param int D

     .local pmc    values
     .local string line

     # sprintf reads its arguments from an array
     values = new Array
     values = 4
     values[0] = A
     values[1] = B
     values[2] = C
     values[3] = D

     sprintf line, "%08lx %08lx %08lx %08lx", values
     print line
.end

###########################################################################

# _md5_print: print the MD5 state held in context[0..3] via _print_vals.
#
# Unless the configuration reports "bigendian" as true, each of the four
# words is byte-swapped with the `swap` macro before printing.
.sub _md5_print
     .param Array context

     .local int A
     .local int B
     .local int C
     .local int D

     .local pmc config
     .local int bigendian

     # Look up the host byte order first
     config    = _config()
     bigendian = config["bigendian"]

     A = context[0]
     B = context[1]
     C = context[2]
     D = context[3]

     if bigendian goto md5_print_emit

     .swap (A)
     .swap (B)
     .swap (C)
     .swap (D)

md5_print_emit:

     _print_vals (A,B,C,D)
.end

###########################################################################

# For debugging

# _print_buffer: debugging aid that dumps every element of an Array.
#
#   buffer     the Array to dump
#   word_size  field width handed to _number_as_hex for each element
#
# Each element is printed in hex followed by " | "; a newline ends the
# dump.
.sub _print_buffer
     .param Array buffer
     .param int word_size

     .local int total
     .local int index
     .local int element
     .local string text

     total = buffer
     index = 0
     goto print_buffer_check

print_buffer_body:
     element = buffer[index]
     text = _number_as_hex (element, word_size)
     print text
     print " | "
     inc index

print_buffer_check:
     if index < total goto print_buffer_body

     print "\n"
.end

Reply via email to