Hi Guys,
for fun I've written md5sum using IMCC, and have attached my first cut.
It will need some further testing, but for the most part it works well. There are some limitations:
* 200K file limit imposed when reading in the file
* 512MB(?) limit imposed by shortcut in algorithm
* Haven't tested on 64 bit processors
* Haven't tested on big endian
It would be nice to make sure that it works on all architectures as it would fit well into the parrot test suite; it would be good for both benchmarking and functionality testing.
The only problems have been:
* Using macros which call one another seemed to hang if argument names were reused
(There have been discussions on the future of macros, I know)
* /Possible/ IMCC register allocation bug which I need to reinvestigate (see earlier post)
(May actually have been the following JIT problem?)
* Results from i386 JIT can be corrupt and need investigating (lots of FFs)
* Seem to get ICU/string errors on some binary files
* Some checksum discrepancies which I need to investigate
Cheers,
Nick
# Parrot md5sum; Nick Glencross <[EMAIL PROTECTED]>
#
# Based on md5.c, from md5sum
# written by Ulrich Drepper <[EMAIL PROTECTED]>, 1995.
=head1 NAME

md5sum.imc - calculate MD5 checksums

=head1 SYNOPSIS

  parrot md5sum.imc filename [filename ...]

=head1 DESCRIPTION

This is a pure Parrot MD5 hash routine.

=head1 SUBROUTINES

=head2 _md5sum

Pass in a string, returns a PMC Array with the result

=head1 BUGS

Only tested so far on i386.

=over 4

=item * 64 bit platforms: intermediate values are masked to 32 bits, but
this has not been tested

=item * Big endian systems: words are built arithmetically and always
byte-swapped, but this has not been tested

=item * The command line harness reads at most 200000 bytes per file

=back

=cut

###########################################################################
# Main Harness to show that it works
#
# Takes the command line arguments in P5.  Prints a usage message to
# stderr when no file names are given; otherwise prints, for each file,
# the four digest words followed by a tab and the file name.

.sub _main
    .local Array args
    .local int maxsize

    args = P5

    # Argument count (not counting the program name in args[0])
    $I0 = args
    $I0 = $I0 - 1
    if $I0 > 0 goto has_args

    $S0 = args[0]
    printerr "(parrot) "
    printerr $S0
    printerr " filename [filename ...]\n"
    end

has_args:
    # Upper bound on the number of bytes read from each file
    maxsize = 200000
    $I1 = 1

next_iter:
    if $I1 > $I0 goto iter_done

    $S0 = args[$I1]
    open $P0, $S0, "<"
    defined $I2, $P0
    if $I2 goto found

    printerr $S0
    printerr ": Cannot find\n"
    goto iter_cont

found:
    read $S1, $P0, maxsize
    close $P0

    # A read that fills the whole request is treated as a file that is
    # too big to checksum in one go
    $I2 = length $S1
    if $I2 < maxsize goto size_ok

    # Name the offending file, like the "Cannot find" message does
    printerr $S0
    printerr ": Too large\n"
    goto iter_cont

size_ok:
    $P0 = _md5sum ($S1)
    _md5_print ($P0)
    print "\t"
    print $S0
    print "\n"

iter_cont:
    $I1 = $I1 + 1
    goto next_iter

iter_done:
    end
.end

###########################################################################
# Low-level macros used in MD5
#
# FF, FH and FI leave their result in the local 'tmp' of the sub that
# expands them.

# A parrot rol instruction might be good (as it can often be JIT'd)
# Rotates x left by n bits into out, using $I1000 as scratch.  x must
# already be confined to 32 bits for the rotate to be correct.
.macro rol (x,n, out)
    shl .out, .x, .n
    $I1000 = 32-.n
    $I1000 = .x >>> $I1000
    bor .out, $I1000
.endm

# tmp = d ^ (b & (c ^ d))
.macro FF (b,c,d)
    tmp = .c ~ .d
    tmp = .b & tmp
    tmp = .d ~ tmp
.endm

# tmp = b ^ c ^ d
.macro FH (b,c,d)
    tmp = .b ~ .c
    tmp = tmp ~ .d
.endm

# tmp = c ^ (b | ~d)
.macro FI (b,c,d)
    tmp = ~.d
    tmp = .b | tmp
    tmp = .c ~ tmp
.endm

###########################################################################
# Higher level MD5 operations
#
# common: a = b + rol (a + tmp + T + buffer[k], s)
# The sum is masked to 32 bits before the rotate: with integer registers
# wider than 32 bits the carries above bit 31 would otherwise be shifted
# back into the result by the '>>>' inside rol.
#
# The OP macros deliberately use different argument names from 'common'
# (nested macros with reused argument names were seen to hang).

.macro common (a, b, k, s, T)
    .a = .a + tmp
    .a = .a + .T
    tmp = buffer[.k]
    .a = .a + tmp
    .a = .a & 0xffffffff
    .rol (.a, .s, tmp)
    .a = .b + tmp
.endm

.macro OP1 (aa,bb,cc,dd, kk, ss, TT)
    .FF (.bb,.cc,.dd)
    .common (.aa, .bb, .kk, .ss, .TT)
.endm

.macro OP2 (aa,bb,cc,dd, kk, ss, TT)
    .FF (.dd,.bb,.cc)
    .common (.aa, .bb, .kk, .ss, .TT)
.endm

.macro OP3 (aa,bb,cc,dd, kk, ss, TT)
    .FH (.bb,.cc,.dd)
    .common (.aa, .bb, .kk, .ss, .TT)
.endm

.macro OP4 (aa,bb,cc,dd, kk, ss, TT)
    .FI (.bb,.cc,.dd)
    .common (.aa, .bb, .kk, .ss, .TT)
.endm

###########################################################################
# Reverses the four low bytes of w in place, using $I10-$I13 as scratch.

.macro swap (w)
    $I10 = .w & 0x000000ff
    $I11 = .w & 0x0000ff00
    $I12 = .w & 0x00ff0000
    $I13 = .w & 0xff000000
    $I10 = $I10 << 24
    $I11 = $I11 << 8
    $I12 = $I12 >>> 8
    $I13 = $I13 >>> 24
    $I10 = $I10 | $I11
    $I10 = $I10 | $I12
    .w = $I10 | $I13
    # For 64-bit architectures
    .w = .w & 0xffffffff
.endm

###########################################################################
# _md5sum (str)
#
# Computes the MD5 state for 'str' and returns it as an Array whose
# elements 0..3 hold the words A, B, C and D.  Warnings are written to
# stderr when the configured integer size is not 4 bytes or the platform
# is reported as big endian; the computation is carried out either way.

.include "library/config.imc"

.sub _md5sum
    .param string str

    $P0 = _config()

    $I0 = $P0["intsize"]
    if $I0 == 4 goto is_4byte_word
    printerr "This doesn't seem to be a 32 bit processor: "
    printerr "Please verify the MD5 checksum\n"

is_4byte_word:
    $I0 = $P0["bigendian"]
    unless $I0 goto is_little_endian
    printerr "This appears to be a big endian processor: "
    printerr "Please verify the MD5 checksum\n"

is_little_endian:
    .local Array buffer
    buffer = new Array

    # Context array; only elements 0..3 are written by the code below
    $P0 = new Array
    $P0 = 7

    _md5_create_buffer (str, buffer, $I0)
    # _print_buffer (buffer, 8)
    _md5_init ($P0)
    _md5_process_buffer ($P0, buffer)

    .pcc_begin_return
    .return $P0
    .pcc_end_return
.end

###########################################################################
# _md5_init (context)
#
# Stores the four initial MD5 chaining values in context[0..3].

.sub _md5_init
    .param pmc context

    # Initial MD5 constants
    context[0] = 0x67452301
    context[1] = 0xefcdab89
    context[2] = 0x98badcfe
    context[3] = 0x10325476
.end

###########################################################################
# _md5_create_buffer (str, buffer, endian)
#
# Fills 'buffer' with the padded message as 32 bit words: the bytes of
# 'str', one 0x80 pad byte, and the message length in bits in the last
# two words (low word first).  The buffer length is a multiple of 16
# words.
#
# 'endian' is kept so existing callers still work, but is no longer
# consulted.  Each word is assembled with shifts from individual bytes,
# first byte in the most significant position, so its value does not
# depend on the host's byte order; it always has to be byte-swapped to
# get the little-endian word value that MD5 is defined on.
#
# NOTE(review): words between the pad byte and the length are never
# assigned; this relies on unset Array elements reading back as 0.

.sub _md5_create_buffer
    .param string str
    .param Array buffer
    .param int endian

    .local int counter
    .local int subcounter
    .local int slow_counter
    .local int word

    # $I0 = message length in bytes, $I1 = index of the last message byte
    $I0 = length str
    $I1 = $I0 - 1

    # Room for the message, at least one pad byte and the 8 length
    # bytes, rounded up to a whole number of 64 byte blocks
    .local int words
    words = $I0 + 8
    words = words | 63
    words = words + 1
    words = words / 4
    buffer = words

    word = 0
    counter = 0
    subcounter = 0
    slow_counter = 0

md5_create_buffer_loop:
    $I5 = counter + subcounter
    if $I5 > $I0 goto md5_create_buffer_break

    # MD5 pad character, used for the position just past the message
    $I4 = 0x80
    if $I5 > $I1 goto string_char
    $S0 = str[$I5]
    $I4 = ord $S0

string_char:
    word = word << 8
    word = word | $I4
    subcounter = subcounter + 1
    if subcounter != 4 goto md5_create_buffer_loop

    .swap (word)
    buffer[slow_counter] = word

    word = 0
    counter = counter + 4
    subcounter = 0
    slow_counter = slow_counter + 1
    goto md5_create_buffer_loop

md5_create_buffer_break:
    # Check for a partial word
    if subcounter == 0 goto complete

    # Shift the bytes collected so far up to the top of the word
    subcounter = 4 - subcounter
    .local int shift
    shift = 8*subcounter
    word = word << shift

    .swap (word)
    buffer[slow_counter] = word

complete:
    # Message length in bits: high word, then low word
    $I1 = $I0 >>> 29
    words = words - 1
    buffer[words] = $I1

    # Keep all 32 low bits of the bit count
    $I0 = $I0 << 3
    $I0 = $I0 & 0xffffffff
    words = words - 1
    buffer[words] = $I0
.end

###########################################################################
# _md5_process_buffer (context, buffer)
#
# Runs the MD5 rounds over 'buffer' sixteen words at a time, starting
# from the state in context[0..3], and writes the resulting state back
# to context[0..3].  The processed words are removed from 'buffer' as it
# goes, so the buffer is empty on return.

.sub _md5_process_buffer
    .param pmc context
    .param Array buffer

    .local int A
    .local int B
    .local int C
    .local int D
    .local int A_save
    .local int B_save
    .local int C_save
    .local int D_save
    .local int tmp

    # Empty array, spliced over each block that has been processed
    $P0 = new Array

    A = context[0]
    B = context[1]
    C = context[2]
    D = context[3]

md5_loop:
    A_save = A
    B_save = B
    C_save = C
    D_save = D

    # Round 1.
    .OP1 (A, B, C, D, 0, 7, 0xd76aa478)
    .OP1 (D, A, B, C, 1, 12, 0xe8c7b756)
    .OP1 (C, D, A, B, 2, 17, 0x242070db)
    .OP1 (B, C, D, A, 3, 22, 0xc1bdceee)
    .OP1 (A, B, C, D, 4, 7, 0xf57c0faf)
    .OP1 (D, A, B, C, 5, 12, 0x4787c62a)
    .OP1 (C, D, A, B, 6, 17, 0xa8304613)
    .OP1 (B, C, D, A, 7, 22, 0xfd469501)
    .OP1 (A, B, C, D, 8, 7, 0x698098d8)
    .OP1 (D, A, B, C, 9, 12, 0x8b44f7af)
    .OP1 (C, D, A, B, 10,17, 0xffff5bb1)
    .OP1 (B, C, D, A, 11,22, 0x895cd7be)
    .OP1 (A, B, C, D, 12, 7, 0x6b901122)
    .OP1 (D, A, B, C, 13,12, 0xfd987193)
    .OP1 (C, D, A, B, 14,17, 0xa679438e)
    .OP1 (B, C, D, A, 15,22, 0x49b40821)

    # Round 2.
    .OP2 (A, B, C, D, 1, 5, 0xf61e2562)
    .OP2 (D, A, B, C, 6, 9, 0xc040b340)
    .OP2 (C, D, A, B, 11, 14, 0x265e5a51)
    .OP2 (B, C, D, A, 0, 20, 0xe9b6c7aa)
    .OP2 (A, B, C, D, 5, 5, 0xd62f105d)
    .OP2 (D, A, B, C, 10, 9, 0x02441453)
    .OP2 (C, D, A, B, 15, 14, 0xd8a1e681)
    .OP2 (B, C, D, A, 4, 20, 0xe7d3fbc8)
    .OP2 (A, B, C, D, 9, 5, 0x21e1cde6)
    .OP2 (D, A, B, C, 14, 9, 0xc33707d6)
    .OP2 (C, D, A, B, 3, 14, 0xf4d50d87)
    .OP2 (B, C, D, A, 8, 20, 0x455a14ed)
    .OP2 (A, B, C, D, 13, 5, 0xa9e3e905)
    .OP2 (D, A, B, C, 2, 9, 0xfcefa3f8)
    .OP2 (C, D, A, B, 7, 14, 0x676f02d9)
    .OP2 (B, C, D, A, 12, 20, 0x8d2a4c8a)

    # Round 3.
    .OP3 (A, B, C, D, 5, 4, 0xfffa3942)
    .OP3 (D, A, B, C, 8, 11, 0x8771f681)
    .OP3 (C, D, A, B, 11, 16, 0x6d9d6122)
    .OP3 (B, C, D, A, 14, 23, 0xfde5380c)
    .OP3 (A, B, C, D, 1, 4, 0xa4beea44)
    .OP3 (D, A, B, C, 4, 11, 0x4bdecfa9)
    .OP3 (C, D, A, B, 7, 16, 0xf6bb4b60)
    .OP3 (B, C, D, A, 10, 23, 0xbebfbc70)
    .OP3 (A, B, C, D, 13, 4, 0x289b7ec6)
    .OP3 (D, A, B, C, 0, 11, 0xeaa127fa)
    .OP3 (C, D, A, B, 3, 16, 0xd4ef3085)
    .OP3 (B, C, D, A, 6, 23, 0x04881d05)
    .OP3 (A, B, C, D, 9, 4, 0xd9d4d039)
    .OP3 (D, A, B, C, 12, 11, 0xe6db99e5)
    .OP3 (C, D, A, B, 15, 16, 0x1fa27cf8)
    .OP3 (B, C, D, A, 2, 23, 0xc4ac5665)

    # Round 4.
    .OP4 (A, B, C, D, 0, 6, 0xf4292244)
    .OP4 (D, A, B, C, 7, 10, 0x432aff97)
    .OP4 (C, D, A, B, 14, 15, 0xab9423a7)
    .OP4 (B, C, D, A, 5, 21, 0xfc93a039)
    .OP4 (A, B, C, D, 12, 6, 0x655b59c3)
    .OP4 (D, A, B, C, 3, 10, 0x8f0ccc92)
    .OP4 (C, D, A, B, 10, 15, 0xffeff47d)
    .OP4 (B, C, D, A, 1, 21, 0x85845dd1)
    .OP4 (A, B, C, D, 8, 6, 0x6fa87e4f)
    .OP4 (D, A, B, C, 15, 10, 0xfe2ce6e0)
    .OP4 (C, D, A, B, 6, 15, 0xa3014314)
    .OP4 (B, C, D, A, 13, 21, 0x4e0811a1)
    .OP4 (A, B, C, D, 4, 6, 0xf7537e82)
    .OP4 (D, A, B, C, 11, 10, 0xbd3af235)
    .OP4 (C, D, A, B, 2, 15, 0x2ad7d2bb)
    .OP4 (B, C, D, A, 9, 21, 0xeb86d391)

    A += A_save
    B += B_save
    C += C_save
    D += D_save

    # Confine the state to 32 bits so that nothing above bit 31 is
    # carried into the next block or handed back to the caller
    A = A & 0xffffffff
    B = B & 0xffffffff
    C = C & 0xffffffff
    D = D & 0xffffffff

    # Drop the 16 words just processed; loop while any remain
    splice buffer, $P0, 0, 16
    $I0 = buffer
    if $I0 > 0 goto md5_loop

    context[0] = A
    context[1] = B
    context[2] = C
    context[3] = D

    # _print_vals (A,B,C,D)
    # print "\n"
.end

###########################################################################
# _number_as_hex (number, word_size)
#
# Returns 'number' formatted with the sprintf format "%0<word_size>lx".

.sub _number_as_hex
    .param int number
    .param int word_size

    $P0 = new Array
    $P0 = 1
    $P0[0] = number

    # Build the format string "%0" . word_size . "lx"
    $S1 = "%0"
    $S0 = word_size
    concat $S1, $S0
    concat $S1, "lx"

    sprintf $S0, $S1, $P0

    .pcc_begin_return
    .return $S0
    .pcc_end_return
.end

###########################################################################
# _print_vals (A, B, C, D)
#
# Prints the four values as space separated "%08lx" fields, without a
# trailing newline.

.sub _print_vals
    .param int A
    .param int B
    .param int C
    .param int D

    $P0 = new Array
    $P0 = 4
    $P0[0] = A
    $P0[1] = B
    $P0[2] = C
    $P0[3] = D

    sprintf $S0, "%08lx %08lx %08lx %08lx", $P0
    print $S0
.end

###########################################################################
# _md5_print (context)
#
# Prints the digest held in context[0..3].  Each word is byte-swapped
# first so that its least significant byte is printed first.  The swap
# acts on integer values rather than on memory, so it is done whatever
# the host's byte order is.

.sub _md5_print
    .param Array context

    .local int A
    .local int B
    .local int C
    .local int D

    A = context[0]
    B = context[1]
    C = context[2]
    D = context[3]

    .swap (A)
    .swap (B)
    .swap (C)
    .swap (D)

    _print_vals (A,B,C,D)
.end

###########################################################################
# For debugging
#
# _print_buffer (buffer, word_size)
#
# Prints every element of 'buffer' in hex, each followed by " | ", then
# a newline.  'word_size' is passed on to _number_as_hex.

.sub _print_buffer
    .param Array buffer
    .param int word_size

    .local int size
    size = buffer

    .local int counter
    .local int value
    counter = 0

print_buffer_loop:
    if counter >= size goto print_buffer_done

    value = buffer[counter]
    $S0 = _number_as_hex (value, word_size)
    print $S0
    print " | "

    counter = counter + 1
    goto print_buffer_loop

print_buffer_done:
    print "\n"
.end