I corrected some typos and cleaned up the final section of code. Mike
################################################################
# 2005-09-16
# This is a worksheet that I developed to dissect intact all
# 116 jpgs from about 282MB of an accidentally reformatted
# and partially overwritten 512MB CF card (Olympus c5050)
#
# The 282MB turned out to be un-fragmented, meaning that
# each JPG resided in a continuous stretch of disk space.
#
# Working in a bash shell, the basic approach is to use sed to
# make the jpg begin/close markers grep-able, then use grep to
# identify the jpgs' byte positions, then copy out each byte
# range.
#
########
#
# sed --version
# GNU sed version 4.1.2
# grep --version
# grep (GNU grep) 2.5.1
# bash --version
# GNU bash, version 3.00.0(1)-release (i586-suse-linux)
#
# FF D8 is the beginning marker of a jpg.
# FF D9 is the closing marker of a jpg.
# Since each jpg contains an embedded jpg thumbnail, there will
# be a nested pair of markers.
#
# grep's -b option will report the byte-offset of the line
# containing the match, not the offset of the match itself
# (unless -o is also given).
#
# This page got me started (thanks TsuruZoh Tachibanaya):
#
# http://www.media.mit.edu/pia/Research/deepview/exif.html
#
# my gmail addresses are mike.n.kplug or z23r751
#
# Usage notes:
# - This is a worksheet: run it section by section and inspect
#   the output of each step before moving on.
# - Do not enable "set -e": some steps are expected to return a
#   non-zero status (e.g. the greps that must find no match).
#
################################################################

# Treat the card image as raw bytes. In a multibyte (UTF-8)
# locale sed and grep may refuse to match bytes such as 0xFF.
export LC_ALL=C

########## MAKE A WORKING COPY OF THE FLASH CARD

cat /dev/sda1 > CF

########## MAKE A SMALLER FILE TO WORK WITH
########## THIS STEP IS OPTIONAL

# find the byte offset of the first jpg residing
# in the not-overwritten half of the card

# grab lines containing either the begin marker or exif
# date/time info.

hexdump -C CF \
  | grep -e ' ff d8 \|[0-9]:[0-9][0-9]\|[0-9][0-9]:[0-9]' \
  > CF_grepped_hexdump

# The hexdump takes a few minutes for 512MB
#
# To locate the first deleted jpg, hand search the output paying
# attention to exif date/time strings in the ascii column.
#
# Each line generated by hexdump begins with a hexadecimal
# number that indicates the byte-offset, such as 0d6bf600
#
# the following line converts that number to base-ten

printf '%d\n' 0x0d6bf600

# Calculate the number of bytes in the file that
# follow the offset: filesize - byteoffset

tail -c 287271936 CF > CF_short

# Verify the short file starts with the jpg marker FF D8

hexdump -C CF_short | head

########## MAKE FILES THAT ARE GREP-ABLE for FF D8 and FF D9

# verify that ^BEGIN or ^CLOSE will be unique
# (grep should not find any matches)

grep '^BEGIN' CF_short
grep '^CLOSE' CF_short

# - 0x0A is the newline character
# - the g flag rewrites every marker; without it sed would only
#   rewrite the first marker of each newline-delimited record
#   and silently miss any marker that follows it before the
#   next 0x0A byte.

sed -e 's/\xFF\xD8/\x0ABEGIN/g' < CF_short > SED_begin_1
sed -e 's/\xFF\xD9/\x0ACLOSE/g' < CF_short > SED_close_1

# Note that sed will replace 2 bytes with 6 bytes. Note also
# that the byte offsets for the close markers will indicate
# the beginning of the 2-byte markers, not their ends. These
# 2 issues will need to be accounted for later.

########## GRAB IMPORTANT LINES:

# -a  treat the binary file as text
# -b  print the byte offset in front of each match
# -o  print only the matched word, so each output line is
#     exactly "<offset>:BEGIN" (or "<offset>:CLOSE") with no
#     binary residue to strip
# cut leaves only the byte offset.

grep -a -b -o '^BEGIN' SED_begin_1 | cut -d: -f1 > SED_begin_2
grep -a -b -o '^CLOSE' SED_close_1 | cut -d: -f1 > SED_close_2

wc SED_begin_2
# 232 232 2217 SED_begin_2

wc SED_close_2
# 2391 2391 23802 SED_close_2

# note the excess number of CLOSEs, presumably left over
# from previous uses of the flash card. these excess
# CLOSEs occur (?) at the end of the CF card, and in the
# not-overwritten space that exists between the jpgs

############ INSPECT THE BYTE-DISTANCE BETWEEN SUCCESSIVE BEGINS
############ THIS STEP IS OPTIONAL

# Columns: marker number, this offset, previous offset, distance.
# To list only suspiciously close markers, wrap the printf in
#   if (( distance < 4096 )); then ... fi

old_offset=0
n_begin=0
while IFS= read -r new_offset; do
  n_begin=$(( n_begin + 1 ))
  distance=$(( new_offset - old_offset ))
  printf '%4d %10d %10d %10d\n' \
    "$n_begin" "$new_offset" "$old_offset" "$distance"
  old_offset=$new_offset
done < SED_begin_2

# everything looks good so far (due to exif
# header data, every other distance is 4096+4)

########## ADJUST THE BYTE OFFSETS

# Map offsets in the sed-modified files back to CF_short:
# - Subtract 4(n-1) bytes from the nth offset: every earlier
#   marker grew from 2 bytes to 6 bytes.
# - Subtract 1 for the newline that sed inserted in front of
#   the nth marker word; grep's (0-based) offset points at the
#   letter that follows that newline.
# - Add 2 bytes to the close offsets to include FF D9, so a
#   close offset is the first byte *after* the jpg.

nn=0
while IFS= read -r offset; do
  printf '%d\n' "$(( offset - 4 * nn - 1 ))"
  nn=$(( nn + 1 ))
done < SED_begin_2 > SED_begin_3

nn=0
while IFS= read -r offset; do
  printf '%d\n' "$(( 2 + offset - 4 * nn - 1 ))"
  nn=$(( nn + 1 ))
done < SED_close_2 > SED_close_3

########## GRAB EVERY OTHER LINE, BEGINNING WITH 1ST

# this is necessary due to the embedded jpg thumbnail
# ('1~2p' is a GNU sed address: first line, then every 2nd)

sed -n '1~2p' < SED_begin_3 > SED_begin_4

wc SED_begin_4
# 116 116 1107 SED_begin_2v
# looks like 116 jpgs will be recovered !!

############ CALCULATE THE EXTENT OF EACH JPG

# Prints one row per jpg:
#   file name, jpg number, index of its closing marker,
#   gap since the previous jpg, jpg size, begin offset,
#   close offset
# and copies that byte range out of CF_short.
# For a dry run (table only), comment out the tail|head line.

rm -f -- recovered*jpg

# Load the close offsets once instead of re-reading the file
# for every jpg. (Index assignment keeps this bash 3.0 friendly.)
close_offsets=()
while IFS= read -r offset; do
  close_offsets[${#close_offsets[@]}]=$offset
done < SED_close_3

old_close_offset=0
n_begin=0
while IFS= read -r begin_offset; do
  n_begin=$(( n_begin + 1 ))
  fn=$(printf 'recovered_%04d.jpg' "$n_begin")

  # Now find the second closing marker after the begin marker.
  # The first closing marker belongs to the embedded thumbnail.
  found=0
  n_close=0
  close_offset=
  for candidate in "${close_offsets[@]}"; do
    n_close=$(( n_close + 1 ))
    if (( candidate > begin_offset )); then
      found=$(( found + 1 ))
      if (( found == 2 )); then
        close_offset=$candidate
        break
      fi
    fi
  done

  # Without a second closing marker the extent is unknown; do
  # not compute a bogus (possibly negative) size from whatever
  # offset happened to be scanned last.
  if [[ -z "$close_offset" ]]; then
    printf '%s: no second closing marker after offset %d; skipped\n' \
      "$fn" "$begin_offset" >&2
    continue
  fi

  size_of_jpg=$(( close_offset - begin_offset ))
  size_of_gap=$(( begin_offset - old_close_offset ))

  printf '%12s %5d %5d %10d %10d %10d %10d\n' \
    "$fn" "$n_begin" "$n_close" \
    "$size_of_gap" "$size_of_jpg" \
    "$begin_offset" "$close_offset"
  old_close_offset=$close_offset

  # Copy bytes [begin_offset, close_offset) of CF_short.
  # "tail -c +K" starts output at the K-th byte (1-based), hence
  # the +1. This replaces a byte-at-a-time "dd bs=1", which was
  # really slow.
  tail -c "+$(( begin_offset + 1 ))" CF_short \
    | head -c "$size_of_jpg" > "$fn"
done < SED_begin_4

# --
# [email protected]
# http://www.kernel-panic.org/cgi-bin/mailman/listinfo/kplug-list
