#!/bin/bash
# s3md5 Copyright (C) 2013
# Antonio Espinosa
#
# This file is part of s3md5 by Teachnova (www.teachnova.com)
#
# s3md5 is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# s3md5 is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with s3md5. If not, see <http://www.gnu.org/licenses/>.
#
# Purpose: compute the Etag that S3 assigns to a multipart upload, i.e. the
# MD5 of the concatenated binary MD5 digests of every chunk, followed by
# "-<number of chunks>". Optionally compare it against a given etag.
#
# Exit codes: 0 success / etag matches, 1 usage shown or etag mismatch,
#             2 invalid input, missing tool or read failure.

VERSION='1.2'
DEBUG=0

# Without pipefail a failing dd would be hidden behind the md5 stage of the
# checksum pipeline and a wrong etag would be printed.
set -o pipefail

##################################################################
# s3md5 paths

#######################################
# Print the directory that holds this script, following symlinks.
# Outputs: absolute directory path on stdout
#######################################
root_path() {
  local src="${BASH_SOURCE[0]}"
  local dir
  dir="$(dirname "$src")"
  while [ -h "$src" ]; do
    src="$(readlink "$src")"
    # A relative link target is relative to the directory of the link itself
    [[ $src != /* ]] && src="$dir/$src"
    dir="$(cd -P "$(dirname "$src")" && pwd)"
  done
  dir="$(cd -P "$(dirname "$src")" && pwd)"
  echo "$dir"
}

#######################################
# Check that a value is a non-negative decimal integer within optional bounds.
# Arguments: $1 - value to test
#            $2 - optional minimum (inclusive)
#            $3 - optional maximum (inclusive)
# Returns:   0 when valid, 1 otherwise
#######################################
is_integer() {
  local value="$1"
  local min="$2"
  local max="$3"

  if ! [[ "$value" =~ ^[0-9]+$ ]]; then
    return 1
  fi
  if [ -n "$min" ] && [ "$min" -gt "$value" ]; then return 1; fi
  if [ -n "$max" ] && [ "$max" -lt "$value" ]; then return 1; fi

  return 0
}

#######################################
# Print the version and license banner.
# Globals: VERSION (read)
#######################################
license_show() {
  cat << LICENSE
s3md5 v$VERSION
Copyright (C) 2013 Antonio Espinosa - TeachNova

This program comes with ABSOLUTELY NO WARRANTY. This is free software, and you are
welcome to redistribute it GPLv3 license conditions. Read LICENSE.md for more details.

LICENSE
}

#######################################
# Print the usage summary.
# NOTE(review): the original text of the "unknown option" notice and of the
# placeholder arguments was lost; the wording below is reconstructed from the
# examples and from how main() parses its arguments.
# Globals:   APP (read)
# Arguments: $1 - optional unrecognised option to report
#######################################
short_help() {
  local option=$1

  if [[ -n "$option" ]]; then
    cat << HELP
Unknown option : $option

HELP
  fi

  cat << HELP
Usage :
    # Calculate the Etag/S3 MD5 Sum of a file
    $APP <size_in_MB> <file> [-v|--verbose] [-H|--show-filename]
    # Verify the S3 etag of a file
    $APP -e|--etag <etag> <size_in_MB> <file> [-v|--verbose]
    # Short help
    $APP -h
    # Extended help
    $APP --help

Example :
    # Use 15 MB chunk size (as default in s3cmd .s3cfg config file)
    # and get the etag of "myFile"
    $APP 15 myFile
    # Given the S3 etag, verify if it matches "myFile"
    $APP --etag e3ad981c2963eb533e3e58175cf1c966-8 64 myFile

HELP
}

#######################################
# Print the license banner, a description and the usage summary.
#######################################
extended_help() {
  license_show
  cat << HELP
Calculates the Etag/S3 MD5 Sum of a file, using the same algorithm
that S3 uses on multipart uploaded files.

Specially usefull on files bigger than 5GB uploaded using multipart S3 API.

HELP
  short_help
}

ARGS=()

#######################################
# Parse the command line, then hand the positional arguments to run().
# Options may appear before, between or after the positional arguments.
# Globals:   ETAG, SHOW_FILENAME, DEBUG, ARGS (written)
# Arguments: the script's command line
#######################################
main() {
  while [ $# -gt 0 ]; do
    case "$1" in
      -h)
        short_help
        exit 1
        ;;
      --help)
        extended_help
        exit 1
        ;;
      -e|--etag)
        shift
        ETAG=$1
        # Only consume the value when one was actually supplied
        if [ $# -gt 0 ]; then shift; fi
        ;;
      -H|--show-filename)
        SHOW_FILENAME=1
        shift
        ;;
      -v|--verbose)
        DEBUG=1
        shift
        ;;
      -*)
        short_help "$1"
        exit 1
        ;;
      *)
        ARGS+=("$1")
        shift
        ;;
    esac
  done

  run "${ARGS[@]}"
}

# Temporary files; created with mktemp further down, removed by cleanup().
ERROR_FILE=''
SUM_FILE=''
BIN_FILE=''

#######################################
# Remove whichever temporary files have been created so far.
# Globals: ERROR_FILE, SUM_FILE, BIN_FILE (read)
#######################################
cleanup() {
  local tmp
  for tmp in "$ERROR_FILE" "$SUM_FILE" "$BIN_FILE"; do
    if [ -n "$tmp" ]; then
      rm -f -- "$tmp"
    fi
  done
}

#######################################
# Print a message only in verbose mode.
# Globals:   DEBUG (read)
# Arguments: message words
#######################################
debug() {
  if [ "$DEBUG" -eq 1 ]; then
    printf '%s\n' "$*"
  fi
}

ROOT_PATH=$(root_path)
APP=$(basename "$0")

# Linux ships md5sum, macOS/BSD ship md5. Both print the bare digest as the
# first space-separated field when they read from stdin.
if command -v md5sum > /dev/null 2>&1; then
  MD5_BIN=md5sum
else
  MD5_BIN=md5
fi

#######################################
# Abort when a required external command is missing, instead of silently
# producing a wrong checksum later on.
# Arguments: command names
#######################################
require_tools() {
  local tool
  for tool in "$@"; do
    if ! command -v "$tool" > /dev/null 2>&1; then
      printf '%s\n' "ERROR : Required command '$tool' not found"
      exit 2
    fi
  done
}

#######################################
# Print the text after the first '-' of an etag (the part count). An etag
# without '-' is printed unchanged, so callers must validate the result.
# Arguments: $1 - etag
#######################################
get_chunks_from_etag() {
  printf '%s\n' "$1" | cut -d'-' -f 2
}

#######################################
# Print the size of a file as reported by "du -m".
# NOTE: despite the name, the unit is megabytes of disk usage, not bytes;
# the value is only used for the verbose report.
# Arguments: $1 - file path
#######################################
get_filesize_in_bytes() {
  du -m "$1" | cut -f 1
}

#######################################
# Report which chunk sizes are compatible with a file size and part count.
# Arguments: $1 - file size in MB (integer)
#            $2 - number of chunks (integer >= 1)
#######################################
get_chunk_size() {
  local filesize=$1
  local chunks=$2
  local min_chunk_size=$((filesize / chunks))

  # A single chunk has no upper bound; computing one would divide by zero
  if [ "$chunks" -le 1 ]; then
    printf '%s\n' "With $chunks chunks, the possible chunk sizes are $min_chunk_size MB or more"
    return 0
  fi

  local max_chunk_size=$((filesize / (chunks - 1)))
  printf '%s\n' "With $chunks chunks, the possible chunk sizes are between $min_chunk_size MB and $max_chunk_size MB"
}

#######################################
# Validate the positional arguments and print the verbose preamble.
# Globals:   SIZE, FILE (written), ETAG, DEBUG (read)
# Arguments: $1 - chunk size in MB, $2 - file path
#######################################
run() {
  # Check parameters
  if [ $# -lt 2 ]; then
    short_help
    exit 1
  fi

  SIZE="$1"
  FILE="$2"

  # Validate before touching the file, so that no tool is run on bad input
  if [ ! -f "$FILE" ]; then
    printf '%s\n' "ERROR : File '$FILE' not found"
    exit 2
  fi
  if [ ! -r "$FILE" ]; then
    printf '%s\n' "ERROR : File '$FILE' is not readable"
    exit 2
  fi
  if ! is_integer "$SIZE"; then
    printf '%s\n' "ERROR : Chunk size is not an integer number"
    exit 2
  fi
  # Force base 10: a value such as "08" would otherwise be rejected as an
  # invalid octal number by shell arithmetic.
  SIZE=$((10#$SIZE))
  if [ "$SIZE" -lt 1 ]; then
    printf '%s\n' "ERROR : Chunk size must be greater than zero"
    exit 2
  fi

  if [ "$DEBUG" -eq 1 ]; then
    local filesize
    filesize=$(get_filesize_in_bytes "$FILE")
    printf '%s\n' "File size: $filesize MB"

    if [ -n "$ETAG" ]; then
      local chunks
      chunks=$(get_chunks_from_etag "$ETAG")
      # Skip the hint when the etag carries no usable part count
      if is_integer "$filesize" && is_integer "$chunks" 1; then
        get_chunk_size "$filesize" "$chunks"
      fi
    fi
  fi
}

main "$@"

require_tools dd cut xxd wc mktemp "$MD5_BIN"

trap cleanup EXIT
TMP_ROOT="${TMPDIR:-/tmp}"
ERROR_FILE=$(mktemp "$TMP_ROOT/s3md5-error.XXXXXX") || exit 2
SUM_FILE=$(mktemp "$TMP_ROOT/s3md5-md5sumlist.XXXXXX") || exit 2
BIN_FILE=$(mktemp "$TMP_ROOT/s3md5-md5bin.XXXXXX") || exit 2

# Exact size in bytes; the arithmetic expansion strips the padding that some
# wc implementations print.
FILE_BYTES=$(($(wc -c < "$FILE")))

n=0 # number of chunks processed so far
i=0 # offset of the next chunk, in MB (dd blocks of 1024k)
while ((i * 1048576 < FILE_BYTES)); do
  if ! sum=$(LC_ALL=C dd bs=1024k count="$SIZE" skip="$i" if="$FILE" 2> "$ERROR_FILE" \
    | "$MD5_BIN" \
    | cut -d' ' -f1); then
    printf '%s\n' "ERROR : Could not read '$FILE'"
    cat "$ERROR_FILE" >&2
    exit 2
  fi

  if [ "$DEBUG" -eq 1 ]; then
    part=$((n + 1))
    to=$((i + SIZE))
    printf '%s' "SUM for part $part ($i to $to MB) ... "
    printf '%s\n' "OK - $sum"
  fi

  i=$((i + SIZE))
  n=$((n + 1))
  # Add its md5 sum to sum list file
  printf '%s\n' "$sum" >> "$SUM_FILE"
done
debug "END"

debug "Converting all md5sums to binary"

# Convert all md5 sums (in hex text notation) to bin and concatenate them into a file
while read -r md5_hex; do
  printf '%s\n' "$md5_hex" | xxd -r -p >> "$BIN_FILE"
done < "$SUM_FILE"

# Calculate MD5 sum of this binary file. Reading from stdin keeps the output
# format identical for md5sum and md5, whatever the temporary file is called.
s3sum=$("$MD5_BIN" < "$BIN_FILE" | cut -d' ' -f1)

if [ "$DEBUG" -eq 1 ]; then
  printf '%s' "Etag/S3 MD5 Sum : "
fi

filename_in_output=""
[[ -n ${SHOW_FILENAME} ]] && filename_in_output=" ${FILE}"

if [ -n "$ETAG" ]; then
  # Return Etag/S3 MD5 Sum
  debug "${s3sum}-${n}${filename_in_output}"
  if [ "${s3sum}-${n}" == "$ETAG" ]; then
    printf '%s\n' "TRUE"
    exit 0
  else
    printf '%s\n' "FALSE"
    exit 1
  fi
else
  # Return Etag/S3 MD5 Sum
  printf '%s\n' "${s3sum}-${n}${filename_in_output}"
fi