#!/bin/sh
# Version 0.91
# Released to the public domain by Daniel M. Webb, 2005
# Program name as shown in the help text and in report headings.
THIS="mirror-verify"

# Both rsync paths are mandatory: unless the second positional argument is
# present and non-empty, print the help text to stdout and stop with status 1.
[ -n "$2" ] || {
    cat <<END_OF_HELP
$THIS - rigorously test a mirror to make sure it matches its source

usage: $THIS <source root> <destination root> [--existing] 
             [--filelist <filelist>]

where <source root> and <destination root> are rsync paths used like:

  % rsync <source root> <destination root>

  --existing means only check files that exist on source and destination

  <filelist> is a file to be used with the rsync --from0 and --files-from option.
  You probably don't want to use this option with --existing, since that ignores
  files that are in <filelist> but not on <destination root>.

  This script does not write to any files.

  Exit status 0 is returned and no text is returned if both sides are identical.

  If there are files that don't match, a report with the list of files is
  sent to stdout and exit status 1 is returned.  
END_OF_HELP
    exit 1
}

# die <message words...>
#
# Print the arguments, joined by single spaces, as one line on stderr and
# terminate the script with exit status 1.
#
# Defined with the POSIX "name()" form because the script runs under /bin/sh,
# where the "function" keyword is not guaranteed to exist.  The message goes
# to stderr so it cannot be mistaken for part of the mismatch report that the
# script writes to stdout, and printf is used so that a message such as "-n"
# is printed literally instead of being taken as an echo option.
die() {
    printf '%s\n' "$*" >&2
    exit 1
}

# The two rsync paths come first; everything after them is an option.
SOURCE="$1"
DEST="$2"
shift 2

# parse_options [--existing] [--filelist <filelist>]
#
# Interpret the optional arguments that follow the two rsync paths.  The
# options may appear in either order.
#
# Sets:
#   only_existing  "true" when --existing was given, otherwise "false"
#   filelist       the path given with --filelist (assigned only when present)
#   ipatterns      rsync options selecting that file list, or "" without one
#
# Calls die (which ends the script with status 1) for an unknown option, for
# --filelist without a value, and for a <filelist> that is not a regular file
# or whose path cannot be passed on safely.
parse_options() {
    only_existing=false
    ipatterns=""
    while [ $# -gt 0 ]; do
        case "$1" in
            '')
                # Empty arguments were never treated as options; skip them.
                ;;
            --existing)
                only_existing=true
                ;;
            --filelist)
                [ $# -ge 2 ] || die "--filelist requires a <filelist> argument"
                shift
                filelist="$1"
                [ -f "$filelist" ] || die "$filelist isn't a file"
                # ipatterns is a flat string that the rsync calls expand
                # unquoted, so a path containing whitespace or glob
                # characters would reach rsync as the wrong arguments.
                # Refuse it here rather than run a comparison that cannot
                # be trusted.
                case "$filelist" in
                    *[[:space:]]* | *\** | *\?* | *\[*)
                        die "$filelist: <filelist> path must not contain whitespace or any of * ? ["
                        ;;
                esac
                ipatterns="--from0 --files-from $filelist"
                ;;
            *)
                die "invalid option '$1', expected --existing or --filelist <filelist>"
                ;;
        esac
        shift
    done
}
parse_options "$@"

# Becomes "true" as soon as any comparison reports a difference.
failed=false

# rsync_filter <rsync output>
#
# Write the given rsync output to stdout with rsync's own informational lines
# removed, so that only the per-file lines remain.  Removed are:
#   - the file-list header, in both its older wording ("building file list
#     ... done", "receiving file list ... done") and its newer wording
#     ("sending incremental file list", "receiving incremental file list");
#   - the "sent ... bytes  received ... bytes ... bytes/sec" summary line;
#   - the "total size is ... speedup is ..." summary line.
# Trailing newlines (including trailing blank lines) are dropped, so output
# that consisted only of such lines yields an empty string.
#
# Uses the POSIX function form, printf and "grep -E" rather than "function",
# "echo -n" and egrep: the script runs under /bin/sh, "echo -n" is not
# portable (and swallows input such as "-n"), and egrep is obsolescent.
# The result is staged in the global variable `result`.
rsync_filter() {
    result=$(printf '%s\n' "$1" \
        | grep -Ev -e 'building file list ... done' \
                   -e '^receiving file list \.\.\. done$' \
                   -e '^(sending|receiving) incremental file list$' \
                   -e 'sent .* bytes.*received .* bytes .* bytes/sec' \
                   -e 'total size is.*speedup is.*')
    printf '%s' "$result"
}

# record_rsync_failure <rsync exit status> <description of the check>
#
# rsync's stderr is discarded by every call below, so a run that could not
# complete (bad path, unreachable host, rejected option, ...) would otherwise
# produce no output and be indistinguishable from "no differences found".
# A non-zero status therefore marks the whole verification as failed and is
# reported on stdout alongside the other warnings.
record_rsync_failure() {
    if [ "$1" -ne 0 ]; then
        failed=true
        echo "$THIS: WARNING: rsync exited with status $1 while checking for $2; this check is incomplete"
    fi
}

# Note on $ipatterns: it holds several space-separated rsync options and is
# expanded unquoted on purpose so that the shell splits it into words.

# Find files that only exist on source
#
# --ignore-existing makes the dry run skip everything already present on the
# destination, so what it lists is what would have to be created there.
# (--existing must not be added here: per rsync(1), combining it with
# --ignore-existing means no files are updated at all, so nothing would ever
# be listed.)  --out-format='%n%L' prints one line per item without the
# header and summary lines that -v would add.
if [ "$only_existing" != true ]; then
    # shellcheck disable=SC2086
    raw=$(rsync -az --omit-dir-times --ignore-existing --dry-run \
          --out-format='%n%L' $ipatterns "$SOURCE" "$DEST" 2>/dev/null)
    record_rsync_failure $? "files missing from $DEST"
    FILES1=$(rsync_filter "$raw")
    if [ -n "$FILES1" ]; then
        failed=true
        echo "$THIS: WARNING: Files on $SOURCE that don't exist on $DEST:"
        printf '%s\n' "$FILES1"
    fi
fi

# Find files that only exist on destination
#
# A --delete dry run announces each extraneous destination entry on a line
# starting with "deleting "; only those lines are kept, minus the prefix.
if [ "$only_existing" != true ]; then
    # shellcheck disable=SC2086
    raw=$(rsync -avz --omit-dir-times --delete --dry-run $ipatterns \
          "$SOURCE" "$DEST" 2>/dev/null)
    record_rsync_failure $? "extra files on $DEST"
    raw=$(printf '%s\n' "$raw" | sed -n -e 's/^deleting //p')
    FILES2=$(rsync_filter "$raw")
    if [ -n "$FILES2" ]; then
        failed=true
        echo "$THIS: WARNING: Files on $DEST that don't exist on $SOURCE:"
        printf '%s\n' "$FILES2"
    fi
fi

# Find files that exist on both but whose checksums don't match
#
# --existing restricts the run to entries already on the destination and
# --checksum makes rsync compare file contents instead of size and mtime.
# Without -v or an output format a dry run names nothing, so the item names
# are requested explicitly with --out-format.
# shellcheck disable=SC2086
raw=$(rsync -az --omit-dir-times --existing --checksum --dry-run \
      --out-format='%n%L' $ipatterns "$SOURCE" "$DEST" 2>/dev/null)
record_rsync_failure $? "checksum differences"
FILES3=$(rsync_filter "$raw")
if [ -n "$FILES3" ]; then
    failed=true
    echo "$THIS: WARNING: Files on $SOURCE and $DEST that have checksum differences:" 
    printf '%s\n' "$FILES3"
fi

# Exit status 1 if any check reported a difference or could not run.
if [ "$failed" = true ]; then
    exit 1
else
    exit 0
fi

