#!/bin/bash
#
# script to compare random samples from two external store databases and report whether they match
# asks one db for the rowcount
# generates 2000 random numbers within the rowcount range
# selects the blob with the random ID, md5sums it from both DBs
# compares the md5sum
# reports how many match / fail
#
# example usage:
# ./verify-copy.sh 10.0.2.158 yowiki blobs es1002.eqiad.wmnet yowiki blobs_cluster10
# passwords
mysqluser="wikiadmin"
mysqlpass="xxxxxxxxx"
# get the database name and the two hostnames from the command line
hosta=$1
dbnamea=$2
tablea=$3
hostb=$4
dbnameb=$5
tableb=$6
# print help
if [[ -z "$hosta" || $hosta == '-h' || $hosta == '--help' ]]
then
echo "Compares random entries in the external database tables."
echo "Example usage:"
echo " ./verify-copy.sh 10.0.2.158 yowiki blobs es1002.eqiad.wmnet yowiki blobs_cluster10"
exit 0
fi
## find out how many rows we've got
num_rows=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hosta $dbnamea -e "select count(*) from $tablea;")
## select 2000 random rows (in 10 groups of 200 rows) and md5sum them, compare, record status
num_failed=0
num_succeeded=0
for j in {1..10}
do
idlist='1'
for i in {1..200}
do
id=$RANDOM
let "id %= $num_rows"
idlist="${idlist},$id"
done
suma=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hosta $dbnamea -e "select blob_text from $tablea where blob_id in ($idlist)" | md5sum | cut -f 1 -d\ )
sumb=$(mysql --batch --skip-column-names -u $mysqluser -p$mysqlpass -h $hostb $dbnameb -e "select blob_text from $tableb where blob_id in ($idlist)" | md5sum | cut -f 1 -d\ )
if [ $suma == $sumb ]
then
let "num_succeeded += 1"
else
let "num_failed += 1"
fi
done
echo "matched: $num_succeeded, failed: $num_failed"