Revamp check_format_compatible.sh (#8012)

Summary:
* Adds backup/restore forward/backward compatibility testing
* Adds forward/backward compatibility testing to sst ingestion
* More structure sharing and comments for the lists of branches
comprising each group
* Less reliant on invariants between groups with de-duplication logic
* Restructured for n+1 branch checkout+build steps rather than something
like 3n. Should be much faster despite more checks.

And to make manual runs easier

* On success, restores working trees to original working branch (aborts
early if uncommitted changes) and deletes temporary branch & remote
* Adds SHORT_TEST=1 mode that uses only the oldest version for each
group
* Adds USE_SSH=1 to use ssh instead of https for github

Pull Request resolved: https://github.com/facebook/rocksdb/pull/8012

Test Plan:
a number of manual tests, mostly with SHORT_TEST=1. Using one
version older for any of the groups (except I didn't check
db_backward_only_refs) fails. Changing default format_version to 5
(planned) without updating this script fails as it should, and passes
with appropriate update. Full local run passed (had to remove "2.7.fb.branch"
due to compiler issues, also before this change).

Reviewed By: riversand963

Differential Revision: D26735840

Pulled By: pdillinger

fbshipit-source-id: 1320c22de5674760657e385aa42df9fade8b6fff
main
Peter Dillinger 3 years ago committed by Facebook GitHub Bot
parent a46f080cce
commit a9046f3c45
  1. 15
      tools/backup_db.sh
  2. 330
      tools/check_format_compatible.sh
  3. 15
      tools/restore_db.sh
  4. 1
      tools/write_external_sst.sh

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
#
# Both positional arguments are required: the DB to back up and the
# directory that receives the backup.
if (( $# < 2 )); then
  echo "usage: ${BASH_SOURCE[0]} <DB Path> <Backup Dir>"
  exit 1
fi

source_db="$1"
target_backup="$2"

echo "== Backing up DB $source_db to $target_backup"
# ldb is expected in the current working directory. As the last command,
# its exit status becomes the exit status of this script.
./ldb backup --db="$source_db" --backup_dir="$target_backup"

@ -1,24 +1,82 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
# A shell script to load some pre generated data file to a DB using ldb tool
# ./ldb needs to be available to be executed.
# A shell script to build and run different versions of ldb to check for
# expected forward and backward compatibility with "current" version. The
# working copy must have no uncommitted changes.
#
# Usage: <SCRIPT> [ref_for_current]
# `ref_for_current` can be a revision, tag, commit or branch name. Default is HEAD.
#
# Usage: <SCRIPT> [checkout]
# `checkout` can be a tag, commit or branch name. The script builds it and checks that DBs generated by all previous branches (or tags, for very old versions without a branch) can be opened by it.
# Return value 0 means all regression tests pass. 1 if not pass.
#
# Environment options:
# SHORT_TEST=1 - Test only the oldest branch for each kind of test. This is
# a good choice for PR validation as it is relatively fast and will find
# most issues.
# USE_SSH=1 - Connect to GitHub with ssh instead of https
if ! git diff-index --quiet HEAD; then
echo "You have uncommitted changes. Aborting."
exit 1
fi
current_checkout_name=${1:-HEAD}
# This allows the script to work even with transient refs like "HEAD"
current_checkout_hash="$(git rev-parse --quiet --verify $current_checkout_name)"
if [ "$current_checkout_hash" == "" ]; then
echo "Not a recognized ref: $current_checkout_name"
exit 1
fi
# To restore to prior branch at the end
orig_branch="$(git rev-parse --abbrev-ref HEAD)"
tmp_branch=_tmp_format_compatible
tmp_origin=_tmp_origin
# Don't depend on what current "origin" might be
set -e
git remote remove $tmp_origin 2>/dev/null || true
if [ "$USE_SSH" ]; then
git remote add $tmp_origin "git@github.com:facebook/rocksdb.git"
else
git remote add $tmp_origin "https://github.com/facebook/rocksdb.git"
fi
git fetch $tmp_origin
scriptpath=`dirname $BASH_SOURCE`
test_dir=${TEST_TMPDIR:-"/tmp"}"/format_compatible_check"
# Undo the git state this script creates: return to the branch that was
# checked out at start, then drop the temporary branch and remote.
# Globals (read): orig_branch, tmp_branch, tmp_origin
# Every step is best-effort: a failing git command is ignored so that the
# remaining steps still run and the function always returns 0.
cleanup() {
  printf '%s\n' "== Cleaning up"
  git checkout "$orig_branch" || :
  git branch -D $tmp_branch || :
  git remote remove $tmp_origin || :
}
trap cleanup EXIT # Always clean up, even on failure or Ctrl+C
scriptpath=`dirname ${BASH_SOURCE[0]}`
test_dir=${TEST_TMPDIR:-"/tmp"}"/rocksdb_format_compatible_$USER"
rm -rf ${test_dir:?}
# For saving current version of scripts as we checkout different versions to test
script_copy_dir=$test_dir"/script_copy"
input_data_path=$test_dir"/test_data_input/"
mkdir -p $script_copy_dir
cp -f $scriptpath/*.sh $script_copy_dir
python_bin=$(which python3 || which python || echo python3)
# For shared raw input data
input_data_path=$test_dir"/test_data_input"
mkdir -p $input_data_path
# For external sst ingestion test
ext_test_dir=$test_dir"/ext"
mkdir -p $ext_test_dir
# For DB dump test
db_test_dir=$test_dir"/db"
mkdir -p $db_test_dir
# For backup/restore test (uses DB test)
bak_test_dir=$test_dir"/bak"
mkdir -p $bak_test_dir
mkdir $test_dir || true
mkdir $input_data_path || true
rm -rf $script_copy_dir
cp $scriptpath $script_copy_dir -rf
python_bin=$(which python3 || which python || echo python3)
# Generate random files.
for i in {1..6}
@ -55,11 +113,55 @@ with open('${sorted_input_data}', 'w') as f:
print(k + " ==> " + v, file=f)
EOF
declare -a backward_compatible_checkout_objs=("2.2.fb.branch" "2.3.fb.branch" "2.4.fb.branch" "2.5.fb.branch" "2.6.fb.branch" "2.7.fb.branch" "2.8.1.fb" "3.0.fb.branch" "3.1.fb" "3.2.fb" "3.3.fb" "3.4.fb" "3.5.fb" "3.6.fb" "3.7.fb" "3.8.fb" "3.9.fb" "4.2.fb" "4.3.fb" "4.4.fb" "4.5.fb" "4.6.fb" "4.7.fb" "4.8.fb" "4.9.fb" "4.10.fb" "4.11.fb" "4.12.fb" "4.13.fb" "5.0.fb" "5.1.fb" "5.2.fb" "5.3.fb" "5.4.fb" "5.5.fb" "5.6.fb" "5.7.fb" "5.8.fb" "5.9.fb" "5.10.fb" "5.11.fb" "5.12.fb" "5.13.fb" "5.14.fb" "5.15.fb")
declare -a forward_compatible_checkout_objs=() # N/A at the moment
declare -a forward_compatible_with_options_checkout_objs=("5.16.fb" "5.17.fb" "5.18.fb" "6.0.fb" "6.1.fb" "6.2.fb" "6.3.fb" "6.4.fb" "6.5.fb" "6.6.fb" "6.7.fb" "6.8.fb" "6.9.fb" "6.10.fb" "6.11.fb" "6.12.fb" "6.13.fb" "6.14.fb" "6.15.fb" "6.16.fb" "6.17.fb")
declare -a checkout_objs=(${backward_compatible_checkout_objs[@]} ${forward_compatible_checkout_objs[@]} ${forward_compatible_with_options_checkout_objs[@]})
declare -a extern_sst_ingestion_compatible_checkout_objs=("5.16.fb" "5.17.fb" "5.18.fb" "6.0.fb" "6.1.fb" "6.2.fb" "6.3.fb" "6.4.fb" "6.5.fb" "6.6.fb" "6.7.fb" "6.8.fb" "6.9.fb" "6.10.fb" "6.11.fb" "6.12.fb" "6.13.fb" "6.14.fb" "6.15.fb" "6.16.fb" "6.17.fb")
# db_backward_only_refs defined below the rest
# To check for DB forward compatibility with loading options (old version
# reading data from new), as well as backward compatibility
declare -a db_forward_with_options_refs=("5.16.fb" "5.17.fb" "5.18.fb" "6.0.fb" "6.1.fb" "6.2.fb" "6.3.fb" "6.4.fb" "6.5.fb" "6.6.fb" "6.7.fb" "6.8.fb" "6.9.fb" "6.10.fb" "6.11.fb" "6.12.fb" "6.13.fb" "6.14.fb" "6.15.fb" "6.16.fb" "6.17.fb")
# To check for DB forward compatibility without loading options (in addition
# to the "with loading options" set), as well as backward compatibility
declare -a db_forward_no_options_refs=() # N/A at the moment
# To check for SST ingestion backward compatibility (new version reading
# data from old) (ldb ingest_extern_sst added in 5.16.x, back-ported to
# 5.14.x, 5.15.x)
declare -a ext_backward_only_refs=("5.14.fb" "5.15.fb")
# To check for SST ingestion forward compatibility (old version reading
# data from new) as well as backward compatibility
declare -a ext_forward_refs=("${db_forward_no_options_refs[@]}" "${db_forward_with_options_refs[@]}")
# To check for backup backward compatibility (new version reading data
# from old) (ldb backup/restore added in 4.11.x)
declare -a bak_backward_only_refs=("4.11.fb" "4.12.fb" "4.13.fb" "5.0.fb" "5.1.fb" "5.2.fb" "5.3.fb" "5.4.fb" "5.5.fb" "5.6.fb" "5.7.fb" "5.8.fb" "5.9.fb" "5.10.fb" "5.11.fb" "5.12.fb" "5.13.fb" "${ext_backward_only_refs[@]}")
# To check for backup forward compatibility (old version reading data
# from new) as well as backward compatibility
declare -a bak_forward_refs=("${db_forward_no_options_refs[@]}" "${db_forward_with_options_refs[@]}")
# Branches (git refs) to check for DB backward compatibility (new version
# reading data from old) (in addition to the "forward compatible" list)
# NOTE: 2.7.fb.branch shows assertion violation in some configurations
declare -a db_backward_only_refs=("2.2.fb.branch" "2.3.fb.branch" "2.4.fb.branch" "2.5.fb.branch" "2.6.fb.branch" "2.7.fb.branch" "2.8.1.fb" "3.0.fb.branch" "3.1.fb" "3.2.fb" "3.3.fb" "3.4.fb" "3.5.fb" "3.6.fb" "3.7.fb" "3.8.fb" "3.9.fb" "4.2.fb" "4.3.fb" "4.4.fb" "4.5.fb" "4.6.fb" "4.7.fb" "4.8.fb" "4.9.fb" "4.10.fb" "${bak_backward_only_refs[@]}")
if [[ -n "$SHORT_TEST" ]]; then
  # Shrink every list to at most its first entry. The slice is deliberately
  # left unquoted: for an empty list it expands to nothing, so the list
  # stays empty instead of gaining a single empty-string element.
  db_backward_only_refs=(${db_backward_only_refs[@]:0:1})
  db_forward_no_options_refs=(${db_forward_no_options_refs[@]:0:1})
  db_forward_with_options_refs=(${db_forward_with_options_refs[@]:0:1})
  ext_backward_only_refs=(${ext_backward_only_refs[@]:0:1})
  ext_forward_refs=(${ext_forward_refs[@]:0:1})
  bak_backward_only_refs=(${bak_backward_only_refs[@]:0:1})
  bak_forward_refs=(${bak_forward_refs[@]:0:1})
fi
# Collect every ref named in any of the lists into checkout_refs, keeping
# first-seen order and dropping repeats. The per-ref directory under
# $db_test_dir doubles as the "already seen" marker.
declare -a checkout_refs=()
for checkout_ref in \
  "${db_backward_only_refs[@]}" \
  "${db_forward_no_options_refs[@]}" \
  "${db_forward_with_options_refs[@]}" \
  "${ext_backward_only_refs[@]}" \
  "${ext_forward_refs[@]}" \
  "${bak_backward_only_refs[@]}" \
  "${bak_forward_refs[@]}"
do
  if [ -e $db_test_dir/$checkout_ref ]; then
    # Directory already exists, so this ref was recorded earlier.
    continue
  fi
  mkdir -p $db_test_dir/$checkout_ref
  checkout_refs+=($checkout_ref)
done
generate_db()
{
@ -105,89 +207,155 @@ ingest_external_sst()
set -e
}
# Sandcastle sets us up with a remote that is just another directory on the same
# machine and doesn't have our branches. Need to fetch them so checkout works.
# Remote add may fail if added previously (we don't cleanup).
git remote add github_origin "https://github.com/facebook/rocksdb.git"
set -e
git fetch github_origin
# Back up a DB by running the saved copy of backup_db.sh.
# Globals (read): script_copy_dir - directory holding the saved helper scripts
# Arguments: $1 - path of the DB to back up
#            $2 - backup directory to write to
# Exits the script with status 1 (after printing an error) if the helper
# fails. On success, errexit is enabled when the function returns.
backup_db()
{
  # Turn off errexit so a helper failure reaches the explicit check below
  # and produces a descriptive message instead of a silent abort.
  set +e
  # Quote every expansion so paths containing whitespace or glob characters
  # reach the helper as exactly two arguments.
  "$script_copy_dir"/backup_db.sh "$1" "$2"
  if [ $? -ne 0 ]; then
    echo "==== Error backing up DB $1 to $2 ===="
    exit 1
  fi
  set -e
}
# Compatibility test for external SST file ingestion
for checkout_obj in "${extern_sst_ingestion_compatible_checkout_objs[@]}"
do
echo == Generating DB with extern SST file in "$checkout_obj" ...
git checkout github_origin/$checkout_obj -b $checkout_obj
make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32
write_external_sst $input_data_path $test_dir/$checkout_obj $test_dir/$checkout_obj
ingest_external_sst $test_dir/$checkout_obj $test_dir/$checkout_obj
done
# Restore a DB from a backup by running the saved copy of restore_db.sh.
# Globals (read): script_copy_dir - directory holding the saved helper scripts
# Arguments: $1 - backup directory to restore from
#            $2 - path of the DB to restore into
# Exits the script with status 1 (after printing an error) if the helper
# fails. On success, errexit is enabled when the function returns.
restore_db()
{
  # Turn off errexit so a helper failure reaches the explicit check below
  # and produces a descriptive message instead of a silent abort.
  set +e
  # Quote every expansion so paths containing whitespace or glob characters
  # reach the helper as exactly two arguments.
  "$script_copy_dir"/restore_db.sh "$1" "$2"
  if [ $? -ne 0 ]; then
    echo "==== Error restoring from $1 to $2 ===="
    exit 1
  fi
  set -e
}
# Report whether a string equals any of the remaining arguments.
# Arguments: $1   - string to look for
#            $2.. - candidate strings (typically an expanded array)
# Returns:   0 if some candidate is exactly equal to $1, 1 otherwise.
member_of_array()
{
  local needle="$1"
  local candidate
  shift
  for candidate in "$@"; do
    # The right-hand side is quoted, so this is a literal comparison
    # rather than a glob match.
    if [[ "$candidate" == "$needle" ]]; then
      return 0
    fi
  done
  return 1
}
# General structure from here:
# * Check out, build, and do stuff with the "current" branch.
# * For each older branch under consideration,
# * Check out, build, and do stuff with it, potentially using data
# generated from "current" branch.
# * (Again) check out, build, and do (other) stuff with the "current"
# branch, potentially using data from older branches.
#
# This way, we only do at most n+1 checkout+build steps, without the
# need to stash away executables.
checkout_flag=${1:-"master"}
# Decorate name
current_checkout_name="$current_checkout_name ($current_checkout_hash)"
echo == Building $checkout_flag debug
git checkout github_origin/$checkout_flag -b tmp-$checkout_flag
echo "== Building $current_checkout_name debug"
git checkout -B $tmp_branch $current_checkout_hash
make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32
compare_base_db_dir=$test_dir"/base_db_dir"
write_external_sst $input_data_path $compare_base_db_dir $compare_base_db_dir
ingest_external_sst $compare_base_db_dir $compare_base_db_dir
for checkout_obj in "${extern_sst_ingestion_compatible_checkout_objs[@]}"
echo "== Using $current_checkout_name, generate DB with extern SST and ingest"
current_ext_test_dir=$ext_test_dir"/current"
write_external_sst $input_data_path ${current_ext_test_dir}_pointless $current_ext_test_dir
ingest_external_sst ${current_ext_test_dir}_ingest $current_ext_test_dir
echo "== Generating DB from $current_checkout_name ..."
current_db_test_dir=$db_test_dir"/current"
generate_db $input_data_path $current_db_test_dir
echo "== Creating backup of DB from $current_checkout_name ..."
current_bak_test_dir=$bak_test_dir"/current"
backup_db $current_db_test_dir $current_bak_test_dir
for checkout_ref in "${checkout_refs[@]}"
do
echo == Build "$checkout_obj" and try to open DB generated using $checkout_flag
git checkout $checkout_obj
echo "== Building $checkout_ref debug"
git reset --hard $tmp_origin/$checkout_ref
make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32
compare_db $test_dir/$checkout_obj $compare_base_db_dir db_dump.txt 1 1
git checkout tmp-$checkout_flag
# Clean up
git branch -D $checkout_obj
done
echo == Finish compatibility test for SST ingestion.
# We currently assume DB backward compatibility for every branch listed
echo "== Use $checkout_ref to generate a DB ..."
generate_db $input_data_path $db_test_dir/$checkout_ref
for checkout_obj in "${checkout_objs[@]}"
do
echo == Generating DB from "$checkout_obj" ...
git checkout github_origin/$checkout_obj -b $checkout_obj
make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32
generate_db $input_data_path $test_dir/$checkout_obj
done
if member_of_array "$checkout_ref" "${ext_backward_only_refs[@]}" ||
member_of_array "$checkout_ref" "${ext_forward_refs[@]}"
then
echo "== Use $checkout_ref to generate DB with extern SST file"
write_external_sst $input_data_path $ext_test_dir/${checkout_ref}_pointless $ext_test_dir/$checkout_ref
fi
if member_of_array "$checkout_ref" "${ext_forward_refs[@]}"
then
echo "== Use $checkout_ref to ingest extern SST file and compare vs. $current_checkout_name"
ingest_external_sst $ext_test_dir/${checkout_ref}_ingest $ext_test_dir/$checkout_ref
compare_db $ext_test_dir/${checkout_ref}_ingest ${current_ext_test_dir}_ingest db_dump.txt 1 1
rm -rf ${ext_test_dir:?}/${checkout_ref}_ingest
echo "== Use $checkout_ref to ingest extern SST file from $current_checkout_name"
ingest_external_sst $ext_test_dir/${checkout_ref}_ingest $current_ext_test_dir
compare_db $ext_test_dir/${checkout_ref}_ingest ${current_ext_test_dir}_ingest db_dump.txt 1 1
fi
if member_of_array "$checkout_ref" "${db_forward_no_options_refs[@]}" ||
member_of_array "$checkout_ref" "${db_forward_with_options_refs[@]}"
then
echo "== Use $checkout_ref to open DB generated using $current_checkout_name..."
compare_db $db_test_dir/$checkout_ref $current_db_test_dir forward_${checkout_ref}_dump.txt 0
fi
checkout_flag=${1:-"master"}
if member_of_array "$checkout_ref" "${db_forward_with_options_refs[@]}"
then
echo "== Use $checkout_ref to open DB generated using $current_checkout_name with its options..."
compare_db $db_test_dir/$checkout_ref $current_db_test_dir forward_${checkout_ref}_dump.txt 1 1
fi
echo == Building $checkout_flag debug
git checkout tmp-$checkout_flag
if member_of_array "$checkout_ref" "${bak_backward_only_refs[@]}" ||
member_of_array "$checkout_ref" "${bak_forward_refs[@]}"
then
echo "== Use $checkout_ref to backup DB"
backup_db $db_test_dir/$checkout_ref $bak_test_dir/$checkout_ref
fi
if member_of_array "$checkout_ref" "${bak_forward_refs[@]}"
then
echo "== Use $checkout_ref to restore DB from $current_checkout_name"
rm -rf ${db_test_dir:?}/$checkout_ref
restore_db $current_bak_test_dir $db_test_dir/$checkout_ref
compare_db $db_test_dir/$checkout_ref $current_db_test_dir forward_${checkout_ref}_dump.txt 0
fi
done
echo "== Building $current_checkout_name debug (again, final)"
git reset --hard $current_checkout_hash
make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32
compare_base_db_dir=$test_dir"/base_db_dir"
echo == Generate compare base DB to $compare_base_db_dir
generate_db $input_data_path $compare_base_db_dir
for checkout_obj in "${checkout_objs[@]}"
for checkout_ref in "${checkout_refs[@]}"
do
echo == Opening DB from "$checkout_obj" using debug build of $checkout_flag ...
compare_db $test_dir/$checkout_obj $compare_base_db_dir db_dump.txt 1 0
done
# We currently assume DB backward compatibility for every branch listed
echo "== Use $current_checkout_name to open DB generated using $checkout_ref..."
compare_db $db_test_dir/$checkout_ref $current_db_test_dir db_dump.txt 1 0
for checkout_obj in "${forward_compatible_checkout_objs[@]}"
do
echo == Build "$checkout_obj" and try to open DB generated using $checkout_flag...
git checkout $checkout_obj
make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32
compare_db $test_dir/$checkout_obj $compare_base_db_dir forward_${checkout_obj}_dump.txt 0
done
if member_of_array "$checkout_ref" "${ext_backward_only_refs[@]}" ||
member_of_array "$checkout_ref" "${ext_forward_refs[@]}"
then
rm -rf ${ext_test_dir:?}/${checkout_ref}_ingest
echo "== Use $current_checkout_name to ingest extern SST file from $checkout_ref"
ingest_external_sst $ext_test_dir/${checkout_ref}_ingest $current_ext_test_dir
compare_db $ext_test_dir/${checkout_ref}_ingest ${current_ext_test_dir}_ingest db_dump.txt 1 1
fi
for checkout_obj in "${forward_compatible_with_options_checkout_objs[@]}"
do
echo == Build "$checkout_obj" and try to open DB generated using $checkout_flag with its options...
git checkout $checkout_obj
make clean
DISABLE_WARNING_AS_ERROR=1 make ldb -j32
compare_db $test_dir/$checkout_obj $compare_base_db_dir forward_${checkout_obj}_dump.txt 1 1
if member_of_array "$checkout_ref" "${bak_backward_only_refs[@]}" ||
member_of_array "$checkout_ref" "${bak_forward_refs[@]}"
then
echo "== Use $current_checkout_name to restore DB from $checkout_ref"
rm -rf ${db_test_dir:?}/$checkout_ref
restore_db $bak_test_dir/$checkout_ref $db_test_dir/$checkout_ref
compare_db $db_test_dir/$checkout_ref $current_db_test_dir db_dump.txt 1 0
fi
done
echo ==== Compatibility Test PASSED ====

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
#
#
# Both positional arguments are required: the backup to read from and the
# DB path to restore into.
if (( $# < 2 )); then
  echo "usage: ${BASH_SOURCE[0]} <Backup Dir> <DB Path>"
  exit 1
fi

source_backup="$1"
target_db="$2"

echo "== Restoring latest from $source_backup to $target_db"
# ldb is expected in the current working directory. As the last command,
# its exit status becomes the exit status of this script.
./ldb restore --db="$target_db" --backup_dir="$source_backup"

@ -12,6 +12,7 @@ input_data_dir=$1
db_dir=$2
extern_sst_dir=$3
rm -rf $db_dir
mkdir -p $extern_sst_dir
set -e

Loading…
Cancel
Save