Add cdssync migration test dataset tooling

Add the cdssync migration test dataset manifest, generator script,
workspace instructions, and gitignore.

This sets the default workflow to:
- generate the dataset locally
- copy it to the test machine with metadata preserved
- verify the copied data before migration testing
This commit is contained in:
2026-04-20 11:49:41 -04:00
parent 4f56ff9c4d
commit bb1cb37dc2
4 changed files with 458 additions and 0 deletions

View File

@@ -0,0 +1,275 @@
#!/usr/bin/env bash
# generate_migration_test_dataset.sh - create a compact filesystem migration
# test dataset under a target directory (see usage() for the full description).
#
# Strict mode: abort on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
#######################################
# Print the command-line help text.
# Arguments: none
# Outputs:   the help text on stdout, one line per printf argument
#######################################
usage() {
  printf '%s\n' \
    'Usage:' \
    'generate_migration_test_dataset.sh TARGET_DIR' \
    'Creates a compact filesystem migration test dataset under TARGET_DIR.' \
    'The dataset matches the manifest in migration-test-manifest.md.' \
    'Notes:' \
    '- Existing TARGET_DIR contents are left in place unless they collide.' \
    '- ACL and xattr cases are created only if the local tools are available.' \
    '- Sparse files are created with logical size but low physical allocation.'
}
# Command-line handling: exactly one argument (the dataset directory) is
# required. A wrong argument count is a usage error, so the help text goes to
# stderr and the script exits with status 1.
if [[ $# -ne 1 ]]; then
  usage >&2
  exit 1
fi
# Explicit help request: print the help text on stdout and exit successfully.
# Without this, "--help" would be handed to realpath below as an option.
case "$1" in
  -h | --help)
    usage
    exit 0
    ;;
esac
TARGET_DIR=$1
# ROOT is the absolute dataset root used by every helper. "-m" lets realpath
# resolve a path whose components do not exist yet; "--" ends option parsing
# so a TARGET_DIR that starts with "-" is treated as a path, not an option.
ROOT=$(realpath -m -- "$TARGET_DIR")
mkdir -p -- "$ROOT"
# Probe for the optional metadata tools. Each flag starts at 1 and is reset to
# 0 when `command -v` cannot find the tool; the flags are read later to decide
# whether ACL / xattr assignment is attempted or skipped.
have_setfacl=1
command -v setfacl >/dev/null 2>&1 || have_setfacl=0
have_setfattr=1
command -v setfattr >/dev/null 2>&1 || have_setfattr=0
#######################################
# Create a directory, and any missing parents, below the dataset root.
# Globals:   ROOT (read)
# Arguments: $1 - directory path relative to ROOT
#######################################
create_dir() {
  local rel_dir=$1
  mkdir -p "${ROOT}/${rel_dir}"
}
#######################################
# Set both the access time and the modification time of a dataset entry.
# Globals:   ROOT (read)
# Arguments: $1 - path relative to ROOT
#            $2 - timestamp in the format accepted by `touch -t`
#                 (the callers in this file pass YYYYMMDDhhmm)
#######################################
set_times() {
  local target="${ROOT}/$1"
  local when=$2
  touch -am -t "$when" "$target"
}
write_text() {
local path=$1
local mib=$2
local bytes=$((mib * 1024 * 1024))
perl -e '
my ($target, $label) = @ARGV;
my $chunk = "Migration text payload for $label\n";
while (length($chunk) < 8192) { $chunk .= $chunk; }
while ($target > 0) {
my $part = substr($chunk, 0, $target > length($chunk) ? length($chunk) : $target);
print $part;
$target -= length($part);
}
' "$bytes" "$path" >"$ROOT/$path"
}
write_compressible() {
local path=$1
local mib=$2
local bytes=$((mib * 1024 * 1024))
perl -e '
my ($target) = @ARGV;
my $chunk = "A" x 8192;
while ($target > 0) {
my $part = substr($chunk, 0, $target > length($chunk) ? length($chunk) : $target);
print $part;
$target -= length($part);
}
' "$bytes" >"$ROOT/$path"
}
#######################################
# Write a file of exactly N MiB of data read from /dev/urandom.
# Globals:   ROOT (read)
# Arguments: $1 - file path relative to ROOT
#            $2 - size in MiB
#######################################
write_random() {
  local path=$1
  local mib=$2
  # count= counts read operations, not bytes: without iflag=fullblock a short
  # read from the input would be written as a partial block and the file would
  # come out smaller than requested. fullblock makes dd keep reading until
  # each 1 MiB block is complete.
  dd if=/dev/urandom of="$ROOT/$path" bs=1M count="$mib" iflag=fullblock status=none
}
#######################################
# Write a runnable bash script padded to N MiB.
# The file starts with a shebang line and one echo command; the rest is a run
# of "#" characters, which bash reads as a single trailing comment. If the
# two-line header is already at least N MiB, no padding is added.
# Globals:   ROOT (read)
# Arguments: $1 - file path relative to ROOT
#            $2 - size in MiB
#######################################
write_script() {
  local path=$1
  local mib=$2
  local target_size=$((mib * 1024 * 1024))
  printf '%s\n' \
    '#!/usr/bin/env bash' \
    'echo "migration test script"' >"$ROOT/$path"
  local current_size
  current_size=$(wc -c <"$ROOT/$path")
  if ((current_size < target_size)); then
    # Take the padding from /dev/zero in one bulk read and map NUL to "#".
    # A byte-at-a-time dd (bs=1) would issue one read and one write per byte,
    # i.e. millions of syscalls for each generated script.
    head -c "$((target_size - current_size))" /dev/zero |
      tr '\0' '#' >>"$ROOT/$path"
  fi
}
#######################################
# Create a zero-length file, truncating it if it already exists.
# Globals:   ROOT (read)
# Arguments: $1 - file path relative to ROOT
#######################################
write_empty() {
  local rel_path=$1
  # Printing an empty string through ">" creates or truncates the file.
  printf '' >"${ROOT}/${rel_path}"
}
#######################################
# Create (or resize) a file to a logical size of N MiB using truncate, which
# sets the size without writing any data.
# Globals:   ROOT (read)
# Arguments: $1 - file path relative to ROOT
#            $2 - size in MiB
#######################################
write_sparse() {
  local rel_path=$1
  local size_mib=$2
  # Size is given in bytes; N * 1024 * 1024 equals truncate's "M" suffix.
  local size_bytes=$((size_mib * 1024 * 1024))
  truncate -s "$size_bytes" "${ROOT}/${rel_path}"
}
#######################################
# Set the permission bits of a dataset entry.
# Globals:   ROOT (read)
# Arguments: $1 - path relative to ROOT
#            $2 - mode passed straight to chmod (callers here use octal)
#######################################
apply_mode() {
  local rel_path=$1
  local perms=$2
  chmod "$perms" "${ROOT}/${rel_path}"
}
#######################################
# Create one dataset file: ensure its parent directory exists, generate the
# content with the writer matching the requested type, then set its mode.
# Arguments: $1 - file path relative to the dataset root
#            $2 - content type: text, random, compressible, script, empty
#                 or sparse
#            $3 - size in MiB (not passed on for type "empty")
#            $4 - mode handed to apply_mode
# Outputs:   "Unknown type: <type>" on stderr for an unrecognised type
# Returns:   exits the script with status 1 on an unrecognised type
#######################################
make_file() {
  local rel_path=$1 kind=$2 size_mib=$3 perms=$4
  create_dir "$(dirname "$rel_path")"
  case "$kind" in
    text | random | compressible | script | sparse)
      # Each sized type has a writer named write_<type> taking (path, MiB).
      "write_${kind}" "$rel_path" "$size_mib"
      ;;
    empty)
      write_empty "$rel_path"
      ;;
    *)
      echo "Unknown type: $kind" >&2
      exit 1
      ;;
  esac
  apply_mode "$rel_path" "$perms"
}
#######################################
# Create the fixed directory skeleton of the dataset, including the
# directories that are meant to stay empty.
# Arguments: none
#######################################
create_base_dirs() {
  local -a skeleton=(
    "regular"
    "hidden"
    "spaces in name"
    "deep/tree/level1/level2/level3"
    "readonly-dir"
    "links"
    "metadata"
    "empty-dirs/empty_a"
    "empty-dirs/empty_b"
    "empty-dirs/.hidden_empty_dir"
    "readonly-dir/no_write_subdir"
  )
  local dir
  for dir in "${skeleton[@]}"; do
    create_dir "$dir"
  done
}
#######################################
# Create the core matrix of files under regular/: every content type at
# 1, 3 and 5 MiB (empty files at 0) with a mix of permission modes.
# Arguments: none
#######################################
create_regular_files() {
  # Flat table, four fields per file: path, type, MiB, mode.
  local -a specs=(
    "regular/text_1mb_644.txt" text 1 0644
    "regular/text_3mb_600.txt" text 3 0600
    "regular/text_5mb_755.txt" text 5 0755
    "regular/random_1mb_600.bin" random 1 0600
    "regular/random_3mb_644.bin" random 3 0644
    "regular/random_5mb_755.bin" random 5 0755
    "regular/compressible_1mb_644.log" compressible 1 0644
    "regular/compressible_3mb_600.log" compressible 3 0600
    "regular/compressible_5mb_755.log" compressible 5 0755
    "regular/script_1mb_755.sh" script 1 0755
    "regular/script_3mb_700.sh" script 3 0700
    "regular/script_5mb_755.sh" script 5 0755
    "regular/sparse_1mb_600.img" sparse 1 0600
    "regular/sparse_3mb_600.img" sparse 3 0600
    "regular/sparse_5mb_600.img" sparse 5 0600
    "regular/empty_000_644.txt" empty 0 0644
    "regular/empty_001_600.txt" empty 0 0600
    "regular/empty_002_755.txt" empty 0 0755
  )
  local i
  for ((i = 0; i < ${#specs[@]}; i += 4)); do
    make_file "${specs[i]}" "${specs[i + 1]}" "${specs[i + 2]}" "${specs[i + 3]}"
  done
}
#######################################
# Create files whose names are the test subject: dot-files under hidden/,
# names containing spaces, and long names under regular/.
# Arguments: none
#######################################
create_named_variants() {
  # Flat table, four fields per file: path, type, MiB, mode.
  local -a specs=(
    "hidden/.hidden_text_1mb_644.txt" text 1 0644
    "hidden/.hidden_random_3mb_600.bin" random 3 0600
    "hidden/.hidden_script_1mb_755.sh" script 1 0755
    "hidden/.hidden_empty_644" empty 0 0644
    "hidden/.hidden_sparse_5mb_600.img" sparse 5 0600
    "spaces in name/file with spaces text 1mb 644.txt" text 1 0644
    "spaces in name/file with spaces random 3mb 600.bin" random 3 0600
    "spaces in name/file with spaces script 1mb 755.sh" script 1 0755
    "spaces in name/file with spaces empty 644" empty 0 0644
    "spaces in name/file with spaces sparse 5mb 600.img" sparse 5 0600
    "regular/longname_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_text_1mb_644.txt" text 1 0644
    "regular/longname_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_random_3mb_600.bin" random 3 0600
    "regular/longname_cccccccccccccccccccccccccccccccc_compressible_5mb_755.log" compressible 5 0755
  )
  local i
  for ((i = 0; i < ${#specs[@]}; i += 4)); do
    make_file "${specs[i]}" "${specs[i + 1]}" "${specs[i + 2]}" "${specs[i + 3]}"
  done
}
#######################################
# Create files inside the deepest directory, plus one source file and two
# byte-identical copies of it that carry different modes.
# Globals:   ROOT (read)
# Arguments: none
#######################################
create_deep_and_duplicate_cases() {
  local deep_dir="deep/tree/level1/level2/level3"
  make_file "${deep_dir}/deep_text_1mb_644.txt" text 1 0644
  make_file "${deep_dir}/deep_random_3mb_600.bin" random 3 0600
  make_file "${deep_dir}/deep_script_1mb_755.sh" script 1 0755
  make_file "${deep_dir}/deep_sparse_5mb_600.img" sparse 5 0600

  local dup_rel="regular/dup_source_text_3mb_644.txt"
  make_file "$dup_rel" text 3 0644

  # Both copies are made first and their modes adjusted afterwards.
  local dup_src="${ROOT}/${dup_rel}"
  local copy_a="${ROOT}/regular/dup_copy_a_text_3mb_600.txt"
  local copy_b="${ROOT}/deep/tree/level1/level2/dup_copy_b_text_3mb_755.txt"
  cp "$dup_src" "$copy_a"
  cp "$dup_src" "$copy_b"
  chmod 0600 "$copy_a"
  chmod 0755 "$copy_b"
}
#######################################
# Create three text files with fixed timestamps (2018, 2026 and 2030) and a
# set of read-only files, then remove write permission from the
# readonly-dir/no_write_subdir directory.
# Globals:   ROOT (read)
# Arguments: none
#######################################
create_time_and_readonly_cases() {
  local label
  for label in old recent futureish; do
    make_file "regular/${label}_text_1mb_644.txt" text 1 0644
  done

  # Timestamps are applied only after all three files have been written.
  set_times "regular/old_text_1mb_644.txt" 201801020304
  set_times "regular/recent_text_1mb_644.txt" 202604191530
  set_times "regular/futureish_text_1mb_644.txt" 203001020304

  local locked_dir="readonly-dir"
  make_file "${locked_dir}/locked_text_1mb_444.txt" text 1 0444
  make_file "${locked_dir}/locked_random_3mb_400.bin" random 3 0400
  make_file "${locked_dir}/locked_script_1mb_500.sh" script 1 0500

  chmod 0555 "${ROOT}/${locked_dir}/no_write_subdir"
}
#######################################
# Populate links/ with three relative symbolic links and two hard links that
# point at files created elsewhere in the dataset.
# Globals:   ROOT (read)
# Arguments: none
#######################################
create_links() {
  local link_dir="${ROOT}/links"

  # Symlink targets are stored relative to links/, hence the leading "../".
  ln -s "../regular/text_1mb_644.txt" \
    "${link_dir}/symlink_to_text_1mb_644.txt"
  ln -s "../deep/tree/level1/level2/level3/deep_random_3mb_600.bin" \
    "${link_dir}/symlink_to_deep_random_3mb_600.bin"
  ln -s "../hidden/.hidden_text_1mb_644.txt" \
    "${link_dir}/symlink_to_hidden_file"

  # Hard links are named hardlink_to_<source file name>.
  local name
  for name in random_3mb_644.bin compressible_5mb_755.log; do
    ln "${ROOT}/regular/${name}" "${link_dir}/hardlink_to_${name}"
  done
}
#######################################
# Create the files under metadata/ and, when the tools were detected, attach
# a user extended attribute to two of them and an ACL entry for user "nobody"
# to the other two.
# Globals:   ROOT, have_setfattr, have_setfacl (all read)
# Arguments: none
# Outputs:   one "Skipping ..." line on stdout per unavailable tool
#######################################
create_metadata_cases() {
  local meta="metadata"
  make_file "${meta}/xattr_text_1mb_644.txt" text 1 0644
  make_file "${meta}/xattr_random_3mb_600.bin" random 3 0600
  make_file "${meta}/acl_text_1mb_644.txt" text 1 0644
  make_file "${meta}/acl_script_1mb_755.sh" script 1 0755

  if ((have_setfattr == 0)); then
    echo "Skipping xattr assignment: setfattr not available"
  else
    setfattr -n user.migration_case -v "xattr-text" \
      "${ROOT}/${meta}/xattr_text_1mb_644.txt"
    setfattr -n user.migration_case -v "xattr-random" \
      "${ROOT}/${meta}/xattr_random_3mb_600.bin"
  fi

  if ((have_setfacl == 0)); then
    echo "Skipping ACL assignment: setfacl not available"
  else
    setfacl -m u:nobody:r-- "${ROOT}/${meta}/acl_text_1mb_644.txt"
    setfacl -m u:nobody:r-x "${ROOT}/${meta}/acl_script_1mb_755.sh"
  fi
}
#######################################
# Write GENERATION_SUMMARY.txt into the dataset root, recording the root path
# and whether the optional ACL / xattr tools were detected.
# Globals:   ROOT, have_setfacl, have_setfattr (all read)
# Arguments: none
#######################################
write_summary() {
  local summary_file="${ROOT}/GENERATION_SUMMARY.txt"
  printf '%s\n' \
    "Dataset root: $ROOT" \
    "Manifest: migration-test-manifest.md" \
    "Optional metadata support:" \
    "- setfacl available: $have_setfacl" \
    "- setfattr available: $have_setfattr" \
    "Notes:" \
    "- Sparse files have logical size with low physical allocation." \
    "- Hard links share inode data with their source file." \
    "- Read-only files and directories may require elevated privileges to modify later." \
    >"$summary_file"
}
# Build the dataset by running each generation step in order. The order is
# significant: the directory skeleton comes first, and create_links runs after
# the file-creating steps because its hard links need existing source files.
for step in \
  create_base_dirs \
  create_regular_files \
  create_named_variants \
  create_deep_and_duplicate_cases \
  create_time_and_readonly_cases \
  create_links \
  create_metadata_cases \
  write_summary; do
  "$step"
done
echo "Created migration test dataset at: $ROOT"