#!/usr/bin/env bash
#
# Builds a filesystem migration test dataset under a target directory and,
# optionally, keeps rewriting its mutable files with random content.
set -euo pipefail

# Print the command-line help text (callers redirect to stderr on errors).
usage() {
  cat <<'EOF'
Usage:
  generate_migration_test_dataset.sh [OPTIONS] TARGET_DIR [UPDATE_INTERVAL_SECONDS]

Creates a compact filesystem migration test dataset under TARGET_DIR.
The dataset matches the manifest in migration-test-manifest.md.

Options:
  --update-only              Update an existing dataset in place.
  --folder-count N           Generate N bulk-data folders under bulk/.
  --files-per-folder N       Generate N bulk files in each bulk folder.
  --min-file-size-mib N      Minimum bulk file size in MiB. Default: 1.
  --max-file-size-mib N      Maximum bulk file size in MiB. Default: 5.
  --max-dataset-size-mib N   Maximum total size for generated bulk files.

Notes:
  - Existing TARGET_DIR contents are left in place unless they collide.
  - ACL and xattr cases are created only if the local tools are available.
  - Sparse files are created with logical size but low physical allocation.
  - If UPDATE_INTERVAL_SECONDS is provided, the script keeps rewriting mutable
    files with random content after the initial dataset creation.
  - An interval of 0 means continuous updates with no sleep between passes.
  - Update mode rewrites content-bearing regular files only.
  - Update mode does not rewrite script files, sparse files, symlinks,
    hard links, or empty files.
EOF
}

# Abort with a clear message when a value-taking option has no value.
# Without this check `shift 2` fails and `set -e` ends the script silently.
# Arguments: $1 - option name, $2 - number of remaining positional parameters
require_option_value() {
  local option=$1
  local remaining=$2
  if (( remaining < 2 )); then
    echo "Option $option requires a value" >&2
    usage >&2
    exit 1
  fi
}

UPDATE_ONLY=0
FOLDER_COUNT=0
FILES_PER_FOLDER=0
MIN_FILE_SIZE_MIB=1
MAX_FILE_SIZE_MIB=5
MAX_DATASET_SIZE_MIB=

# Options must precede the positional arguments; parsing stops at the first
# argument that does not start with "--".
while [[ $# -gt 0 ]]; do
  case "$1" in
    --update-only)
      UPDATE_ONLY=1
      shift
      ;;
    --folder-count)
      require_option_value "$1" "$#"
      FOLDER_COUNT=$2
      shift 2
      ;;
    --files-per-folder)
      require_option_value "$1" "$#"
      FILES_PER_FOLDER=$2
      shift 2
      ;;
    --min-file-size-mib)
      require_option_value "$1" "$#"
      MIN_FILE_SIZE_MIB=$2
      shift 2
      ;;
    --max-file-size-mib)
      require_option_value "$1" "$#"
      MAX_FILE_SIZE_MIB=$2
      shift 2
      ;;
    --max-dataset-size-mib)
      require_option_value "$1" "$#"
      MAX_DATASET_SIZE_MIB=$2
      shift 2
      ;;
    --help|-h)
      usage
      exit 0
      ;;
    --*)
      echo "Unknown option: $1" >&2
      usage >&2
      exit 1
      ;;
    *)
      break
      ;;
  esac
done

if [[ $# -lt 1 || $# -gt 2 ]]; then
  usage >&2
  exit 1
fi

TARGET_DIR=$1
# -m resolves the path even when it does not exist yet; "--" protects against
# a TARGET_DIR that begins with a dash.
ROOT=$(realpath -m -- "$TARGET_DIR")
UPDATE_INTERVAL=${2:-}

if [[ -n "$UPDATE_INTERVAL" ]]; then
  if ! [[ "$UPDATE_INTERVAL" =~ ^[0-9]+$ ]]; then
    echo "UPDATE_INTERVAL_SECONDS must be a non-negative integer" >&2
    exit 1
  fi
  # Force base 10: a value such as "08" would otherwise be rejected as an
  # invalid octal literal by later arithmetic.
  UPDATE_INTERVAL=$((10#$UPDATE_INTERVAL))
fi

for value_name in FOLDER_COUNT FILES_PER_FOLDER MIN_FILE_SIZE_MIB MAX_FILE_SIZE_MIB; do
  if ! [[ ${!value_name} =~ ^[0-9]+$ ]]; then
    echo "$value_name must be a non-negative integer" >&2
    exit 1
  fi
  # Same base-10 normalisation as above, written back through the name.
  printf -v "$value_name" '%d' "$((10#${!value_name}))"
done

if [[ -n "$MAX_DATASET_SIZE_MIB" ]]; then
  if ! [[ "$MAX_DATASET_SIZE_MIB" =~ ^[0-9]+$ ]]; then
    echo "MAX_DATASET_SIZE_MIB must be a non-negative integer" >&2
    exit 1
  fi
  MAX_DATASET_SIZE_MIB=$((10#$MAX_DATASET_SIZE_MIB))
fi

if (( MIN_FILE_SIZE_MIB > MAX_FILE_SIZE_MIB )); then
  echo "MIN_FILE_SIZE_MIB cannot be greater than MAX_FILE_SIZE_MIB" >&2
  exit 1
fi

mkdir -p "$ROOT"

# Record which optional metadata tools exist so ACL/xattr cases can be skipped.
have_setfacl=0
have_setfattr=0
if command -v setfacl >/dev/null 2>&1; then
  have_setfacl=1
fi
if command -v setfattr >/dev/null 2>&1; then
  have_setfattr=1
fi

# Create a directory (and parents) relative to ROOT.
# Arguments: $1 - directory path relative to ROOT
create_dir() {
  mkdir -p "$ROOT/$1"
}

# Print a random integer in the inclusive range [min, max].
# Arguments: $1 - lower bound, $2 - upper bound
random_int_between() {
  local min=$1
  local max=$2
  if (( min == max )); then
    echo "$min"
    return
  fi
  shuf -i "${min}-${max}" -n 1
}

# Print a timestamped message to stdout.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}

# Set both access and modification time of a file.
# Arguments: $1 - path relative to ROOT, $2 - timestamp in `touch -t` format
set_times() {
  local rel=$1
  local stamp=$2
  touch -a -m -t "$stamp" "$ROOT/$rel"
}

# Write exactly mib MiB of repeated text that embeds the relative path.
# Arguments: $1 - path relative to ROOT, $2 - size in MiB
write_text() {
  local path=$1
  local mib=$2
  local bytes=$((mib * 1024 * 1024))
  perl -e '
    my ($target, $label) = @ARGV;
    my $chunk = "Migration text payload for $label\n";
    while (length($chunk) < 8192) {
      $chunk .= $chunk;
    }
    while ($target > 0) {
      my $part = substr($chunk, 0, $target > length($chunk) ? length($chunk) : $target);
      print $part;
      $target -= length($part);
    }
  ' "$bytes" "$path" >"$ROOT/$path"
}

# Write exactly mib MiB of the single byte "A" (highly compressible).
# Arguments: $1 - path relative to ROOT, $2 - size in MiB
write_compressible() {
  local path=$1
  local mib=$2
  local bytes=$((mib * 1024 * 1024))
  perl -e '
    my ($target) = @ARGV;
    my $chunk = "A" x 8192;
    while ($target > 0) {
      my $part = substr($chunk, 0, $target > length($chunk) ? length($chunk) : $target);
      print $part;
      $target -= length($part);
    }
  ' "$bytes" >"$ROOT/$path"
}

# Write mib MiB of random bytes.
# Arguments: $1 - path relative to ROOT, $2 - size in MiB
write_random() {
  local path=$1
  local mib=$2
  # iflag=fullblock makes dd retry short reads so the file reaches full size.
  dd if=/dev/urandom of="$ROOT/$path" bs=1M count="$mib" iflag=fullblock status=none
}

# Write a small bash script padded with "#" characters up to mib MiB.
# Arguments: $1 - path relative to ROOT, $2 - size in MiB
write_script() {
  local path=$1
  local mib=$2
  cat >"$ROOT/$path" <<'EOF'
#!/usr/bin/env bash
echo "migration test script"
EOF
  local current_size
  current_size=$(wc -c <"$ROOT/$path")
  local target_size=$((mib * 1024 * 1024))
  if (( current_size < target_size )); then
    # head -c streams the padding in large blocks; the previous `dd bs=1`
    # issued one read/write pair per byte.
    head -c "$((target_size - current_size))" /dev/zero | tr '\0' '#' >>"$ROOT/$path"
  fi
}

# Create or truncate a file to zero length.
# Arguments: $1 - path relative to ROOT
write_empty() {
  : >"$ROOT/$1"
}

# Set a file's logical size to mib MiB with truncate (no data is written).
# Arguments: $1 - path relative to ROOT, $2 - size in MiB
write_sparse() {
  local path=$1
  local mib=$2
  truncate -s "${mib}M" "$ROOT/$path"
}

# Apply a permission mode.
# Arguments: $1 - path relative to ROOT, $2 - mode passed to chmod
apply_mode() {
  chmod "$2" "$ROOT/$1"
}

# Attach the xattr and ACL entries to the metadata/ files. Each assignment is
# best-effort: a failure is logged and the script continues.
set_acl_and_xattr_metadata() {
  if (( have_setfattr )); then
    if ! setfattr -n user.migration_case -v "xattr-text" "$ROOT/metadata/xattr_text_1mb_644.txt"; then
      log "Skipping xattr assignment on $ROOT/metadata/xattr_text_1mb_644.txt: operation not supported"
    fi
    if ! setfattr -n user.migration_case -v "xattr-random" "$ROOT/metadata/xattr_random_3mb_600.bin"; then
      log "Skipping xattr assignment on $ROOT/metadata/xattr_random_3mb_600.bin: operation not supported"
    fi
  else
    log "Skipping xattr assignment: setfattr not available"
  fi

  if (( have_setfacl )); then
    if ! setfacl -m u:nobody:r-- "$ROOT/metadata/acl_text_1mb_644.txt"; then
      log "Skipping ACL assignment on $ROOT/metadata/acl_text_1mb_644.txt: operation not supported"
    fi
    if ! setfacl -m u:nobody:r-x "$ROOT/metadata/acl_script_1mb_755.sh"; then
      log "Skipping ACL assignment on $ROOT/metadata/acl_script_1mb_755.sh: operation not supported"
    fi
  else
    log "Skipping ACL assignment: setfacl not available"
  fi
}

# Overwrite a file in place with random bytes, keeping its size and mode.
# Arguments: $1 - path relative to ROOT
rewrite_file_with_random_data() {
  local rel=$1
  local path="$ROOT/$rel"
  local size
  local mode
  size=$(stat -c '%s' "$path")
  mode=$(stat -c '%a' "$path")
  # Read-only cases need owner write permission for the duration of the rewrite.
  chmod u+w "$path"
  if (( size > 0 )); then
    head -c "$size" /dev/urandom >"$path"
  else
    : >"$path"
  fi
  chmod "$mode" "$path"
}

# Append every regular file under ROOT/bulk (sorted, relative to ROOT) to the
# BULK_MUTABLE_FILES array, which the caller must have declared.
append_bulk_mutable_files() {
  if [[ -d "$ROOT/bulk" ]]; then
    while IFS= read -r rel; do
      BULK_MUTABLE_FILES+=("$rel")
    done < <(cd "$ROOT" && find bulk -type f | sort)
  fi
}

# Rewrite every mutable dataset file once, then re-apply ACL/xattr metadata.
update_mutable_files_pass() {
  local rel
  local mutable_files=(
    "regular/text_1mb_644.txt"
    "regular/text_3mb_600.txt"
    "regular/text_5mb_755.txt"
    "regular/random_1mb_600.bin"
    "regular/random_3mb_644.bin"
    "regular/random_5mb_755.bin"
    "regular/compressible_1mb_644.log"
    "regular/compressible_3mb_600.log"
    "regular/compressible_5mb_755.log"
    "hidden/.hidden_text_1mb_644.txt"
    "hidden/.hidden_random_3mb_600.bin"
    "spaces in name/file with spaces text 1mb 644.txt"
    "spaces in name/file with spaces random 3mb 600.bin"
    "regular/longname_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_text_1mb_644.txt"
    "regular/longname_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_random_3mb_600.bin"
    "regular/longname_cccccccccccccccccccccccccccccccc_compressible_5mb_755.log"
    "deep/tree/level1/level2/level3/deep_text_1mb_644.txt"
    "deep/tree/level1/level2/level3/deep_random_3mb_600.bin"
    "regular/dup_source_text_3mb_644.txt"
    "regular/dup_copy_a_text_3mb_600.txt"
    "deep/tree/level1/level2/dup_copy_b_text_3mb_755.txt"
    "regular/old_text_1mb_644.txt"
    "regular/recent_text_1mb_644.txt"
    "regular/futureish_text_1mb_644.txt"
    "readonly-dir/locked_text_1mb_444.txt"
    "readonly-dir/locked_random_3mb_400.bin"
    "metadata/xattr_text_1mb_644.txt"
    "metadata/xattr_random_3mb_600.bin"
    "metadata/acl_text_1mb_644.txt"
  )
  local BULK_MUTABLE_FILES=()

  append_bulk_mutable_files

  for rel in "${mutable_files[@]}"; do
    rewrite_file_with_random_data "$rel"
  done

  # Expanding an empty array trips `set -u` on bash older than 4.4, so only
  # iterate when bulk files were actually found.
  if (( ${#BULK_MUTABLE_FILES[@]} > 0 )); then
    for rel in "${BULK_MUTABLE_FILES[@]}"; do
      rewrite_file_with_random_data "$rel"
    done
  fi

  set_acl_and_xattr_metadata
}

# Run update passes forever, sleeping UPDATE_INTERVAL seconds between passes
# (no sleep when the interval is 0).
run_update_loop() {
  local iteration=1
  log "Starting update loop for $ROOT with interval ${UPDATE_INTERVAL}s"
  while true; do
    update_mutable_files_pass
    log "Completed random update pass $iteration"
    iteration=$((iteration + 1))
    if (( UPDATE_INTERVAL > 0 )); then
      sleep "$UPDATE_INTERVAL"
    fi
  done
}

# Create one dataset file of the requested kind, then apply its mode.
# Arguments:
#   $1 - path relative to ROOT
#   $2 - type: text | random | compressible | script | empty | sparse
#   $3 - size in MiB (unused for "empty")
#   $4 - permission mode
make_file() {
  local path=$1
  local type=$2
  local mib=$3
  local mode=$4

  create_dir "$(dirname "$path")"

  case "$type" in
    text)
      write_text "$path" "$mib"
      ;;
    random)
      write_random "$path" "$mib"
      ;;
    compressible)
      write_compressible "$path" "$mib"
      ;;
    script)
      write_script "$path" "$mib"
      ;;
    empty)
      write_empty "$path"
      ;;
    sparse)
      write_sparse "$path" "$mib"
      ;;
    *)
      echo "Unknown type: $type" >&2
      exit 1
      ;;
  esac

  apply_mode "$path" "$mode"
}

# Create the fixed directory skeleton, including directories that stay empty.
create_base_dirs() {
  create_dir "regular"
  create_dir "hidden"
  create_dir "spaces in name"
  create_dir "deep/tree/level1/level2/level3"
  create_dir "readonly-dir"
  create_dir "links"
  create_dir "metadata"
  create_dir "empty-dirs/empty_a"
  create_dir "empty-dirs/empty_b"
  create_dir "empty-dirs/.hidden_empty_dir"
  create_dir "readonly-dir/no_write_subdir"
}

# Create the regular/ matrix: every content type at several sizes and modes.
create_regular_files() {
  make_file "regular/text_1mb_644.txt" text 1 0644
  make_file "regular/text_3mb_600.txt" text 3 0600
  make_file "regular/text_5mb_755.txt" text 5 0755

  make_file "regular/random_1mb_600.bin" random 1 0600
  make_file "regular/random_3mb_644.bin" random 3 0644
  make_file "regular/random_5mb_755.bin" random 5 0755

  make_file "regular/compressible_1mb_644.log" compressible 1 0644
  make_file "regular/compressible_3mb_600.log" compressible 3 0600
  make_file "regular/compressible_5mb_755.log" compressible 5 0755

  make_file "regular/script_1mb_755.sh" script 1 0755
  make_file "regular/script_3mb_700.sh" script 3 0700
  make_file "regular/script_5mb_755.sh" script 5 0755

  make_file "regular/sparse_1mb_600.img" sparse 1 0600
  make_file "regular/sparse_3mb_600.img" sparse 3 0600
  make_file "regular/sparse_5mb_600.img" sparse 5 0600

  make_file "regular/empty_000_644.txt" empty 0 0644
  make_file "regular/empty_001_600.txt" empty 0 0600
  make_file "regular/empty_002_755.txt" empty 0 0755
}

# Create naming edge cases: dot-files, names with spaces, and long names.
create_named_variants() {
  make_file "hidden/.hidden_text_1mb_644.txt" text 1 0644
  make_file "hidden/.hidden_random_3mb_600.bin" random 3 0600
  make_file "hidden/.hidden_script_1mb_755.sh" script 1 0755
  make_file "hidden/.hidden_empty_644" empty 0 0644
  make_file "hidden/.hidden_sparse_5mb_600.img" sparse 5 0600

  make_file "spaces in name/file with spaces text 1mb 644.txt" text 1 0644
  make_file "spaces in name/file with spaces random 3mb 600.bin" random 3 0600
  make_file "spaces in name/file with spaces script 1mb 755.sh" script 1 0755
  make_file "spaces in name/file with spaces empty 644" empty 0 0644
  make_file "spaces in name/file with spaces sparse 5mb 600.img" sparse 5 0600

  make_file "regular/longname_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_text_1mb_644.txt" text 1 0644
  make_file "regular/longname_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_random_3mb_600.bin" random 3 0600
  make_file "regular/longname_cccccccccccccccccccccccccccccccc_compressible_5mb_755.log" compressible 5 0755
}

# Create deeply nested files plus two byte-identical copies of one source
# file that carry different modes.
create_deep_and_duplicate_cases() {
  make_file "deep/tree/level1/level2/level3/deep_text_1mb_644.txt" text 1 0644
  make_file "deep/tree/level1/level2/level3/deep_random_3mb_600.bin" random 3 0600
  make_file "deep/tree/level1/level2/level3/deep_script_1mb_755.sh" script 1 0755
  make_file "deep/tree/level1/level2/level3/deep_sparse_5mb_600.img" sparse 5 0600

  make_file "regular/dup_source_text_3mb_644.txt" text 3 0644
  cp "$ROOT/regular/dup_source_text_3mb_644.txt" "$ROOT/regular/dup_copy_a_text_3mb_600.txt"
  cp "$ROOT/regular/dup_source_text_3mb_644.txt" "$ROOT/deep/tree/level1/level2/dup_copy_b_text_3mb_755.txt"
  chmod 0600 "$ROOT/regular/dup_copy_a_text_3mb_600.txt"
  chmod 0755 "$ROOT/deep/tree/level1/level2/dup_copy_b_text_3mb_755.txt"
}
create_time_and_readonly_cases() { make_file "regular/old_text_1mb_644.txt" text 1 0644 make_file "regular/recent_text_1mb_644.txt" text 1 0644 make_file "regular/futureish_text_1mb_644.txt" text 1 0644 set_times "regular/old_text_1mb_644.txt" 201801020304 set_times "regular/recent_text_1mb_644.txt" 202604191530 set_times "regular/futureish_text_1mb_644.txt" 203001020304 make_file "readonly-dir/locked_text_1mb_444.txt" text 1 0444 make_file "readonly-dir/locked_random_3mb_400.bin" random 3 0400 make_file "readonly-dir/locked_script_1mb_500.sh" script 1 0500 chmod 0555 "$ROOT/readonly-dir/no_write_subdir" } create_links() { ln -s ../regular/text_1mb_644.txt "$ROOT/links/symlink_to_text_1mb_644.txt" ln -s ../deep/tree/level1/level2/level3/deep_random_3mb_600.bin "$ROOT/links/symlink_to_deep_random_3mb_600.bin" ln -s ../hidden/.hidden_text_1mb_644.txt "$ROOT/links/symlink_to_hidden_file" ln "$ROOT/regular/random_3mb_644.bin" "$ROOT/links/hardlink_to_random_3mb_644.bin" ln "$ROOT/regular/compressible_5mb_755.log" "$ROOT/links/hardlink_to_compressible_5mb_755.log" } create_metadata_cases() { make_file "metadata/xattr_text_1mb_644.txt" text 1 0644 make_file "metadata/xattr_random_3mb_600.bin" random 3 0600 make_file "metadata/acl_text_1mb_644.txt" text 1 0644 make_file "metadata/acl_script_1mb_755.sh" script 1 0755 set_acl_and_xattr_metadata } create_bulk_files() { local folder_index local file_index local remaining_mib local size_mib local created_files=0 local consumed_mib=0 if (( FOLDER_COUNT == 0 || FILES_PER_FOLDER == 0 )); then return fi if [[ -n "$MAX_DATASET_SIZE_MIB" && "$MAX_DATASET_SIZE_MIB" == "0" ]]; then log "Skipping bulk-data generation because max dataset size is 0 MiB" return fi create_dir "bulk" for (( folder_index=1; folder_index<=FOLDER_COUNT; folder_index++ )); do create_dir "bulk/folder-$(printf '%03d' "$folder_index")" for (( file_index=1; file_index<=FILES_PER_FOLDER; file_index++ )); do if [[ -n "$MAX_DATASET_SIZE_MIB" ]]; then 
remaining_mib=$((MAX_DATASET_SIZE_MIB - consumed_mib)) if (( remaining_mib < MIN_FILE_SIZE_MIB )); then log "Reached bulk dataset size cap after creating $created_files files (${consumed_mib} MiB)" return fi if (( remaining_mib < MAX_FILE_SIZE_MIB )); then size_mib=$(random_int_between "$MIN_FILE_SIZE_MIB" "$remaining_mib") else size_mib=$(random_int_between "$MIN_FILE_SIZE_MIB" "$MAX_FILE_SIZE_MIB") fi else size_mib=$(random_int_between "$MIN_FILE_SIZE_MIB" "$MAX_FILE_SIZE_MIB") fi make_file \ "bulk/folder-$(printf '%03d' "$folder_index")/bulk_random_$(printf '%03d' "$file_index")_${size_mib}mib.bin" \ random \ "$size_mib" \ 0644 consumed_mib=$((consumed_mib + size_mib)) created_files=$((created_files + 1)) done done log "Created $created_files bulk files across $FOLDER_COUNT folders (${consumed_mib} MiB total)" } write_summary() { cat >"$ROOT/GENERATION_SUMMARY.txt" <