diff --git a/cdssync/AGENTS.md b/cdssync/AGENTS.md
index b5a1731..3e01c73 100644
--- a/cdssync/AGENTS.md
+++ b/cdssync/AGENTS.md
@@ -15,6 +15,10 @@ For migration test datasets in this workspace, follow this process by default:
 
 - Use `/home/aw/code/cds/cdssync/generate_migration_test_dataset.sh` to create the dataset unless the user explicitly asks for a different method.
 - Prefer `/home/aw/code/cds/cdssync/migration-test-dataset` as the local canonical dataset location unless the user specifies another target.
+- The generator script accepts an optional `UPDATE_INTERVAL_SECONDS` argument:
+  - omit it to create the dataset once and exit
+  - use `0` for continuous random content updates
+  - use any integer greater than `0` to rewrite mutable files every `N` seconds
 - If ACL/xattr coverage matters, ensure the generation host has:
   - `acl` installed for `setfacl` and `getfacl`
   - `attr` installed for `setfattr` and `getfattr`
diff --git a/cdssync/generate_migration_test_dataset.sh b/cdssync/generate_migration_test_dataset.sh
index 61ce13c..0b3f946 100755
--- a/cdssync/generate_migration_test_dataset.sh
+++ b/cdssync/generate_migration_test_dataset.sh
@@ -5,7 +5,7 @@ set -euo pipefail
 usage() {
   cat <<'EOF'
 Usage:
-  generate_migration_test_dataset.sh TARGET_DIR
+  generate_migration_test_dataset.sh TARGET_DIR [UPDATE_INTERVAL_SECONDS]
 
 Creates a compact filesystem migration test dataset under TARGET_DIR.
 The dataset matches the manifest in migration-test-manifest.md.
@@ -14,16 +14,28 @@ Notes:
   - Existing TARGET_DIR contents are left in place unless they collide.
   - ACL and xattr cases are created only if the local tools are available.
   - Sparse files are created with logical size but low physical allocation.
+  - If UPDATE_INTERVAL_SECONDS is provided, the script keeps rewriting
+    mutable files with random content after the initial dataset creation.
+  - An interval of 0 means continuous updates with no sleep between passes.
+  - Update mode rewrites content-bearing regular files only.
+  - Update mode does not rewrite script files, sparse files, symlinks,
+    hard links, or empty files.
 EOF
 }
 
-if [[ $# -ne 1 ]]; then
+if [[ $# -lt 1 || $# -gt 2 ]]; then
   usage
   exit 1
 fi
 
 TARGET_DIR=$1
 ROOT=$(realpath -m "$TARGET_DIR")
+UPDATE_INTERVAL=${2:-}
+
+if [[ -n "$UPDATE_INTERVAL" && ! "$UPDATE_INTERVAL" =~ ^[0-9]+$ ]]; then
+  echo "UPDATE_INTERVAL_SECONDS must be a non-negative integer" >&2
+  exit 1
+fi
 
 mkdir -p "$ROOT"
 
@@ -42,6 +54,11 @@ create_dir() {
   mkdir -p "$ROOT/$1"
 }
 
+# Print a message to stdout prefixed with a local timestamp.
+log() {
+  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
+}
+
 set_times() {
   local rel=$1
   local stamp=$2
@@ -114,6 +131,107 @@ apply_mode() {
   chmod "$2" "$ROOT/$1"
 }
 
+# Tag the metadata/ fixtures with the user.migration_case xattr and the
+# u:nobody ACL entries; a step whose tool flag is 0 is skipped with a notice.
+set_acl_and_xattr_metadata() {
+  if (( have_setfattr )); then
+    setfattr -n user.migration_case -v "xattr-text" "$ROOT/metadata/xattr_text_1mb_644.txt"
+    setfattr -n user.migration_case -v "xattr-random" "$ROOT/metadata/xattr_random_3mb_600.bin"
+  else
+    echo "Skipping xattr assignment: setfattr not available"
+  fi
+
+  if (( have_setfacl )); then
+    setfacl -m u:nobody:r-- "$ROOT/metadata/acl_text_1mb_644.txt"
+    setfacl -m u:nobody:r-x "$ROOT/metadata/acl_script_1mb_755.sh"
+  else
+    echo "Skipping ACL assignment: setfacl not available"
+  fi
+}
+
+# Overwrite "$ROOT/$1" in place with random bytes of its current size.
+# The file is made owner-writable for the rewrite and its previous
+# permission bits are restored afterwards.
+rewrite_file_with_random_data() {
+  local rel=$1
+  local path="$ROOT/$rel"
+  local size
+  local mode
+
+  size=$(stat -c '%s' "$path")
+  mode=$(stat -c '%a' "$path")
+
+  chmod u+w "$path"
+  if (( size > 0 )); then
+    head -c "$size" /dev/urandom >"$path"
+  else
+    : >"$path"
+  fi
+  chmod "$mode" "$path"
+}
+
+# Rewrite each file in the mutable list once, then re-apply the xattr/ACL
+# metadata.
+update_mutable_files_pass() {
+  local rel
+  local mutable_files=(
+    "regular/text_1mb_644.txt"
+    "regular/text_3mb_600.txt"
+    "regular/text_5mb_755.txt"
+    "regular/random_1mb_600.bin"
+    "regular/random_3mb_644.bin"
+    "regular/random_5mb_755.bin"
+    "regular/compressible_1mb_644.log"
+    "regular/compressible_3mb_600.log"
+    "regular/compressible_5mb_755.log"
+    "hidden/.hidden_text_1mb_644.txt"
+    "hidden/.hidden_random_3mb_600.bin"
+    "spaces in name/file with spaces text 1mb 644.txt"
+    "spaces in name/file with spaces random 3mb 600.bin"
+    "regular/longname_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_text_1mb_644.txt"
+    "regular/longname_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_random_3mb_600.bin"
+    "regular/longname_cccccccccccccccccccccccccccccccc_compressible_5mb_755.log"
+    "deep/tree/level1/level2/level3/deep_text_1mb_644.txt"
+    "deep/tree/level1/level2/level3/deep_random_3mb_600.bin"
+    "regular/dup_source_text_3mb_644.txt"
+    "regular/dup_copy_a_text_3mb_600.txt"
+    "deep/tree/level1/level2/dup_copy_b_text_3mb_755.txt"
+    "regular/old_text_1mb_644.txt"
+    "regular/recent_text_1mb_644.txt"
+    "regular/futureish_text_1mb_644.txt"
+    "readonly-dir/locked_text_1mb_444.txt"
+    "readonly-dir/locked_random_3mb_400.bin"
+    "metadata/xattr_text_1mb_644.txt"
+    "metadata/xattr_random_3mb_600.bin"
+    "metadata/acl_text_1mb_644.txt"
+  )
+
+  for rel in "${mutable_files[@]}"; do
+    rewrite_file_with_random_data "$rel"
+  done
+
+  set_acl_and_xattr_metadata
+}
+
+# Run update passes forever, sleeping UPDATE_INTERVAL seconds after each
+# pass when the interval is greater than 0.
+run_update_loop() {
+  local iteration=1
+
+  log "Starting update loop for $ROOT with interval ${UPDATE_INTERVAL}s"
+  while true; do
+    update_mutable_files_pass
+    log "Completed random update pass $iteration"
+    iteration=$((iteration + 1))
+
+    # Force base 10: a zero-padded value such as "08" would otherwise be
+    # read as an invalid octal literal and the sleep would be skipped.
+    if (( 10#$UPDATE_INTERVAL > 0 )); then
+      sleep "$UPDATE_INTERVAL"
+    fi
+  done
+}
+
 make_file() {
   local path=$1
   local type=$2
@@ -232,19 +350,7 @@ create_metadata_cases() {
   make_file "metadata/acl_text_1mb_644.txt" text 1 0644
   make_file "metadata/acl_script_1mb_755.sh" script 1 0755
 
-  if (( have_setfattr )); then
-    setfattr -n user.migration_case -v "xattr-text" "$ROOT/metadata/xattr_text_1mb_644.txt"
-    setfattr -n user.migration_case -v "xattr-random" "$ROOT/metadata/xattr_random_3mb_600.bin"
-  else
-    echo "Skipping xattr assignment: setfattr not available"
-  fi
-
-  if (( have_setfacl )); then
-    setfacl -m u:nobody:r-- "$ROOT/metadata/acl_text_1mb_644.txt"
-    setfacl -m u:nobody:r-x "$ROOT/metadata/acl_script_1mb_755.sh"
-  else
-    echo "Skipping ACL assignment: setfacl not available"
-  fi
+  set_acl_and_xattr_metadata
 }
 
 write_summary() {
@@ -273,3 +379,7 @@ create_metadata_cases
 write_summary
 
 echo "Created migration test dataset at: $ROOT"
+
+if [[ -n "$UPDATE_INTERVAL" ]]; then
+  run_update_loop
+fi
diff --git a/cdssync/migration-test-manifest.md b/cdssync/migration-test-manifest.md
index 4ed6f7a..fda6c69 100644
--- a/cdssync/migration-test-manifest.md
+++ b/cdssync/migration-test-manifest.md
@@ -2,6 +2,18 @@
 
 This manifest defines a compact, high-value filesystem test set for validating file migration behavior. It is intended to cover common file-content, naming, metadata, and directory edge cases without generating an unnecessarily large corpus.
 
+The generator script can also run in continuous update mode after initial creation. In that mode, mutable content files are rewritten with random data on a fixed interval:
+
+- omit the interval argument to create the dataset once and exit
+- use `0` for continuous rewrites with no sleep between passes
+- use any integer greater than `0` to rewrite mutable files every `N` seconds
+
+Important implementation detail for update mode:
+
+- the update loop rewrites content-bearing regular files that are intended to simulate active data churn
+- it does not rewrite script files, sparse files, symlinks, hard links, or empty files
+- this preserves the special-case filesystem structure while still generating ongoing content changes
+
 ## Recommended Root Layout
 
 - `regular/`