diff --git a/cdssync/.gitignore b/cdssync/.gitignore new file mode 100644 index 0000000..a973b0e --- /dev/null +++ b/cdssync/.gitignore @@ -0,0 +1,3 @@ +tmp-dataset-test/ +tmp-metadata-check/ +migration-test-dataset/ diff --git a/cdssync/AGENTS.md b/cdssync/AGENTS.md new file mode 100644 index 0000000..b5a1731 --- /dev/null +++ b/cdssync/AGENTS.md @@ -0,0 +1,53 @@ +# AI Agent Instructions for `cdssync` + +These instructions apply anywhere under `/home/aw/code/cds/cdssync`. + +## Migration Test Dataset Workflow + +For migration test datasets in this workspace, follow this process by default: + +1. Generate the dataset locally from this workspace. +2. Preserve the local generated dataset as the canonical original copy. +3. Copy the dataset to the test machine using metadata-preserving tooling. +4. Verify the copied dataset on the test machine before using it for migration testing. + +## Generation Rules + +- Use `/home/aw/code/cds/cdssync/generate_migration_test_dataset.sh` to create the dataset unless the user explicitly asks for a different method. +- Prefer `/home/aw/code/cds/cdssync/migration-test-dataset` as the local canonical dataset location unless the user specifies another target. +- If ACL/xattr coverage matters, ensure the generation host has: + - `acl` installed for `setfacl` and `getfacl` + - `attr` installed for `setfattr` and `getfattr` + +## Copy Rules + +- Use `rsync -aHAX` by default when copying the dataset to another machine. +- Preserve permissions, timestamps, symlinks, hard links, ACLs, and xattrs. +- Do not use GUI copy/paste or non-preserving copy methods for this dataset unless the user explicitly asks for that. 
+ +## Verification Rules + +After copying to a test machine, verify at least: + +- file and directory structure +- permissions +- symlinks +- hard links +- timestamps +- ACLs +- xattrs + +Preferred verification commands include: + +- `find DEST_DIR | sort` +- `stat DEST_DIR/regular/script_3mb_700.sh` +- `stat DEST_DIR/readonly-dir/locked_text_1mb_444.txt` +- `readlink DEST_DIR/links/symlink_to_text_1mb_644.txt` +- `stat DEST_DIR/regular/random_3mb_644.bin DEST_DIR/links/hardlink_to_random_3mb_644.bin` +- `getfacl -p DEST_DIR/metadata/acl_text_1mb_644.txt` +- `getfattr -d DEST_DIR/metadata/xattr_text_1mb_644.txt` + +## Destination Host Requirements + +- If the destination host lacks `acl` or `attr`, ACL/xattr verification will be incomplete. +- If the destination filesystem does not support ACLs or xattrs, those attributes may not survive transfer even when the copy method is correct. diff --git a/cdssync/generate_migration_test_dataset.sh b/cdssync/generate_migration_test_dataset.sh new file mode 100755 index 0000000..61ce13c --- /dev/null +++ b/cdssync/generate_migration_test_dataset.sh @@ -0,0 +1,275 @@ +#!/usr/bin/env bash + +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: + generate_migration_test_dataset.sh TARGET_DIR + +Creates a compact filesystem migration test dataset under TARGET_DIR. +The dataset matches the manifest in migration-test-manifest.md. + +Notes: + - Existing TARGET_DIR contents are left in place unless they collide. + - ACL and xattr cases are created only if the local tools are available. + - Sparse files are created with logical size but low physical allocation. 
+EOF +} + +if [[ $# -ne 1 ]]; then + usage + exit 1 +fi + +TARGET_DIR=$1 +ROOT=$(realpath -m "$TARGET_DIR") + +mkdir -p "$ROOT" + +have_setfacl=0 +have_setfattr=0 + +if command -v setfacl >/dev/null 2>&1; then + have_setfacl=1 +fi + +if command -v setfattr >/dev/null 2>&1; then + have_setfattr=1 +fi + +create_dir() { + mkdir -p "$ROOT/$1" +} + +set_times() { + local rel=$1 + local stamp=$2 + touch -a -m -t "$stamp" "$ROOT/$rel" +} + +write_text() { + local path=$1 + local mib=$2 + local bytes=$((mib * 1024 * 1024)) + perl -e ' + my ($target, $label) = @ARGV; + my $chunk = "Migration text payload for $label\n"; + while (length($chunk) < 8192) { $chunk .= $chunk; } + while ($target > 0) { + my $part = substr($chunk, 0, $target > length($chunk) ? length($chunk) : $target); + print $part; + $target -= length($part); + } + ' "$bytes" "$path" >"$ROOT/$path" +} + +write_compressible() { + local path=$1 + local mib=$2 + local bytes=$((mib * 1024 * 1024)) + perl -e ' + my ($target) = @ARGV; + my $chunk = "A" x 8192; + while ($target > 0) { + my $part = substr($chunk, 0, $target > length($chunk) ? 
length($chunk) : $target); + print $part; + $target -= length($part); + } + ' "$bytes" >"$ROOT/$path" +} + +write_random() { # write $2 MiB of random bytes to ROOT/$1 + local path=$1 + local mib=$2 + dd if=/dev/urandom of="$ROOT/$path" bs=1M count="$mib" iflag=fullblock status=none # fullblock: a short read must not shrink the file below $mib MiB +} + +write_script() { # write a two-line bash script to ROOT/$1 and pad it with "#" bytes up to $2 MiB + local path=$1 + local mib=$2 + cat >"$ROOT/$path" <<'EOF' +#!/usr/bin/env bash +echo "migration test script" +EOF + local current_size + current_size=$(wc -c <"$ROOT/$path") + local target_size=$((mib * 1024 * 1024)) + if (( current_size < target_size )); then + head -c "$((target_size - current_size))" /dev/zero | tr '\0' '#' >>"$ROOT/$path" # one streaming read; dd bs=1 issued a read and a write per padding byte + fi +} + +write_empty() { # create or truncate ROOT/$1 to zero bytes + : >"$ROOT/$1" +} + +write_sparse() { # set the logical size of ROOT/$1 to $2 MiB without writing data + local path=$1 + local mib=$2 + truncate -s "${mib}M" "$ROOT/$path" +} + +apply_mode() { # chmod ROOT/$1 to mode $2 + chmod "$2" "$ROOT/$1" +} + +make_file() { # args: relative path, content type, size in MiB, mode; exits 1 on an unknown type + local path=$1 + local type=$2 + local mib=$3 + local mode=$4 + + create_dir "$(dirname "$path")" + + case "$type" in + text) write_text "$path" "$mib" ;; + random) write_random "$path" "$mib" ;; + compressible) write_compressible "$path" "$mib" ;; + script) write_script "$path" "$mib" ;; + empty) write_empty "$path" ;; + sparse) write_sparse "$path" "$mib" ;; + *) + echo "Unknown type: $type" >&2 + exit 1 + ;; + esac + + apply_mode "$path" "$mode" +} + +create_base_dirs() { # create the fixed directory skeleton under ROOT, including the empty directories + create_dir "regular" + create_dir "hidden" + create_dir "spaces in name" + create_dir "deep/tree/level1/level2/level3" + create_dir "readonly-dir" + create_dir "links" + create_dir "metadata" + create_dir "empty-dirs/empty_a" + create_dir "empty-dirs/empty_b" + create_dir "empty-dirs/.hidden_empty_dir" + create_dir "readonly-dir/no_write_subdir" +} + +create_regular_files() { + make_file "regular/text_1mb_644.txt" text 1 0644 + make_file "regular/text_3mb_600.txt" text 3 0600 + make_file "regular/text_5mb_755.txt" text 5 0755 + make_file "regular/random_1mb_600.bin" random 1 0600 + make_file "regular/random_3mb_644.bin" random 3 0644 + make_file "regular/random_5mb_755.bin" random 5 0755 + 
make_file "regular/compressible_1mb_644.log" compressible 1 0644 + make_file "regular/compressible_3mb_600.log" compressible 3 0600 + make_file "regular/compressible_5mb_755.log" compressible 5 0755 + make_file "regular/script_1mb_755.sh" script 1 0755 + make_file "regular/script_3mb_700.sh" script 3 0700 + make_file "regular/script_5mb_755.sh" script 5 0755 + make_file "regular/sparse_1mb_600.img" sparse 1 0600 + make_file "regular/sparse_3mb_600.img" sparse 3 0600 + make_file "regular/sparse_5mb_600.img" sparse 5 0600 + make_file "regular/empty_000_644.txt" empty 0 0644 + make_file "regular/empty_001_600.txt" empty 0 0600 + make_file "regular/empty_002_755.txt" empty 0 0755 +} + +create_named_variants() { # files with dot-prefixed names, names containing spaces, and long names + make_file "hidden/.hidden_text_1mb_644.txt" text 1 0644 + make_file "hidden/.hidden_random_3mb_600.bin" random 3 0600 + make_file "hidden/.hidden_script_1mb_755.sh" script 1 0755 + make_file "hidden/.hidden_empty_644" empty 0 0644 + make_file "hidden/.hidden_sparse_5mb_600.img" sparse 5 0600 + + make_file "spaces in name/file with spaces text 1mb 644.txt" text 1 0644 + make_file "spaces in name/file with spaces random 3mb 600.bin" random 3 0600 + make_file "spaces in name/file with spaces script 1mb 755.sh" script 1 0755 + make_file "spaces in name/file with spaces empty 644" empty 0 0644 + make_file "spaces in name/file with spaces sparse 5mb 600.img" sparse 5 0600 + + make_file "regular/longname_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_text_1mb_644.txt" text 1 0644 + make_file "regular/longname_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_random_3mb_600.bin" random 3 0600 + make_file "regular/longname_cccccccccccccccccccccccccccccccc_compressible_5mb_755.log" compressible 5 0755 +} + +create_deep_and_duplicate_cases() { # files under deep/tree/level1/level2/level3, then two copies of one text file with differing modes + make_file "deep/tree/level1/level2/level3/deep_text_1mb_644.txt" text 1 0644 + make_file "deep/tree/level1/level2/level3/deep_random_3mb_600.bin" random 3 0600 + make_file "deep/tree/level1/level2/level3/deep_script_1mb_755.sh" script 1 0755 + make_file 
"deep/tree/level1/level2/level3/deep_sparse_5mb_600.img" sparse 5 0600 + + make_file "regular/dup_source_text_3mb_644.txt" text 3 0644 + cp "$ROOT/regular/dup_source_text_3mb_644.txt" "$ROOT/regular/dup_copy_a_text_3mb_600.txt" + cp "$ROOT/regular/dup_source_text_3mb_644.txt" "$ROOT/deep/tree/level1/level2/dup_copy_b_text_3mb_755.txt" + chmod 0600 "$ROOT/regular/dup_copy_a_text_3mb_600.txt" + chmod 0755 "$ROOT/deep/tree/level1/level2/dup_copy_b_text_3mb_755.txt" +} + +create_time_and_readonly_cases() { # files with fixed timestamps, files without write permission, and a 0555 directory + make_file "regular/old_text_1mb_644.txt" text 1 0644 + make_file "regular/recent_text_1mb_644.txt" text 1 0644 + make_file "regular/futureish_text_1mb_644.txt" text 1 0644 + set_times "regular/old_text_1mb_644.txt" 201801020304 # touch -t stamp: 2018-01-02 03:04 + set_times "regular/recent_text_1mb_644.txt" 202604191530 # touch -t stamp: 2026-04-19 15:30 + set_times "regular/futureish_text_1mb_644.txt" 203001020304 # touch -t stamp: 2030-01-02 03:04 + + make_file "readonly-dir/locked_text_1mb_444.txt" text 1 0444 + make_file "readonly-dir/locked_random_3mb_400.bin" random 3 0400 + make_file "readonly-dir/locked_script_1mb_500.sh" script 1 0500 + chmod 0555 "$ROOT/readonly-dir/no_write_subdir" # r-x only: no write bit on the directory itself +} + +create_links() { # three relative symlinks and two hard links, all placed under links/ + ln -s ../regular/text_1mb_644.txt "$ROOT/links/symlink_to_text_1mb_644.txt" + ln -s ../deep/tree/level1/level2/level3/deep_random_3mb_600.bin "$ROOT/links/symlink_to_deep_random_3mb_600.bin" + ln -s ../hidden/.hidden_text_1mb_644.txt "$ROOT/links/symlink_to_hidden_file" + ln "$ROOT/regular/random_3mb_644.bin" "$ROOT/links/hardlink_to_random_3mb_644.bin" + ln "$ROOT/regular/compressible_5mb_755.log" "$ROOT/links/hardlink_to_compressible_5mb_755.log" +} + +create_metadata_cases() { + make_file "metadata/xattr_text_1mb_644.txt" text 1 0644 + make_file "metadata/xattr_random_3mb_600.bin" random 3 0600 + make_file "metadata/acl_text_1mb_644.txt" text 1 0644 + make_file "metadata/acl_script_1mb_755.sh" script 1 0755 + + if (( have_setfattr )); then + setfattr -n user.migration_case -v "xattr-text" "$ROOT/metadata/xattr_text_1mb_644.txt" + setfattr -n 
user.migration_case -v "xattr-random" "$ROOT/metadata/xattr_random_3mb_600.bin" + else + echo "Skipping xattr assignment: setfattr not available" + fi + + if (( have_setfacl )); then + setfacl -m u:nobody:r-- "$ROOT/metadata/acl_text_1mb_644.txt" + setfacl -m u:nobody:r-x "$ROOT/metadata/acl_script_1mb_755.sh" + else + echo "Skipping ACL assignment: setfacl not available" + fi +} + +write_summary() { + cat >"$ROOT/GENERATION_SUMMARY.txt" <