Add interval-based update mode for test dataset generation
Add optional interval-based random content updates to the cdssync migration test dataset generator and document the new behavior. This allows the dataset to be created once and then updated either continuously or every N seconds while preserving the intended special-case file structure.
This commit is contained in:
@@ -15,6 +15,10 @@ For migration test datasets in this workspace, follow this process by default:
|
||||
|
||||
- Use `/home/aw/code/cds/cdssync/generate_migration_test_dataset.sh` to create the dataset unless the user explicitly asks for a different method.
|
||||
- Prefer `/home/aw/code/cds/cdssync/migration-test-dataset` as the local canonical dataset location unless the user specifies another target.
|
||||
- The generator script accepts an optional `UPDATE_INTERVAL_SECONDS` argument:
|
||||
- omit it to create the dataset once and exit
|
||||
- use `0` for continuous random content updates
|
||||
- use any integer greater than `0` to rewrite mutable files every `N` seconds
|
||||
- If ACL/xattr coverage matters, ensure the generation host has:
|
||||
- `acl` installed for `setfacl` and `getfacl`
|
||||
- `attr` installed for `setfattr` and `getfattr`
|
||||
|
||||
@@ -5,7 +5,7 @@ set -euo pipefail
|
||||
usage() {
|
||||
cat <<'EOF'
|
||||
Usage:
|
||||
generate_migration_test_dataset.sh TARGET_DIR
|
||||
generate_migration_test_dataset.sh TARGET_DIR [UPDATE_INTERVAL_SECONDS]
|
||||
|
||||
Creates a compact filesystem migration test dataset under TARGET_DIR.
|
||||
The dataset matches the manifest in migration-test-manifest.md.
|
||||
@@ -14,16 +14,28 @@ Notes:
|
||||
- Existing TARGET_DIR contents are left in place unless they collide.
|
||||
- ACL and xattr cases are created only if the local tools are available.
|
||||
- Sparse files are created with logical size but low physical allocation.
|
||||
- If UPDATE_INTERVAL_SECONDS is provided, the script keeps rewriting
|
||||
mutable files with random content after the initial dataset creation.
|
||||
- An interval of 0 means continuous updates with no sleep between passes.
|
||||
- Update mode rewrites content-bearing regular files only.
|
||||
- Update mode does not rewrite script files, sparse files, symlinks,
|
||||
hard links, or empty files.
|
||||
EOF
|
||||
}
|
||||
|
||||
if [[ $# -ne 1 ]]; then
|
||||
if [[ $# -lt 1 || $# -gt 2 ]]; then
|
||||
usage
|
||||
exit 1
|
||||
fi
|
||||
|
||||
TARGET_DIR=$1
|
||||
ROOT=$(realpath -m "$TARGET_DIR")
|
||||
UPDATE_INTERVAL=${2:-}
|
||||
|
||||
if [[ -n "$UPDATE_INTERVAL" && ! "$UPDATE_INTERVAL" =~ ^[0-9]+$ ]]; then
|
||||
echo "UPDATE_INTERVAL_SECONDS must be a non-negative integer" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir -p "$ROOT"
|
||||
|
||||
@@ -42,6 +54,10 @@ create_dir() {
|
||||
mkdir -p "$ROOT/$1"
|
||||
}
|
||||
|
||||
# Print a timestamped log line to stdout.
# Arguments: all arguments are joined into one message via "$*".
# Outputs:   "[YYYY-MM-DD HH:MM:SS] <message>" followed by a newline.
log() {
  printf '[%s] %s\n' "$(date '+%Y-%m-%d %H:%M:%S')" "$*"
}
|
||||
|
||||
set_times() {
|
||||
local rel=$1
|
||||
local stamp=$2
|
||||
@@ -114,6 +130,96 @@ apply_mode() {
|
||||
chmod "$2" "$ROOT/$1"
|
||||
}
|
||||
|
||||
# Apply the xattr and ACL test metadata to the files under "$ROOT/metadata".
# Globals:   ROOT (read); have_setfattr, have_setfacl (read) - non-zero
#            means the corresponding tool is used, zero means its step is
#            skipped with a message (presumably set by tool detection
#            elsewhere in the script - confirm).
# Outputs:   a "Skipping ..." line on stdout for each unavailable tool.
set_acl_and_xattr_metadata() {
  if (( have_setfattr )); then
    setfattr -n user.migration_case -v "xattr-text" "$ROOT/metadata/xattr_text_1mb_644.txt"
    setfattr -n user.migration_case -v "xattr-random" "$ROOT/metadata/xattr_random_3mb_600.bin"
  else
    echo "Skipping xattr assignment: setfattr not available"
  fi

  if (( have_setfacl )); then
    setfacl -m u:nobody:r-- "$ROOT/metadata/acl_text_1mb_644.txt"
    setfacl -m u:nobody:r-x "$ROOT/metadata/acl_script_1mb_755.sh"
  else
    echo "Skipping ACL assignment: setfacl not available"
  fi
}
|
||||
|
||||
# Overwrite an existing file in place with random bytes of the same length,
# keeping its permission bits as they were before the call.
# Globals:   ROOT (read)
# Arguments: $1 - path of the file relative to ROOT
# Returns:   0 on success; non-zero if stat, the write, or chmod fails.
rewrite_file_with_random_data() {
  local rel=$1
  local path="$ROOT/$rel"
  local size
  local mode
  local status=0

  size=$(stat -c '%s' "$path") || return
  mode=$(stat -c '%a' "$path") || return

  # Read-only files (e.g. 444/400) need owner write access for the redirect.
  chmod u+w "$path"
  if (( size > 0 )); then
    head -c "$size" /dev/urandom >"$path" || status=$?
  else
    : >"$path" || status=$?
  fi
  # Restore the recorded mode even when the write failed, so a read-only
  # test file is never left writable; the failure is still reported below.
  chmod "$mode" "$path"
  return "$status"
}
|
||||
|
||||
# Run one update pass: rewrite every file in the mutable list with random
# data, then re-apply the xattr/ACL metadata.
# The list holds paths relative to ROOT; it contains no script (.sh) entries.
update_mutable_files_pass() {
  local rel
  local -a mutable_files=(
    "regular/text_1mb_644.txt"
    "regular/text_3mb_600.txt"
    "regular/text_5mb_755.txt"
    "regular/random_1mb_600.bin"
    "regular/random_3mb_644.bin"
    "regular/random_5mb_755.bin"
    "regular/compressible_1mb_644.log"
    "regular/compressible_3mb_600.log"
    "regular/compressible_5mb_755.log"
    "hidden/.hidden_text_1mb_644.txt"
    "hidden/.hidden_random_3mb_600.bin"
    "spaces in name/file with spaces text 1mb 644.txt"
    "spaces in name/file with spaces random 3mb 600.bin"
    "regular/longname_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa_text_1mb_644.txt"
    "regular/longname_bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb_random_3mb_600.bin"
    "regular/longname_cccccccccccccccccccccccccccccccc_compressible_5mb_755.log"
    "deep/tree/level1/level2/level3/deep_text_1mb_644.txt"
    "deep/tree/level1/level2/level3/deep_random_3mb_600.bin"
    "regular/dup_source_text_3mb_644.txt"
    "regular/dup_copy_a_text_3mb_600.txt"
    "deep/tree/level1/level2/dup_copy_b_text_3mb_755.txt"
    "regular/old_text_1mb_644.txt"
    "regular/recent_text_1mb_644.txt"
    "regular/futureish_text_1mb_644.txt"
    "readonly-dir/locked_text_1mb_444.txt"
    "readonly-dir/locked_random_3mb_400.bin"
    "metadata/xattr_text_1mb_644.txt"
    "metadata/xattr_random_3mb_600.bin"
    "metadata/acl_text_1mb_644.txt"
  )

  for rel in "${mutable_files[@]}"; do
    rewrite_file_with_random_data "$rel"
  done

  # Metadata is re-applied after every pass, exactly as after initial creation.
  set_acl_and_xattr_metadata
}
|
||||
|
||||
# Repeat update passes forever, logging each one; never returns normally.
# Globals:   ROOT (read), UPDATE_INTERVAL (read) - a string of decimal
#            digits; 0 means no pause between passes.
run_update_loop() {
  local iteration=1
  local interval

  # Force base 10: a zero-padded value such as "08" passes a digits-only
  # check, but plain shell arithmetic would reject it as invalid octal and
  # the loop would then spin with no sleep at all.
  interval=$((10#${UPDATE_INTERVAL:-0}))

  log "Starting update loop for $ROOT with interval ${UPDATE_INTERVAL}s"
  while true; do
    update_mutable_files_pass
    log "Completed random update pass $iteration"
    iteration=$((iteration + 1))

    if (( interval > 0 )); then
      sleep "$interval"
    fi
  done
}
|
||||
|
||||
make_file() {
|
||||
local path=$1
|
||||
local type=$2
|
||||
@@ -232,19 +338,7 @@ create_metadata_cases() {
|
||||
make_file "metadata/acl_text_1mb_644.txt" text 1 0644
|
||||
make_file "metadata/acl_script_1mb_755.sh" script 1 0755
|
||||
|
||||
if (( have_setfattr )); then
|
||||
setfattr -n user.migration_case -v "xattr-text" "$ROOT/metadata/xattr_text_1mb_644.txt"
|
||||
setfattr -n user.migration_case -v "xattr-random" "$ROOT/metadata/xattr_random_3mb_600.bin"
|
||||
else
|
||||
echo "Skipping xattr assignment: setfattr not available"
|
||||
fi
|
||||
|
||||
if (( have_setfacl )); then
|
||||
setfacl -m u:nobody:r-- "$ROOT/metadata/acl_text_1mb_644.txt"
|
||||
setfacl -m u:nobody:r-x "$ROOT/metadata/acl_script_1mb_755.sh"
|
||||
else
|
||||
echo "Skipping ACL assignment: setfacl not available"
|
||||
fi
|
||||
set_acl_and_xattr_metadata
|
||||
}
|
||||
|
||||
write_summary() {
|
||||
@@ -273,3 +367,7 @@ create_metadata_cases
|
||||
write_summary
|
||||
|
||||
echo "Created migration test dataset at: $ROOT"
|
||||
|
||||
if [[ -n "$UPDATE_INTERVAL" ]]; then
|
||||
run_update_loop
|
||||
fi
|
||||
|
||||
@@ -2,6 +2,18 @@
|
||||
|
||||
This manifest defines a compact, high-value filesystem test set for validating file migration behavior. It is intended to cover common file-content, naming, metadata, and directory edge cases without generating an unnecessarily large corpus.
|
||||
|
||||
The generator script can also keep running in update mode after initial creation. In that mode, mutable content files are repeatedly rewritten with random data, either back-to-back or on a fixed interval, depending on the optional interval argument:
|
||||
|
||||
- omit the interval argument to create the dataset once and exit
|
||||
- use `0` for continuous rewrites with no sleep between passes
|
||||
- use any integer greater than `0` to rewrite mutable files every `N` seconds
|
||||
|
||||
Important implementation detail for update mode:
|
||||
|
||||
- the update loop rewrites content-bearing regular files that are intended to simulate active data churn
|
||||
- it does not rewrite script files, sparse files, symlinks, hard links, or empty files
|
||||
- this preserves the special-case filesystem structure while still generating ongoing content changes
|
||||
|
||||
## Recommended Root Layout
|
||||
|
||||
- `regular/`
|
||||
|
||||
Reference in New Issue
Block a user