Add monitoring helper for mtdi and galaxy migrate
Add a small monitoring helper script for logging CPU and memory usage for mtdi-daemon and galaxy-migrate. Also update the cdssync workspace instructions to document how to use the monitoring helper.
This commit is contained in:
@@ -32,6 +32,15 @@ For migration test datasets in this workspace, follow this process by default:
|
||||
- `acl` installed for `setfacl` and `getfacl`
|
||||
- `attr` installed for `setfattr` and `getfattr`
|
||||
|
||||
## Monitoring Helper
|
||||
|
||||
- Use `/home/aw/code/cds/cdssync/monitor_mtdi_galaxy.sh` when the user wants CPU and memory logging for `mtdi-daemon` and `galaxy-migrate`.
|
||||
- The script accepts two optional positional arguments, in this order:
|
||||
- `INTERVAL_SECONDS`, default `10`
|
||||
- `LOG_FILE`, default `/root/monitor_mtdi_galaxy.log`
|
||||
- A common remote run pattern is:
|
||||
- `nohup /root/monitor_mtdi_galaxy.sh 10 /root/monitor_mtdi_galaxy.log >/dev/null 2>&1 </dev/null &`
|
||||
|
||||
## Copy Rules
|
||||
|
||||
- Use `rsync -aHAX` by default when copying the dataset to another machine.
|
||||
|
||||
26
cdssync/monitor_mtdi_galaxy.sh
Normal file
26
cdssync/monitor_mtdi_galaxy.sh
Normal file
@@ -0,0 +1,26 @@
|
||||
#!/usr/bin/env bash
#
# monitor_mtdi_galaxy.sh - periodically log CPU and memory usage of the
# mtdi-daemon and galaxy-migrate processes.
#
# Usage: monitor_mtdi_galaxy.sh [INTERVAL_SECONDS] [LOG_FILE]
#   INTERVAL_SECONDS  positive integer, seconds between samples (default: 10)
#   LOG_FILE          file the samples are appended to
#                     (default: /root/monitor_mtdi_galaxy.log)
#
# Each sample is a "=== <timestamp> ===" line followed by one line per
# matching process and a blank separator line. The script loops until it
# is killed; it exits with status 1 on an invalid interval.

set -euo pipefail

INTERVAL_SECONDS=${1:-10}
LOG_FILE=${2:-/root/monitor_mtdi_galaxy.log}

# The regex guarantees digits only; "10#" forces base 10 so that values with
# a leading zero (e.g. "08") are not parsed as invalid octal by (( )).
if ! [[ "$INTERVAL_SECONDS" =~ ^[0-9]+$ ]] || (( 10#$INTERVAL_SECONDS <= 0 )); then
  echo "Usage: $0 [INTERVAL_SECONDS] [LOG_FILE]" >&2
  exit 1
fi

echo "# monitor start $(date '+%Y-%m-%d %H:%M:%S') interval=${INTERVAL_SECONDS}s" >>"$LOG_FILE"

while true; do
  {
    echo "=== $(date '+%Y-%m-%d %H:%M:%S') ==="
    # "args" must be the LAST ps column: it is free-form text containing
    # spaces, so placing it before the numeric columns would shift every
    # whitespace-separated field that awk sees after it.
    ps -eo pid,ppid,comm,%cpu,%mem,rss,vsz,args --sort=-%cpu \
      | awk '/mtdi-daemon|galaxy-migrate/ && $0 !~ /awk/ {
          # Fields 1-7 are the fixed columns; everything from field 8
          # onward is the command line, re-joined with single spaces.
          cmd = $8
          for (i = 9; i <= NF; i++) {
            cmd = cmd " " $i
          }
          printf "pid=%s ppid=%s comm=%s cpu=%s mem=%s rss_kb=%s vsz_kb=%s cmd=%s\n",
            $1, $2, $3, $4, $5, $6, $7, cmd
        }'
    echo
  } >>"$LOG_FILE"
  sleep "$INTERVAL_SECONDS"
done
|
||||
Reference in New Issue
Block a user