Gồm 2 script, cơ chế:
– Dọn nén các file .log trong folder chứa log, ở đây là /opt/batch/<tên batch>/log (Điều chỉnh trong file chạy chính)
– Sau khi nén xong thì đưa lên file server để chứa: ở đây đã mount file server vào /net/filesv (cấu hình điều chỉnh trong file thứ cấp)
– Các file log được dọn là các file log có từ tháng trước trở về trước, và hiện tại không có tiến trình nào đang đọc ghi. Ví dụ: hôm nay là tháng 8 thì sẽ dọn các file có timestamp là tháng 7 trở về trước
File chạy chính, clean_all_log.sh
#!/bin/bash
## Find every directory named "log" or "logs" under BATCH_DIR and run
## clean_log_batch.sh on each of them.
## clean_log_batch.sh is looked up in the directory that holds this script,
## so the job also works when it is started from another working directory
## (for example from cron).

START=$(date +%s)

## Print the wall-clock time elapsed since START.
## Installed as the EXIT trap, so it runs on every exit path of the script.
timecal() {
  local end total hours minutes running_time
  end=$(date +%s)
  total=$(( end - START ))
  hours=$(( total / 3600 ))
  minutes=$(( total % 3600 / 60 ))
  running_time="$minutes minute(s)"
  if (( hours > 0 )); then
    running_time="$hours hour(s), $running_time"
  fi
  echo "[INFO] Total time: $running_time"
}
trap timecal EXIT

## Absolute directory of this script; used to locate clean_log_batch.sh.
BASEDIR=$(cd "$(dirname -- "$0")" && pwd)

## batch dir, value must be absolute path
BATCH_DIR=/opt/batch
if [[ ! -d "$BATCH_DIR" ]]; then
  echo "$BATCH_DIR not found"
  exit 1
fi

## Collect the directories first (NUL-delimited, so names with spaces stay
## intact). The parentheses matter: without them "-type d" only applies to
## the first -name test and plain files named "logs" would be returned too.
LOG_DIRS=()
while IFS= read -r -d '' dir; do
  LOG_DIRS+=("$dir")
done < <(find "$BATCH_DIR" -type d \( -name 'log' -o -name 'logs' \) -print0)

for LOG_DIR in "${LOG_DIRS[@]}"; do
  echo "[INFO] Clean log path $LOG_DIR"
  "$BASEDIR/clean_log_batch.sh" "$LOG_DIR"
done
File thứ cấp: clean_log_batch.sh
#!/bin/bash
## Usage: clean_log_batch.sh LOG_DIR
##
## Packs the old log files found directly inside LOG_DIR into one tar.gz per
## (name pattern, month) and moves the archives to the file server.
##
## This script requires autofs installed on the running machine
## and read/write on REMOTE_HOST via nfs share
## LOG_DIR must be absolute path
## Log file must have timestamp in its name, year first (YYYY, then mm, then
## dd), with an optional '.', '-' or '_' between year and month
## Log file must have .log in its name
## Log file must have its name pattern placed before timestamp
## Even so, covering every log naming scheme invented by developers without
## a standard is still painful

LOG_DIR=$1
TEMP_DIR=${LOG_DIR}/temp
REMOTE_HOST=filesv
REMOTE_FOLDER=/net/${REMOTE_HOST}/drbd/applog/applicationserver/$(hostname)${LOG_DIR}
## Months to keep log, the older ones will be archived. e.g: if current month
## is Sep, and month to keep is 1, the logs from August and older will be
## archived.
MONTHS_TO_KEEP=1

## Escape the characters that are special in a basic regular expression, so a
## file-name prefix can be embedded literally in the grep/find patterns below.
bre_escape() {
  printf '%s' "$1" | sed 's/[][\.*^$]/\\&/g'
}

## Print the digits of the time stamp carried by the first file (in ls order)
## of the current directory whose name is
##   <prefix>[._-]?(log[._-])?20YY...
## Prints nothing when no file matches. ls order makes this the oldest file
## as long as the names of one pattern sort chronologically.
## $1 - name pattern (literal prefix of the file name)
oldest_time_stamp() {
  local prefix=$1 prefix_re first
  prefix_re=$(bre_escape "$prefix")
  first=$(ls -p \
    | grep -v '/$' \
    | grep -m1 "^${prefix_re}[._-]\{0,1\}\(log[._-]\)\{0,1\}20[0-9][0-9]")
  # Strip the prefix literally, then keep only the digits (YYYYmmdd...).
  printf '%s' "${first#"$prefix"}" | tr -cd '[:digit:]'
}

if [[ ! -d "$LOG_DIR" ]]; then
  echo "[ERROR] $1 not found"
  exit 1
fi
if [[ ! -d "/net/${REMOTE_HOST}" ]]; then
  echo "[ERROR] remote dir /net/${REMOTE_HOST} not found"
  exit 1
fi
# Create temp folder if it doesn't exist
mkdir -p "$TEMP_DIR" || { echo "[ERROR] cannot create $TEMP_DIR"; exit 1; }

# Newest month (YYYYmm) that may be archived. The 15th is used as the anchor
# day so that subtracting months never lands on a day that does not exist.
TIMELINE=$(date -d "$(date +%Y-%m-15) -${MONTHS_TO_KEEP} months" +'%Y%m')

# Everything below works on relative names, so a failed cd must stop the run.
cd "$LOG_DIR" || { echo "[ERROR] cannot enter $LOG_DIR"; exit 1; }

# move all .gz files to temp dir if they 're available in LOG_DIR
find . -maxdepth 1 -type f -name "*.gz" -exec mv {} "$TEMP_DIR/" \;

# Name patterns: the part of each file name before the time stamp, with
# ".log" removed. Directories are dropped first, duplicates last.
# The dot in /.log/ is left unescaped on purpose: escaping it would narrow
# the set of files this job has been processing so far.
PATTERNS=()
while IFS= read -r line; do
  [[ -n "$line" ]] && PATTERNS+=("$line")
done < <(
  ls -p \
    | grep -v / \
    | awk -F'[._-]20[0-9][0-9]|20[0-9][0-9]' '/.log/ {print $1}' \
    | sed 's/\.log//' \
    | sort -u
)

for PATTERN in "${PATTERNS[@]}"; do
  PATTERN_RE=$(bre_escape "$PATTERN")

  OLDEST_TIME_STAMP=$(oldest_time_stamp "$PATTERN")
  if (( ${#OLDEST_TIME_STAMP} < 8 )); then
    printf '[WARN] Timestamp extracted from file name %s is invalid. \nIgnore pattern %s\n' \
      "$OLDEST_TIME_STAMP" "$PATTERN"
    continue
  fi

  # Upper bound for the number of rounds: one per entry that starts with
  # PATTERN. Every successful round removes at least one file, so this is
  # only a safety net against looping forever.
  CANDIDATES=("$LOG_DIR/$PATTERN"*)
  RETRY=${#CANDIDATES[@]}
  RETRY_TIME=0

  # Archive month by month while the oldest time stamp is valid and its
  # first 6 digits (e.g: 201607) are not newer than TIMELINE.
  while (( RETRY_TIME < RETRY )) \
    && (( ${#OLDEST_TIME_STAMP} >= 8 )) \
    && [[ ${OLDEST_TIME_STAMP:0:6} -le ${TIMELINE} ]]; do
    # TIME_STAMP_PATTERN format: YYYY-mm
    YEAR=${OLDEST_TIME_STAMP:0:4}
    MONTH=${OLDEST_TIME_STAMP:4:2}
    TIME_STAMP_PATTERN=${YEAR}-${MONTH}
    echo "[INFO] Archiving ${PATTERN}_${TIME_STAMP_PATTERN}"

    # Regular files that start with PATTERN and carry YEAR and MONTH
    # (optionally separated by '-', '_' or '.') somewhere after it.
    LOG_FILES=()
    while IFS= read -r -d '' file; do
      LOG_FILES+=("$file")
    done < <(
      find . -maxdepth 1 -type f -regextype sed \
        -regex "\./${PATTERN_RE}.*[-_.]\{0,1\}${YEAR}[-_.]\{0,1\}${MONTH}.*" \
        -print0
    )

    # Nothing to pack: calling fuser/tar without files would only fail, and
    # the oldest time stamp would never change.
    if (( ${#LOG_FILES[@]} == 0 )); then
      echo "[WARN] No file found for ${PATTERN}_${TIME_STAMP_PATTERN}. Ignore pattern $PATTERN"
      break
    fi

    # Check if log files is being used by any process. Re-checking right
    # away cannot change the outcome, so go on with the next pattern.
    if fuser "${LOG_FILES[@]}"; then
      echo "[WARNING] There 're files being used. Abort archive ${PATTERN}_${TIME_STAMP_PATTERN}"
      break
    fi

    if ! tar -cvzf "${TEMP_DIR}/${PATTERN}_${YEAR}-${MONTH}.tar.gz" \
      --remove-files "${LOG_FILES[@]}"; then
      echo "[ERROR] Cannot archive ${PATTERN}_${TIME_STAMP_PATTERN}"
      break
    fi

    # Reset the OLDEST_TIME_STAMP variable
    OLDEST_TIME_STAMP=$(oldest_time_stamp "$PATTERN")
    RETRY_TIME=$(( RETRY_TIME + 1 ))
  done
done

# Exit if TEMP_DIR empty (and do not leave the empty folder behind)
if [[ -z "$(ls -A "$TEMP_DIR")" ]]; then
  rmdir "$TEMP_DIR" 2>/dev/null
  exit 0
fi

# copy all archived file from TEMP_DIR to remote host
# and delete if copy is successful
# "-hav", not "-have": a bundled -e takes the next argument as the remote
# shell command, which silently swallowed --progress.
# NOTE(review): --backup-dir equals the destination and no --suffix is given,
# so it is doubtful that an overwritten archive is really preserved - confirm.
mkdir -p "$REMOTE_FOLDER" \
  && rsync -hav --progress --backup --backup-dir="${REMOTE_FOLDER}" \
    --remove-source-files "${TEMP_DIR}"/* "${REMOTE_FOLDER}" \
  && rmdir "${TEMP_DIR}"