Script nén log file, sau đó move sang thư mục backup

Gồm 2 script, cơ chế:
– Dọn nén các file .log trong folder chứa log, ở đây là /opt/batch/<tên batch>/log (điều chỉnh trong file chạy chính)
– Sau khi nén xong thì đưa lên file server để chứa: ở đây đã mount file server vào /net/filesv (cấu hình điều chỉnh trong file thứ cấp)
– Các file log được dọn là các file log từ tháng trước trở về trước, và hiện tại không có tiến trình nào đang đọc ghi. Ví dụ: hôm nay là tháng 8 thì sẽ dọn các file có timestamp từ tháng 7 trở về trước

File chạy chính, clean_all_log.sh

#!/bin/bash

## Main script: find every directory named "log" or "logs" below the batch
## directory and call the clean_log_batch.sh script on each of them.


# Start time in seconds since the epoch; timecal subtracts it from the end
# time to report how long the run took.
START=$(date +%s)

# Report how long the script has been running, measured from START.
# Globals: START (read); END, TOTAL, HOURS, MINUTES, RUNNING_TIME (written)
# Outputs: a single "[INFO] Total time: ..." line on stdout. Hours are only
#          mentioned once at least one full hour has elapsed; seconds are
#          dropped.
timecal() {
  END=$(date +%s)
  TOTAL=$((END - START))
  HOURS=$((TOTAL / 3600))
  MINUTES=$((TOTAL % 3600 / 60))
  if [[ $HOURS -gt 0 ]]; then
    RUNNING_TIME="$HOURS hour(s), $MINUTES minute(s)"
  else
    RUNNING_TIME="$MINUTES minute(s)"
  fi
  printf '%s\n' "[INFO] Total time: $RUNNING_TIME"
}

# Print the elapsed time on every exit path (normal end or early exit).
trap timecal EXIT

# Directory containing this script. clean_log_batch.sh is looked up here so
# the script also works when started from another working directory (cron).
BASEDIR=$(dirname -- "$0")
## batch dir, value must be absolute path
BATCH_DIR=/opt/batch
if [[ ! -d "$BATCH_DIR" ]]; then
  echo "$BATCH_DIR not found"
  exit 1
fi

# Collect the log directories first, NUL-delimited so unusual names survive.
# The two -name tests are grouped: without the parentheses "-type d" only
# applied to 'log', and regular files named 'logs' were picked up as well.
LOG_DIRS=()
while IFS= read -r -d '' LOG_DIR; do
  LOG_DIRS+=("$LOG_DIR")
done < <(find "$BATCH_DIR" -type d \( -name 'log' -o -name 'logs' \) -print0)

# Clean each directory in turn; a failure in one directory does not stop
# the others.
for LOG_DIR in "${LOG_DIRS[@]}"; do
  echo "[INFO] Clean log path $LOG_DIR"
  "$BASEDIR/clean_log_batch.sh" "$LOG_DIR"
done

File thứ cấp: clean_log_batch.sh

#!/bin/bash

## Archive the old log files of one log directory and ship the archives to
## the file server.
## Usage: clean_log_batch.sh <LOG_DIR>
##
## This script requires autofs installed on the running machine
## and read/write on REMOTE_HOST via nfs share
## LOG_DIR must be absolute path

## Log file must have timestamp in its name with format YYYY[.-_]mm[/-_]dd
## Log file must have .log in its name
## Log file must have its name pattern placed before timestamp
## Even so, covering every log naming pattern invented by developers without
## a standard is still painful

LOG_DIR=$1
TEMP_DIR="${LOG_DIR}/temp"
REMOTE_HOST=filesv
# The archives are stored under <share>/<hostname><LOG_DIR>, which is why
# LOG_DIR has to be an absolute path.
REMOTE_FOLDER="/net/${REMOTE_HOST}/drbd/applog/applicationserver/$(hostname)${LOG_DIR}"
MONTHS_TO_KEEP=1  # months to keep log, the older ones will be archived. e.g: if current month is Sep, and month to keep is 1, the logs from August and older will be archived.

if [[ ! -d "$LOG_DIR" ]]; then
  echo "[ERROR] $1 not found"
  exit 1
fi
# A relative LOG_DIR would break TEMP_DIR once the script changes directory
# and would glue the path onto the hostname in REMOTE_FOLDER.
if [[ "$LOG_DIR" != /* ]]; then
  echo "[ERROR] $LOG_DIR is not an absolute path"
  exit 1
fi
if [[ ! -d "/net/${REMOTE_HOST}" ]]; then
  echo "[ERROR] remote dir /net/${REMOTE_HOST} not found"
  exit 1
fi
# Create temp folder if it doesn't exist; stop when that is impossible
# because every archive is written there.
if ! mkdir -p "$TEMP_DIR"; then
  echo "[ERROR] cannot create $TEMP_DIR"
  exit 1
fi
# Newest month (YYYYmm) that gets archived. The calculation starts from the
# 15th so that subtracting months never spills over a month boundary, which
# can happen with GNU date near the end of a month.
TIMELINE=$(date -d "$(date +%Y-%m-15) -${MONTHS_TO_KEEP} months" +'%Y%m')

# Everything below works on paths relative to LOG_DIR and removes files, so
# never continue from the wrong directory. ":?" also aborts when the variable
# is empty.
cd "${LOG_DIR:?}" || { echo "[ERROR] cannot cd to $LOG_DIR"; exit 1; }

# move all .gz files to temp dir if they're available in LOG_DIR, so they
# are shipped together with the archives created by this run
find . -maxdepth 1 -type f -name "*.gz" -exec mv {} "${TEMP_DIR:?}"/ \;

# Print the digits found in the name of the first regular file (in ls order)
# of the current directory whose name is: pattern $1, an optional separator,
# an optional "log" + separator, then a 20xx year. Prints nothing when no
# file matches. Because ls sorts by name, the first match is normally the
# oldest one for timestamps written as YYYY mm dd.
# The match is anchored at the start of the name so that a pattern such as
# "app" never picks up files of "myapp"; directories (trailing "/" from
# ls -p) are skipped.
oldest_time_stamp() {
  ls -p \
    | grep -v '/$' \
    | grep -m1 "^${1}[._-]\{0,1\}\(log[._-]\)\{0,1\}20[0-9][0-9]" 2>/dev/null \
    | sed "s/${1}//" \
    | tr -cd '[:digit:]'
}

# Name patterns = the part of each log file name placed before the timestamp,
# with ".log" removed. Only regular files containing a literal ".log" are
# considered (the dot is escaped; the unescaped form matched any character).
# Duplicates are removed after stripping ".log" so "app" and "app.log" end up
# as a single pattern.
PATTERNS=$(ls -p \
  | grep -v '/$' \
  | awk -F'[._-]20[0-9][0-9]|20[0-9][0-9]' '/\.log/ {print $1}' \
  | sed 's/\.log//' \
  | sort -u)

# Patterns are read line by line from fd 3 so that names are not word-split
# and the commands inside the loop keep the original stdin.
while IFS= read -r PATTERN <&3; do
  [[ -n "$PATTERN" ]] || continue

  # Upper bound for the loop below: the number of entries in LOG_DIR whose
  # name starts with PATTERN.
  RETRY=$(ls "$LOG_DIR"/"$PATTERN"* 2>/dev/null | wc -l)
  OLDEST_TIME_STAMP=$(oldest_time_stamp "$PATTERN")
  if [[ ${#OLDEST_TIME_STAMP} -lt 8 ]]; then
    echo "[WARN] Timestamp extracted from file name $OLDEST_TIME_STAMP is invalid. Ignore pattern $PATTERN"
    continue
  fi
  RETRY_TIME=0
  # Keep archiving month by month while the month of the oldest remaining
  # file (first 6 digits, e.g. 201607) is not later than TIMELINE and the
  # iteration limit has not been reached.
  while [[ -n "$OLDEST_TIME_STAMP" && "${OLDEST_TIME_STAMP:0:6}" -le "$TIMELINE" && "$RETRY_TIME" -lt "$RETRY" ]]; do
    # TIME_STAMP_PATTERN format: YYYY-mm
    YEAR=${OLDEST_TIME_STAMP:0:4}
    MONTH=${OLDEST_TIME_STAMP:4:2}
    TIME_STAMP_PATTERN=${YEAR}-${MONTH}
    echo "[INFO] Archiving ${PATTERN}_${TIME_STAMP_PATTERN}"

    # Regular files whose name starts with PATTERN and contains YEAR and
    # MONTH (optionally separated). A single regex is enough: the former
    # first alternative was a subset of this one, and joining both with -or
    # left the second branch without "-type f", so directories could match.
    LOG_FILES=()
    while IFS= read -r -d '' LOG_FILE; do
      LOG_FILES+=("$LOG_FILE")
    done < <(find . -maxdepth 1 -type f -regextype sed \
      -regex "\./${PATTERN}.*[-_.]\{0,1\}${YEAR}[-_.]\{0,1\}${MONTH}.*" -print0)

    # Nothing to archive for this month: retrying cannot change that, and
    # calling fuser/tar without files only produces errors.
    if [[ ${#LOG_FILES[@]} -eq 0 ]]; then
      echo "[WARN] No file found for ${PATTERN}_${TIME_STAMP_PATTERN}. Ignore pattern $PATTERN"
      break
    fi

    # Check if log files is being used by any process; fuser succeeds when
    # at least one of them is. The same month is then tried again until the
    # iteration limit is reached.
    if fuser "${LOG_FILES[@]}"; then
      echo "[WARNING] There 're files being used. Abort archive ${PATTERN}_${TIME_STAMP_PATTERN}"
      RETRY_TIME=$((RETRY_TIME + 1))
      continue
    fi

    # --remove-files deletes the originals once they are in the archive.
    tar -cvzf "${TEMP_DIR}/${PATTERN}_${YEAR}-${MONTH}.tar.gz" --remove-files "${LOG_FILES[@]}"

    # Look up the oldest remaining file for the next round
    OLDEST_TIME_STAMP=$(oldest_time_stamp "$PATTERN")
    RETRY_TIME=$((RETRY_TIME + 1))
  done
done 3<<< "$PATTERNS"
# Exit if TEMP_DIR empty
[[ -n "$(ls -A "$TEMP_DIR")" ]] || exit 0
# copy all archived file from TEMP_DIR to remote host
# and delete if copy is successful (--remove-source-files); the then-empty
# TEMP_DIR is removed afterwards.
# The short options are "-hav": the former "-have" ended with -e, which takes
# an argument and therefore swallowed "--progress" as the remote shell.
# NOTE(review): --backup-dir points at the destination itself, so it is
# unclear whether replaced files are really preserved - confirm the intent.
mkdir -p "$REMOTE_FOLDER" \
  && rsync -hav --progress --backup --backup-dir="$REMOTE_FOLDER" \
       --remove-source-files "$TEMP_DIR"/* "$REMOTE_FOLDER" \
  && rmdir "$TEMP_DIR"