#!/bin/bash
#
# Hardlink upload mirror.
#
# For every "source|mirror" pair in FOLDER_PAIRS, walk the source tree and
# create a hardlink for each regular file under the mirror directory, using a
# sanitized directory/file name. A per-pair state file remembers the
# "source -> mirror" mapping of the previous run so that mirror files whose
# source has disappeared can be removed on the next run.
#
# Project: https://github.com/yuzi-ska/Hardlink-Upload-Mirror

set -eu
unset LD_PRELOAD

########################################
# Configuration: adjust as needed
########################################

LOG_FILE="/volume/log/build_upload_mirror/mirror.log"
LOG_ROTATE_SIZE=$((5 * 1024 * 1024))
LOG_ROTATE_COUNT=5

# State directory: holds the "source file -> mirror file" map of each pair.
STATE_DIR="/volume/log/build_upload_mirror/state"

# Whether to append a short hash to the file name, producing
# "<sanitized stem>__<short hash><extension>".
USE_HASH_SUFFIX=1

# perl may not be on PATH when run from a scheduled task, so use an
# absolute path here.
PERL_BIN="/opt/bin/perl"

# One entry per mapping, written as "source_dir|mirror_dir".
FOLDER_PAIRS=(
  "/volume/Project|/volume/Project/backup"
  "/files/Project|/files/Project/backup"
)

########################################
# Initialization
########################################

mkdir -p "$(dirname "$LOG_FILE")"
mkdir -p "$STATE_DIR"
touch "$LOG_FILE"

#######################################
# Rotate LOG_FILE once it reaches LOG_ROTATE_SIZE bytes.
# Existing "$LOG_FILE.N" files are shifted to N+1 (the file at
# LOG_ROTATE_COUNT is overwritten), then LOG_FILE becomes ".1" and a new
# empty LOG_FILE is created.
# Globals: LOG_FILE, LOG_ROTATE_SIZE, LOG_ROTATE_COUNT (read)
# Returns: 0 when nothing had to be done or rotation succeeded
#######################################
rotate_log_if_needed() {
  [ -f "$LOG_FILE" ] || return 0

  local size
  size="$(stat -c '%s' "$LOG_FILE" 2>/dev/null || echo 0)"
  if [ "$size" -lt "$LOG_ROTATE_SIZE" ]; then
    return 0
  fi

  local i
  for ((i = LOG_ROTATE_COUNT - 1; i >= 1; i--)); do
    if [ -f "${LOG_FILE}.${i}" ]; then
      mv -f "${LOG_FILE}.${i}" "${LOG_FILE}.$((i + 1))"
    fi
  done

  mv -f "$LOG_FILE" "${LOG_FILE}.1"
  : > "$LOG_FILE"
}

#######################################
# Append a timestamped line to LOG_FILE (rotating first if needed).
# Arguments: message words, joined with spaces
#######################################
log() {
  rotate_log_if_needed
  echo "[$(date '+%F %T')] $*" >> "$LOG_FILE"
}

# Check for perl up front so a scheduled run does not fail silently.
if [ ! -x "$PERL_BIN" ]; then
  log "错误:PERL_BIN 不存在或不可执行:$PERL_BIN"
  exit 1
fi

########################################
# Name sanitizing rules
# Kept: Han, Hiragana, Katakana, Latin letters, decimal digits, ". _ -"
########################################

#######################################
# Sanitize one path component.
# Characters in the listed emoji/symbol/joiner ranges are dropped, every run
# of other disallowed characters becomes a single "_", and leading/trailing
# "_" are trimmed. An empty result is replaced by "_".
# Arguments: $1 - raw name
# Outputs:   sanitized name on stdout (no trailing newline)
#######################################
sanitize_name() {
  # The here-string appends a newline; it is turned into "_" by the second
  # substitution and then trimmed again by the trailing-underscore rule.
  "$PERL_BIN" -CSDA -e '
    use utf8;
    my $s = join("", <>);
    $s =~ s/[\x{1F000}-\x{1FAFF}\x{2600}-\x{27BF}\x{FE0F}\x{200D}\x{20E3}]//g;
    $s =~ s/[^\p{Han}\p{Hiragana}\p{Katakana}\p{Latin}\p{Nd}._-]+/_/g;
    $s =~ s/_+/_/g;
    $s =~ s/^_+//;
    $s =~ s/_+$//;
    $s = "_" if $s eq "";
    print $s;
  ' <<< "$1"
}

########################################
# Relative directory path sanitizing
########################################

#######################################
# Sanitize every component of a relative directory path.
# Arguments: $1 - relative path ("" or "." means "no sub-directory")
# Outputs:   "/comp1/comp2..." with each component sanitized, or an empty
#            string for an empty / "." input
#######################################
build_sanitized_rel_path() {
  local rel="$1"
  local out=""
  local part
  # Declared local so the split result does not leak into the global scope.
  local -a parts=()

  if [ -z "$rel" ] || [ "$rel" = "." ]; then
    printf '%s' ""
    return 0
  fi

  IFS='/' read -r -a parts <<< "$rel"

  # The ${arr[@]+...} form stays safe under "set -u" if the array is empty.
  for part in ${parts[@]+"${parts[@]}"}; do
    [ -n "$part" ] || continue
    out="$out/$(sanitize_name "$part")"
  done

  printf '%s' "$out"
}

########################################
# Split-archive / compound extension detection
########################################

#######################################
# Split a file name into stem and extension, keeping multi-part archive
# suffixes (".partN.rar", ".7z.001", ".z01", ".r01", ".tar.gz", ...) whole.
# Names that start with "." or contain no "." get an empty extension.
# Arguments: $1 - file base name
# Outputs:   two lines on stdout: stem, then extension (with leading dot,
#            possibly empty)
#######################################
detect_archive_suffix() {
  local base="$1"

  if [[ "$base" =~ ^(.+)(\.part[0-9]+\.rar)$ ]]; then
    printf '%s\n%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
    return 0
  fi

  if [[ "$base" =~ ^(.+)(\.(7z|zip|tar)\.[0-9]{3})$ ]]; then
    printf '%s\n%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
    return 0
  fi

  if [[ "$base" =~ ^(.+)(\.z[0-9]{2})$ ]]; then
    printf '%s\n%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
    return 0
  fi

  if [[ "$base" =~ ^(.+)(\.r[0-9]{2})$ ]]; then
    printf '%s\n%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
    return 0
  fi

  if [[ "$base" =~ ^(.+)(\.tar\.(gz|xz|bz2|zst))$ ]]; then
    printf '%s\n%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}"
    return 0
  fi

  if [[ "$base" == *.* && "$base" != .* ]]; then
    printf '%s\n.%s\n' "${base%.*}" "${base##*.}"
    return 0
  fi

  printf '%s\n\n' "$base"
}

########################################
# File name sanitizing
########################################

#######################################
# Build the mirror file name for a source base name.
# The stem is sanitized with sanitize_name; if that leaves nothing useful,
# a sed-based fallback is tried and finally the literal "file". When
# USE_HASH_SUFFIX is "1", the first 6 hex digits of the md5 of the ORIGINAL
# base name are inserted before the extension.
# Globals:   USE_HASH_SUFFIX (read)
# Arguments: $1 - source base name
# Outputs:   mirror file name on stdout (no trailing newline)
#######################################
safe_target_name() {
  local base="$1"
  local parsed
  local stem=""
  local ext=""
  local clean_stem

  parsed="$(detect_archive_suffix "$base")"

  # Line 1 is the stem, line 2 the extension. Command substitution strips
  # trailing newlines, so the second read hits EOF when the extension is
  # empty; "|| true" keeps that from tripping "set -e".
  {
    IFS= read -r stem || true
    IFS= read -r ext || true
  } <<< "$parsed"

  clean_stem="$(sanitize_name "$stem")"

  if [ -z "$clean_stem" ] || [ "$clean_stem" = "_" ]; then
    clean_stem="$(printf '%s' "$stem" | sed 's/[^[:alnum:][:space:]._-]/_/g; s/[[:space:]]\+/_/g; s/_\+/_/g; s/^_//; s/_$//')"
    [ -n "$clean_stem" ] || clean_stem="file"
  fi

  if [ "$USE_HASH_SUFFIX" = "1" ]; then
    local short_hash
    short_hash="$(printf '%s' "$base" | md5sum)"
    short_hash="${short_hash:0:6}"
    printf '%s__%s%s' "$clean_stem" "$short_hash" "$ext"
  else
    printf '%s%s' "$clean_stem" "$ext"
  fi
}

########################################
# inode / filesystem / path checks
########################################

#######################################
# Succeed when both paths resolve to the same device:inode pair.
# Arguments: $1, $2 - paths
#######################################
same_inode() {
  local f1="$1"
  local f2="$2"
  local i1 i2

  i1="$(stat -c '%d:%i' "$f1" 2>/dev/null || true)"
  i2="$(stat -c '%d:%i' "$f2" 2>/dev/null || true)"

  [ -n "$i1" ] && [ "$i1" = "$i2" ]
}

#######################################
# Succeed when both directories report the same device number.
# Both directories are created first if missing.
# Arguments: $1, $2 - directory paths
#######################################
same_filesystem() {
  local p1="$1"
  local p2="$2"

  mkdir -p "$p1" "$p2"

  local d1 d2
  d1="$(stat -c '%d' "$p1" 2>/dev/null || true)"
  d2="$(stat -c '%d' "$p2" 2>/dev/null || true)"

  [ -n "$d1" ] && [ "$d1" = "$d2" ]
}

#######################################
# Succeed when $1 equals $2 or lies below it (plain string comparison, no
# symlink or ".." resolution).
# Arguments: $1 - child path, $2 - parent path
#######################################
is_subpath() {
  local child="$1"
  local parent="$2"

  case "$child" in
    "$parent" | "${parent}/"*) return 0 ;;
    *) return 1 ;;
  esac
}

#######################################
# Succeed when two files are the same inode, or have equal size and
# byte-identical content.
# Arguments: $1, $2 - file paths
#######################################
same_value() {
  local f1="$1"
  local f2="$2"

  if same_inode "$f1" "$f2"; then
    return 0
  fi

  local s1 s2
  s1="$(stat -c '%s' "$f1" 2>/dev/null || echo -1)"
  s2="$(stat -c '%s' "$f2" 2>/dev/null || echo -1)"

  # Cheap size comparison first; only run cmp when sizes match.
  [ "$s1" = "$s2" ] || return 1
  cmp -s "$f1" "$f2"
}

########################################
# State file
########################################

#######################################
# Print the state file path for a source/mirror pair.
# The file name is the md5 of "src|dst" plus ".list", inside STATE_DIR.
# Globals:   STATE_DIR (read)
# Arguments: $1 - source root, $2 - mirror root
#######################################
pair_state_file() {
  local src="$1"
  local dst="$2"
  local key

  key="$(printf '%s|%s' "$src" "$dst" | md5sum)"
  key="${key%% *}"
  printf '%s/%s.list' "$STATE_DIR" "$key"
}

########################################
# Safe removal of mirror files
########################################

#######################################
# Remove one mirror file, but only after a series of safety checks: the
# mirror root must not be a known dangerous directory, must differ from the
# source root, and the target must be inside the mirror root and outside the
# source root.
# Always returns 0: every refusal path is a normal outcome. (A bare "return"
# after a failed test would propagate status 1 and abort the whole script
# under "set -e".)
# Arguments: $1 - file to remove, $2 - mirror root, $3 - source root
#######################################
safe_remove_target_file() {
  local target="$1"
  local dst_root="$2"
  local src_root="$3"

  [ -n "$target" ] || return 0
  [ -n "$dst_root" ] || return 0
  [ -n "$src_root" ] || return 0

  dst_root="${dst_root%/}"
  src_root="${src_root%/}"

  case "$dst_root" in
    "" | "/" | "/volume1" | "/volume2" | "/volume3" | "/volume4")
      log "安全检查未通过,拒绝删除:危险目标目录 $dst_root"
      return 0
      ;;
  esac

  if [ "$src_root" = "$dst_root" ]; then
    log "安全检查未通过,拒绝删除:源目录与目标目录相同 src=$src_root dst=$dst_root"
    return 0
  fi

  # Already gone (e.g. removed by hand): nothing to do, and not an error.
  [ -e "$target" ] || return 0

  if ! is_subpath "$target" "$dst_root"; then
    log "安全检查未通过,拒绝删除(不在目标目录内):$target"
    return 0
  fi

  if is_subpath "$target" "$src_root"; then
    log "安全检查未通过,拒绝删除(目标路径落入源目录):$target"
    return 0
  fi

  # Report the real outcome; a failed rm must not kill the run.
  if rm -f -- "$target" 2>>"$LOG_FILE"; then
    log "删除镜像文件:$target"
  else
    log "删除镜像文件失败:$target"
  fi
  return 0
}

#######################################
# Remove mirror files recorded in the previous state whose source file no
# longer exists, then prune empty directories below the mirror root.
# Arguments: $1 - previous state list (tab-separated "src<TAB>target" lines)
#            $2 - current state list
#            $3 - mirror root
#            $4 - source root
#######################################
cleanup_deleted_targets() {
  local prev_list="$1"
  local curr_list="$2"
  local dst_root="$3"
  local src_root="$4"
  local old_src old_target

  [ -f "$prev_list" ] || return 0

  while IFS=$'\t' read -r old_src old_target; do
    [ -n "${old_src:-}" ] || continue
    [ -n "${old_target:-}" ] || continue

    # Source still present: its mirror file stays.
    if [ -e "$old_src" ]; then
      continue
    fi

    # Mapping still listed for the current run: keep it.
    if grep -Fq -- "$old_src"$'\t'"$old_target" "$curr_list"; then
      continue
    fi

    safe_remove_target_file "$old_target" "$dst_root" "$src_root"
  done < "$prev_list"

  # -mindepth 1 keeps the mirror root itself even when it ends up empty.
  find "$dst_root" -mindepth 1 -depth -type d -empty -delete 2>/dev/null || true
}

########################################
# Process one directory mapping
########################################

#######################################
# Mirror one source tree into its mirror directory with hardlinks.
# Steps: verify source exists and shares a filesystem with the mirror,
# recreate the (sanitized) directory structure, link every regular file,
# write the new state file, then clean up mirror files whose source is gone.
# Paths inside the mirror directory, the log directory, and "#recycle" /
# "@eaDir" trees are skipped.
# Globals:   LOG_FILE (read)
# Arguments: $1 - source root, $2 - mirror root (both without trailing "/")
#######################################
process_pair() {
  local src="$1"
  local dst="$2"
  local log_dir
  local state_file
  local state_file_tmp
  local current_list
  local prev_tmp
  local src_dir src_file
  local rel_dir sanitized_rel dst_dir
  local src_dirname base sanitized_rel_dir dst_file

  log "开始处理:SRC=$src DST=$dst"

  if [ ! -d "$src" ]; then
    log "跳过,不存在的源目录:$src"
    return 0
  fi

  mkdir -p "$dst"

  if ! same_filesystem "$src" "$dst"; then
    log "跳过,源目录与镜像目录不在同一文件系统,无法创建硬链接:SRC=$src DST=$dst"
    return 0
  fi

  log_dir="$(dirname "$LOG_FILE")"
  state_file="$(pair_state_file "$src" "$dst")"
  state_file_tmp="${state_file}.tmp"
  current_list="$(mktemp)"
  prev_tmp="$(mktemp)"

  # Snapshot the previous state; fall back to an empty list when there is
  # none or the copy fails.
  if ! { [ -f "$state_file" ] && cp -f -- "$state_file" "$prev_tmp"; }; then
    : > "$prev_tmp"
  fi

  # 1) Create the mirror directory structure.
  while IFS= read -r -d '' src_dir; do
    case "$src_dir" in
      */#recycle | */#recycle/* | */@eaDir | */@eaDir/*)
        continue
        ;;
    esac

    if is_subpath "$src_dir" "$dst"; then
      continue
    fi

    if is_subpath "$src_dir" "$log_dir"; then
      continue
    fi

    if [ "$src_dir" = "$src" ]; then
      continue
    fi

    rel_dir="${src_dir#"$src"/}"
    # Prefix removal left the path unchanged: not below $src, skip it.
    if [ -z "$rel_dir" ] || [ "$rel_dir" = "$src_dir" ]; then
      continue
    fi

    sanitized_rel="$(build_sanitized_rel_path "$rel_dir")"
    dst_dir="$dst$sanitized_rel"
    mkdir -p "$dst_dir"
  done < <(find "$src" -mindepth 1 -type d -print0)

  # 2) Link the files.
  while IFS= read -r -d '' src_file; do
    case "$src_file" in
      */#recycle/* | */@eaDir/*)
        continue
        ;;
    esac

    if is_subpath "$src_file" "$dst"; then
      continue
    fi

    if is_subpath "$src_file" "$log_dir"; then
      continue
    fi

    base="${src_file##*/}"
    src_dirname="${src_file%/*}"

    if [ "$src_dirname" = "$src" ]; then
      rel_dir=""
    else
      rel_dir="${src_dirname#"$src"/}"
      if [ "$rel_dir" = "$src_dirname" ]; then
        rel_dir=""
      fi
    fi

    if [ -n "$rel_dir" ]; then
      sanitized_rel_dir="$(build_sanitized_rel_path "$rel_dir")"
      dst_dir="$dst$sanitized_rel_dir"
    else
      dst_dir="$dst"
    fi

    mkdir -p "$dst_dir"

    dst_file="$dst_dir/$(safe_target_name "$base")"

    if [ -e "$dst_file" ]; then
      if same_inode "$src_file" "$dst_file"; then
        log "已存在且同 inode,跳过:$src_file -> $dst_file"
      elif same_value "$src_file" "$dst_file"; then
        log "内容一致,跳过:$src_file -> $dst_file"
      else
        # rm is part of the condition so that a failed removal is logged
        # instead of aborting the whole run under "set -e".
        if rm -f -- "$dst_file" 2>>"$LOG_FILE" \
          && ln "$src_file" "$dst_file" 2>>"$LOG_FILE"; then
          log "内容变化,重建硬链接:$src_file -> $dst_file"
        else
          log "重建硬链接失败:$src_file -> $dst_file"
        fi
      fi
    else
      if ln "$src_file" "$dst_file" 2>>"$LOG_FILE"; then
        log "新建硬链接:$src_file -> $dst_file"
      else
        log "创建硬链接失败:$src_file -> $dst_file"
      fi
    fi

    printf '%s\t%s\n' "$src_file" "$dst_file" >> "$current_list"
  done < <(find "$src" -type f -print0)

  sort -u "$current_list" -o "$current_list"

  # Write the new state first, then clean up.
  cp -f "$current_list" "$state_file_tmp"
  mv -f "$state_file_tmp" "$state_file"

  cleanup_deleted_targets "$prev_tmp" "$current_list" "$dst" "$src"

  rm -f "$current_list" "$prev_tmp"
  log "处理完成:SRC=$src DST=$dst"
}

########################################
# Main flow
########################################

log "========== 脚本开始 =========="

for pair in "${FOLDER_PAIRS[@]}"; do
  SRC_DIR="${pair%%|*}"
  DST_DIR="${pair#*|}"

  SRC_DIR="${SRC_DIR%/}"
  DST_DIR="${DST_DIR%/}"

  # Refuse mirror roots where a cleanup mistake would be catastrophic.
  case "$DST_DIR" in
    "" | "/" | "/volume1" | "/volume2" | "/volume3" | "/volume4")
      log "跳过危险目标目录配置:$pair"
      continue
      ;;
  esac

  if [ -z "$SRC_DIR" ] || [ -z "$DST_DIR" ] || [ "$SRC_DIR" = "$DST_DIR" ]; then
    log "跳过无效配置:$pair"
    continue
  fi

  process_pair "$SRC_DIR" "$DST_DIR"
done

log "========== 脚本结束 =========="