#!/usr/bin/env python3

# Copyright (c) 2016, Antonio SJ Musumeci
#
# Permission to use, copy, modify, and/or distribute this software for any
# purpose with or without fee is hereby granted, provided that the above
# copyright notice and this permission notice appear in all copies.
#
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

import argparse
import ctypes
import errno
import fnmatch
import io
import os
import shlex
import stat
import subprocess
import sys


_libc = ctypes.CDLL("libc.so.6", use_errno=True)
_lgetxattr = _libc.lgetxattr
_lgetxattr.argtypes = [ctypes.c_char_p,
                       ctypes.c_char_p,
                       ctypes.c_void_p,
                       ctypes.c_size_t]
# lgetxattr(2) returns ssize_t; the ctypes default restype is a C int.
_lgetxattr.restype = ctypes.c_ssize_t

# Binary multipliers understood by human_to_bytes().
_SIZE_SUFFIXES = {
    'K': 1024,
    'M': 1024 ** 2,
    'G': 1024 ** 3,
    'T': 1024 ** 4,
}


def lgetxattr(path, name):
    """Return the raw value of extended attribute *name* on *path*.

    The lookup uses libc's lgetxattr.  *path* may be str, bytes or a
    path-like object; *name* may be str or bytes.

    Returns the attribute value as bytes, or None when libc reports
    ENODATA.  Any other errno is raised as IOError(errno, message, path).
    """
    # os.fsencode round-trips the surrogate-escaped names that os.walk
    # produces for non-UTF-8 file names; a plain .encode() would not.
    path = os.fsencode(path)
    if isinstance(name, str):
        name = name.encode(errors='backslashreplace')

    length = 64
    while True:
        buf = ctypes.create_string_buffer(length)
        res = _lgetxattr(path, name, buf, ctypes.c_size_t(length))
        if res >= 0:
            return buf.raw[0:res]

        err = ctypes.get_errno()
        if err == errno.ERANGE:
            # Buffer too small for the value: retry with twice the room.
            length *= 2
        elif err == errno.ENODATA:
            return None
        else:
            raise IOError(err, os.strerror(err), path)


def _required_xattr(fullpath, name):
    """Return xattr *name* of *fullpath* as bytes; raise IOError if absent."""
    value = lgetxattr(fullpath, name)
    if value is None:
        raise IOError(errno.ENODATA, os.strerror(errno.ENODATA), fullpath)
    return value


def xattr_relpath(fullpath):
    """Return the 'user.mergerfs.relpath' attribute of *fullpath* as str.

    Raises IOError when the attribute is missing or cannot be read.
    """
    return os.fsdecode(_required_xattr(fullpath, 'user.mergerfs.relpath'))


def xattr_basepath(fullpath):
    """Return the 'user.mergerfs.basepath' attribute of *fullpath* as str.

    Raises IOError when the attribute is missing or cannot be read.
    """
    return os.fsdecode(_required_xattr(fullpath, 'user.mergerfs.basepath'))


def ismergerfs(path):
    """Return True when *path* carries a 'user.mergerfs.version' xattr."""
    if path is None:
        return False
    try:
        return lgetxattr(path, 'user.mergerfs.version') is not None
    except IOError:
        return False


def mergerfs_control_file(basedir):
    """Find the nearest '.mergerfs' entry at or above *basedir*.

    Walks from *basedir* towards the root and returns the first existing
    '<dir>/.mergerfs' path.  Returns None once '/' is reached ('/' itself
    is not probed) or when the path has no further parent.
    """
    current = basedir
    while current != '/':
        ctrlfile = os.path.join(current, '.mergerfs')
        if os.path.exists(ctrlfile):
            return ctrlfile
        parent = os.path.dirname(current)
        if parent == current:
            # Fixed point other than '/' (e.g. '' for a relative path):
            # nothing left to climb, so stop instead of looping forever.
            return None
        current = parent
    return None


def mergerfs_srcmounts(ctrlfile):
    """Return the ':'-separated 'user.mergerfs.srcmounts' value as a list.

    Raises IOError when the attribute is missing or cannot be read.
    """
    srcmounts = _required_xattr(ctrlfile, 'user.mergerfs.srcmounts')
    return os.fsdecode(srcmounts).split(':')


def match(filename, matches):
    """Return True if *filename* matches any fnmatch pattern in *matches*."""
    return any(fnmatch.fnmatch(filename, pattern) for pattern in matches)


def execute_cmd(args):
    """Run the command *args* (an argv list) and return its exit status."""
    return subprocess.call(args)


def print_args(args):
    """Print the argv list *args* as one shell-quoted command line."""
    quoted = [shlex.quote(arg) for arg in args]
    print(' '.join(quoted))


def human_to_bytes(s):
    """Convert a size such as '512', '64K' or '16G' to a number of bytes.

    A trailing K, M, G or T (either case) multiplies by the matching power
    of 1024.  Raises ValueError for empty or non-numeric input, which
    argparse reports as a normal usage error.
    """
    text = s.strip()
    if not text:
        raise ValueError('empty size')

    multiplier = _SIZE_SUFFIXES.get(text[-1].upper())
    if multiplier is None:
        return int(text)
    return int(text[:-1]) * multiplier


def get_stats(branches):
    """Map each path in *branches* to its available bytes via os.statvfs."""
    sizes = {}
    for branch in branches:
        vfs = os.statvfs(branch)
        sizes[branch] = vfs.f_bavail * vfs.f_frsize
    return sizes


def build_move_file(src, tgt, rel):
    """Build the rsync argv that moves *rel* from branch *src* to *tgt*.

    The source is spelled '<src>/./<rel>' and passed with --relative, so
    the part after '/./' is the path rsync is asked to recreate under
    *tgt*.
    """
    rel = rel.strip('/')
    srcpath = os.path.join(src, './', rel)
    tgtpath = tgt.rstrip('/') + '/'
    return ['rsync',
            '-avHAXWE',
            '--numeric-ids',
            '--progress',
            '--relative',
            '--remove-source-files',
            srcpath,
            tgtpath]


def print_help():
    """Print the hand-written usage text to stdout."""
    help = \
'''
usage: mergerfs.consolidate [<options>] <dir>

Consolidate files in a single mergerfs directory onto a single drive.

positional arguments:
  dir                    starting directory

optional arguments:
  -m, --max-files=       Skip directories with more than N files.
                         (default: 256)
  -M, --max-size=        Skip directories with files adding up to more
                         than N. (default: 16G)
  -I, --include-path=    fnmatch compatible path include filter.
                         Can be used multiple times.
  -E, --exclude-path=    fnmatch compatible path exclude filter.
                         Can be used multiple times.
  -e, --execute          Execute `rsync` commands as well as print them.
  -h, --help             Print this help.
'''
    print(help)


def buildargparser():
    """Return the argument parser; help is handled by print_help()."""
    parser = argparse.ArgumentParser(add_help=False)
    parser.add_argument('dir',
                        type=str,
                        nargs='?',
                        default=None)
    parser.add_argument('-m', '--max-files',
                        dest='max_files',
                        type=int,
                        default=256)
    parser.add_argument('-M', '--max-size',
                        dest='max_size',
                        type=human_to_bytes,
                        default='16G')
    parser.add_argument('-I', '--include-path',
                        dest='includepath',
                        type=str,
                        action='append',
                        default=[])
    parser.add_argument('-E', '--exclude-path',
                        dest='excludepath',
                        type=str,
                        action='append',
                        default=[])
    parser.add_argument('-e', '--execute',
                        dest='execute',
                        action='store_true')
    parser.add_argument('-h', '--help',
                        action='store_true')
    return parser


def _collect_regular_files(root, files):
    """Return (total_size, {fullpath: lstat result}) for regular files."""
    total_size = 0
    file_stats = {}
    for name in files:
        fullpath = os.path.join(root, name)
        try:
            st = os.lstat(fullpath)
        except FileNotFoundError:
            # The entry vanished between os.walk's listing and the lstat.
            continue
        if not stat.S_ISREG(st.st_mode):
            continue
        total_size += st.st_size
        file_stats[fullpath] = st
    return total_size, file_stats


def main():
    """Walk the given directory and print (optionally run) rsync moves.

    For every directory that passes the file-count, size and path filters,
    each regular file not already on the branch with the most available
    space gets an rsync command that moves it there.
    """
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer,
                                  encoding='utf8',
                                  errors='backslashreplace',
                                  line_buffering=True)
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer,
                                  encoding='utf8',
                                  errors='backslashreplace',
                                  line_buffering=True)

    parser = buildargparser()
    args = parser.parse_args()

    if args.help or not args.dir:
        print_help()
        sys.exit(0)

    args.dir = os.path.realpath(args.dir)

    ctrlfile = mergerfs_control_file(args.dir)
    if ctrlfile is None or not ismergerfs(ctrlfile):
        print("%s is not a mergerfs mount" % args.dir)
        sys.exit(1)

    basedir = args.dir
    execute = args.execute
    max_files = args.max_files
    max_size = args.max_size
    path_includes = ['*'] if not args.includepath else args.includepath
    path_excludes = args.excludepath
    srcmounts = mergerfs_srcmounts(ctrlfile)
    mount_stats = get_stats(srcmounts)

    try:
        for (root, dirs, files) in os.walk(basedir):
            if len(files) <= 1:
                continue
            if len(files) > max_files:
                continue
            if match(root, path_excludes):
                continue
            if not match(root, path_includes):
                continue

            total_size, file_stats = _collect_regular_files(root, files)
            if total_size >= max_size:
                continue

            # Branch with the most available space; ties resolve to the
            # earliest branch in srcmounts order.
            tgtpath = max(mount_stats.items(), key=lambda x: x[1])[0]
            for (fullpath, st) in sorted(file_stats.items()):
                srcpath = xattr_basepath(fullpath)
                if srcpath == tgtpath:
                    continue

                relpath = xattr_relpath(fullpath)
                cmd = build_move_file(srcpath, tgtpath, relpath)
                print_args(cmd)
                if execute:
                    status = execute_cmd(cmd)
                    if status != 0:
                        # The file did not move, so leave the free-space
                        # estimates untouched.
                        print("rsync exited with status %d for %s" %
                              (status, fullpath),
                              file=sys.stderr)
                        continue

                # Keep the estimates in step with the (planned) move.  A
                # base path that is not a listed branch has no entry.
                if srcpath in mount_stats:
                    mount_stats[srcpath] += st.st_size
                mount_stats[tgtpath] -= st.st_size
    except (KeyboardInterrupt, BrokenPipeError):
        pass

    sys.exit(0)


if __name__ == "__main__":
    main()