# vim: ts=4:sw=4:expandtab # -*- coding: UTF-8 -*- # BleachBit # Copyright (C) 2008-2018 Andrew Ziem # https://www.bleachbit.org # # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program. If not, see . """ Scan directory tree for files to delete """ from __future__ import absolute_import, print_function import logging import os import platform import re import unicodedata import bleachbit UTF8 = 'utf-8' def to_unicode(s): """ Converts non-unicode UTF-8 string to unicode obj. Does nothing if string is already unicode. """ return s if isinstance(s, unicode) else unicode(s, UTF8) def normalized_walk(top, **kwargs): """ macOS uses decomposed UTF-8 to store filenames. This functions is like `os.walk` but recomposes those decomposed filenames on macOS """ if 'Darwin' == platform.system(): for dirpath, dirnames, filenames in os.walk(top, **kwargs): yield dirpath, dirnames, [ unicodedata.normalize('NFC', to_unicode(fn)).encode(UTF8) for fn in filenames ] else: if os.name == 'nt': # NTFS stores files as Unicode, and this makes os.walk() return # Unicode. top2 = unicode(top) else: # On Linux the file system encoding may be UTF-8, but deal with # bytestrings to avoid potential UnicodeDecodeError in # posixpath.join() top2 = str(top) for result in os.walk(top2, **kwargs): yield result class DeepScan: """Advanced directory tree scan""" def __init__(self): self.roots = [] self.searches = {} def add_search(self, dirname, regex): """Starting in dirname, look for files matching regex""" if dirname not in self.searches: self.searches[dirname] = [regex] else: self.searches[dirname].append(regex) def scan(self): """Perform requested searches and yield each match""" logging.getLogger(__name__).debug('DeepScan.scan: searches=%s', str(self.searches)) import time yield_time = time.time() for (top, regexes) in self.searches.items(): for (dirpath, dirnames, filenames) in normalized_walk(top): for regex in regexes: # fixme, don't match filename twice r = re.compile(regex) for filename in filenames: if r.search(filename): full_path = os.path.join(dirpath, filename) if isinstance(full_path, str): # Convert path to Unicode. full_path = full_path.decode(bleachbit.FSE) yield full_path if time.time() - yield_time > 0.25: # allow GTK+ to process the idle loop yield True yield_time = time.time()