"""Generate static ``index.html`` directory listings for a file tree.

Each indexed directory gets an ``index.html`` rendered from a Jinja2 template.
The template receives three variables: ``prev_route`` (link to the previous
index), ``directory`` (display path) and ``file_rows`` (one pre-formatted
string per listed entry, concatenated).
"""
import argparse
import datetime
import os
import re
from pathlib import Path

import requests
from jinja2 import Template

# Emoji (or short glyph) shown in front of an entry, keyed by entry type.
EXT_EMOJIS = {
    "folder": "📁",
    "archive": "📦",
    "licence": "⚖️",
    "image": "🖼️",
    "3d": "🕸️",
    "font": "🇦",
    "table": "田",
    "data": "{}",
    "sound": "💿",
    "video": "🎞️",
    "config": "🛠️",
    "code": "<>",
    "storage": "💾",
    "docs": "📖",
    "text": "🗒️",
}

# Lower-cased extensions (with leading dot) or whole lower-cased file names
# recognised for each type. Types are tested in insertion order and the first
# match wins, so an extension listed twice (".sql", ".ogg", ".mp2") resolves to
# the type declared first.
EXT_BY_TYPE = {
    "archive": {
        ".zip", ".gz", ".tar", ".7z", ".rar", ".bz2", ".lz", ".xz", ".z",
        ".tar.gz", ".tar.bz2", ".tar.xz", ".tar.z",
    },
    "licence": {
        ".lic", "licence", ".license", "licence.md", "licence.txt",
        "licence.rtf", "copying", "copying.md", "copying.txt", "copying.rtf",
        "copyright", "copyright.md", "copyright.txt", "copyright.rtf",
        # US spelling, so that e.g. a plain "LICENSE" file is recognised too.
        "license", "license.md", "license.txt", "license.rtf",
    },
    "image": {
        ".jpg", ".jpeg", ".bmp", ".png", ".svg", ".ps", ".lottie", ".riv",
        ".psd", ".gif", ".tiff", ".webp", ".ico", ".jxr", ".heic", ".bpg",
        ".avif",
    },
    "3d": {".glsl", ".gltf", ".obj", ".fbx", ".stl", ".amf", ".stp"},
    "font": {
        ".ttf", ".otf", ".woff", ".woff2", ".eot", ".afm", ".dfont", ".pfa",
        ".pfb", ".pfm", ".suit", ".mf", ".cff", ".cid", ".otc", ".ttc", ".fnt",
        ".fon", ".bdf", ".pfr", ".sfd", ".otb", ".ttx", ".amfm", ".acfm",
    },
    "data": {".json", ".pickle", ".proto", ".xml"},
    "table": {
        ".csv", ".tsv", ".tab", ".xls", ".xlsx", ".xlsm", ".ods", ".db",
        ".dbf", ".sqlite", ".sqlite3", ".sql", ".mdb", ".accdb", ".dta",
        ".sav", ".sas7bdat", ".rds", ".rdata", ".feather", ".arrow", ".hdf5",
        ".h5", ".nc", ".nc4", ".nc4c", ".zarr", ".grb", ".grib", ".grib2",
        ".orc", ".pqt", ".parquet", ".avro",
    },
    "sound": {
        ".mp3", ".wav", ".aiff", ".au", ".flac", ".m4a", ".ogg", ".wma",
        ".aac", ".alac", ".aif", ".ra", ".ram", ".opus", ".mp2", ".amr",
        ".dss", ".gsm", ".m3u", ".pls", ".mid", ".midi", ".rmi", ".vqf",
    },
    "video": {
        ".mp4", ".avi", ".mkv", ".webm", ".mov", ".flv", ".vob", ".ogv",
        ".ogg", ".drc", ".mng", ".qt", ".wmv", ".yuv", ".rm", ".rmvb", ".viv",
        ".asf", ".amv", ".m4p", ".m4v", ".mpg", ".mp2", ".mpeg", ".mpe",
        ".mpv", ".m2v", ".svi", ".3gp", ".3g2", ".mxf", ".roq", ".nsv", ".f4v",
        ".f4p", ".f4a", ".f4b", ".mts", ".m2ts", ".divx", ".xvid", ".vcd",
        ".svcd",
    },
    "config": {
        ".yaml", ".yml", ".toml", ".cfg", ".ini", ".env", ".conf",
        ".properties",
    },
    "code": {
        ".py", ".js", ".ts", ".java", ".c", ".cpp", ".rs", ".go", ".h", ".hpp",
        ".html", ".pug", ".jsx", ".tsx", ".vue", ".css", ".sass", ".pcss",
        ".scss", ".php", ".rb", ".pl", ".sh", ".bash", ".bat", ".ps1",
        ".ipynb", ".sol", ".sql", ".r", ".swift", ".kt", ".cs", ".erl", ".ex",
        ".exs", ".f90", ".fs", ".hs", ".jl", ".lua", ".m", ".mli", ".nim",
        ".pm", ".rake", ".tsql", ".vhd", ".vhdl",
    },
    "docs": {
        ".md", ".markdown", ".tex", ".rst", ".pdf", ".epub", ".mobi", ".djvu",
        ".fb2", ".lit", ".pdb", ".azw", ".azw3", ".ibooks",
    },
    "text": {".txt", ".docx", ".odt", ".doc", ".rtf", ".asc"},
}

# Entry names that are never listed; directories with these names are never
# indexed or descended into.
DEFAULT_EXCLUDES = {
    ".git", ".vscode", ".idea", "node_modules", "__pycache__", "__init__.py",
    ".DS_Store", ".gitignore", ".gitattributes", ".gitmodules", ".gitkeep",
    "setup.bash",
}


def get_type_and_emoji(filename):
    """Return the ``(type, emoji)`` pair describing *filename*.

    Existing directories are reported as ``"folder"``. Otherwise the
    lower-cased extension and the lower-cased base name are looked up in
    ``EXT_BY_TYPE``; the first type containing either one wins. Unrecognised
    entries are reported as ``"???"`` with the "text" emoji.
    """
    if os.path.isdir(filename):
        return "folder", EXT_EMOJIS["folder"]
    ext = os.path.splitext(filename)[1].lower()
    basename = os.path.basename(filename).lower()
    for file_type, extensions in EXT_BY_TYPE.items():
        if ext in extensions or basename in extensions:
            return file_type, EXT_EMOJIS[file_type]
    return "???", EXT_EMOJIS["text"]


def humanize_bytes(size, precision=2):
    """Convert a byte count to a short human-readable string.

    Uses 1024-based units with the suffixes ``b``, ``kb``, ``mb``, ``gb`` and
    ``tb``. Plain byte counts are printed without decimals; larger units use
    *precision* decimals. Values beyond the terabyte range stay expressed in
    ``tb`` instead of failing.
    """
    power = 2**10
    labels = ("", "k", "m", "g", "t")
    n = 0
    # ">=" so that exactly 1024 bytes reads "1.00kb"; the bound on n keeps the
    # label lookup in range for very large sizes.
    while size >= power and n < len(labels) - 1:
        size /= power
        n += 1
    if n == 0:
        precision = 0
    return f"{size:.{precision}f}{labels[n]}b"


def create_index_html(root, git_root, parent, template, missing_alts=None):
    """Render and write ``index.html`` for the single directory *root*.

    Args:
        root: Directory to list; the index file is written inside it.
        git_root: Prefix prepended to the directory's path (relative to
            *parent*) when building link paths.
        parent: Name of the top-level directory. It is used to cut *root*
            down to a relative path and as the repository name in the
            GitHub URLs.
        template: Object exposing ``render(prev_route=..., directory=...,
            file_rows=...)`` (a Jinja2 ``Template``).
        missing_alts: Optional collection of local file paths for which the
            " git" marker is omitted.

    Returns:
        The path of the written ``index.html``.
    """
    missing_alts = () if missing_alts is None else missing_alts

    def sort_key(name):
        full = os.path.join(root, name)
        # Folders first (False sorts before True), then size descending,
        # then name ascending (case-insensitive).
        return (not os.path.isdir(full), -os.path.getsize(full), name.lower())

    # Excluded entries and a previously generated index are never listed.
    files = [
        name
        for name in os.listdir(root)
        if name not in DEFAULT_EXCLUDES and name != "index.html"
    ]
    files.sort(key=sort_key)

    index_root = git_root
    # Path of `root` below the last occurrence of `parent`; "/" for the top.
    index_tree = str(root).rsplit(f"{parent}", maxsplit=1)[1] or "/"
    # NOTE(review): with the default git_root "/" this yields "/" for the top
    # directory and "//sub" for sub-directories (double slash) -- confirm the
    # intended value of git_root before relying on these paths.
    index_dir = index_root + (index_tree if index_tree != "/" else "")

    rows = []
    for file in files:
        file_path = os.path.join(root, file)
        file_type, emoji = get_type_and_emoji(file_path)
        # GitHub location of the entry: tree view for folders, raw content
        # for files.
        # NOTE(review): `alt` and `file_ext` are computed but not interpolated
        # into the row below; presumably the row once carried markup that
        # used them -- confirm against the template.
        if file_type == "folder":
            alt = f"https://github.com/AstrolabDAO/{parent}/tree/main{index_dir}/{file}"
        else:
            alt = f"https://raw.githubusercontent.com/AstrolabDAO/{parent}/main{index_dir}/{file}"
        # The marker is driven by `missing_alts`; an HTTP probe of `alt`
        # (checking for a 404 status) is a disabled alternative.
        alt_tag = "" if file_path in missing_alts else " git"
        file_name = f'{emoji}{file}'
        file_ext = os.path.splitext(file)[1]
        file_ext = f" ({file_ext})" if file_ext else ""
        file_size = os.path.getsize(file_path)
        # getctime is the metadata-change time on Unix and the creation time
        # on Windows.
        file_creation_time = datetime.datetime.fromtimestamp(
            os.path.getctime(file_path)
        ).strftime('%Y-%m-%d %H:%M')
        rows.append(
            f"{file_name}{alt_tag}{file_type}{humanize_bytes(file_size)}{file_creation_time}"
        )
    file_rows = "".join(rows)

    print(f"Creating index file: {index_dir}")
    prev = "./index.html"
    if index_dir != index_root:
        # Sub-directories link back to the index one level up.
        prev = "." + prev
    html_content = template.render(
        prev_route=prev,
        directory=f"/{parent}{index_dir}"[len(git_root):],
        file_rows=file_rows,
    )
    path = os.path.join(root, "index.html")
    # Explicit UTF-8: rows contain emoji, which the platform default encoding
    # may not be able to represent.
    with open(path, "w", encoding="utf-8") as f:
        f.write(html_content)
    return path


def _load_template_source(template_path):
    """Return the template text from an HTTP(S) URL or a local file."""
    if str(template_path).startswith("http"):
        # eg. https://cdn.astrolab.fi/libs/index_tpl.html
        response = requests.get(template_path, timeout=30)
        # Fail loudly rather than rendering an error page as the template.
        response.raise_for_status()
        return response.text
    # Relative paths are resolved next to this script; joining an absolute
    # path onto a Path yields that absolute path unchanged.
    local_path = Path(__file__).parent / template_path
    with open(local_path, "r", encoding="utf-8") as f:
        return f.read()


def generate_index_files(root, git_root="/", parent=None, template=None,
                         template_path="index_tpl.html", missing_alts=None):
    """Write an ``index.html`` into *root* and every non-excluded sub-directory.

    Args:
        root: Directory to start from.
        git_root: Forwarded to ``create_index_html``.
        parent: Name of the top-level directory; defaults to the last path
            component of *root*.
        template: Already compiled template. When omitted, the template is
            loaded from *template_path* and this call is treated as the top
            of the recursion (it prints the final count).
        template_path: URL (starting with "http") or file path of the
            template; relative paths are resolved next to this script.
        missing_alts: Forwarded to ``create_index_html``.

    Returns:
        List of the written index file paths, each directory exactly once.
    """
    indexed = []
    is_root = False
    # normpath drops a trailing separator so "site/" still yields "site".
    stem = os.path.basename(os.path.normpath(str(root)))
    parent = parent or stem
    if stem in DEFAULT_EXCLUDES:
        return indexed
    if not template:
        is_root = True
        template = Template(_load_template_source(template_path))

    indexed.append(create_index_html(
        root=root, git_root=git_root, parent=parent,
        template=template, missing_alts=missing_alts))

    # Only the immediate children are visited here: the recursive call takes
    # care of deeper levels, so no directory is indexed more than once.
    with os.scandir(root) as entries:
        sub_dirs = sorted(
            entry.name
            for entry in entries
            if entry.is_dir() and entry.name not in DEFAULT_EXCLUDES
        )
    for dirname in sub_dirs:
        indexed += generate_index_files(
            root=os.path.join(root, dirname), git_root=git_root,
            parent=parent, template=template, missing_alts=missing_alts)

    if is_root:
        print(f"Indexed {len(indexed)} directories")
    return indexed


def main():
    """Parse the command line and index the requested directory."""
    parser = argparse.ArgumentParser(
        description="Generate index files for directories")
    # Required: without it the indexer would be handed None and crash.
    parser.add_argument("-d", "--directory", type=str, required=True,
                        help="The root directory to start indexing")
    args = parser.parse_args()
    generate_index_files(args.directory)


if __name__ == "__main__":
    main()