;;; org-roam-async.el --- Sync the org-roam-db faster -*- lexical-binding: t; -*-
;; Copyright (C) 2025 Martin Edström
;; This program is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
;; along with this program. If not, see .
;; Author: Martin Edström
;; URL: https://github.com/meedstrom/org-roam-async
;; Created: 2025-10-06
;; Keywords: org-mode, roam, convenience
;; Package-Requires: ((emacs "29.1") (org-roam "2.3.1") (el-job "2.5.1"))
;;; Commentary:
;; Provide `org-roam-async-db-sync', a faster alternative to `org-roam-db-sync'.
;;; Code:
(require 'cl-lib)
(require 'subr-x)
(require 'org)
(require 'org-element)
(require 'org-roam-db)
(unless (fboundp 'el-job-ng-run)
(error "Update to el-job 2.5.1+ to use org-roam-async"))
(require 'el-job-ng)
(defgroup org-roam-async nil
"Make org-roam async."
:group 'org-roam)
(defcustom org-roam-async-file-name-handler-alist nil
"Override for `file-name-handler-alist'."
:type 'alist)
;;;; STAGE 1: Pick files to update
(defvar org-roam-async--called-from-db-sync nil)
(defvar org-roam-async--time-at-start nil)
(defun org-roam-async-db-sync (&optional force)
(interactive "P")
(el-job-ng-kill 'org-roam-async)
(setq org-roam-async--time-at-start (current-time))
(message "(org-roam) Beginning DB sync...")
(redisplay)
(org-roam-db--close)
(when force (delete-file org-roam-db-location))
(org-roam-db)
(let* ((file-name-handler-alist org-roam-async-file-name-handler-alist)
(gc-cons-threshold org-roam-db-gc-threshold)
(disk-files (org-roam-async--list-files (expand-file-name org-roam-directory)))
(db-mtimes (cl-loop
with tbl = (make-hash-table :test #'equal)
for (file mtime) in (org-roam-db-query
[:select [file mtime] :from files])
do (puthash file mtime tbl)
finally return tbl))
(modified-files nil))
(dolist (file disk-files)
(let* ((disk-mtime (file-attribute-modification-time (file-attributes file))))
(unless (time-equal-p disk-mtime (gethash file db-mtimes))
(push file modified-files)))
(remhash file db-mtimes))
(unless (hash-table-empty-p db-mtimes)
(emacsql-with-transaction (org-roam-db)
(dolist-with-progress-reporter (file (hash-table-keys db-mtimes))
"(org-roam) Clearing removed files..."
(org-roam-db-clear-file file))))
(when modified-files
(message "(org-roam) Processing modified files in the background...")
(redisplay)
(el-job-ng-run :id 'org-roam-async
:require '( org-roam-async )
:inject-vars (org-roam-async--relevant-user-settings)
:inputs modified-files
:funcall-per-input #'org-roam-async--parse-file
:callback #'org-roam-async--insert-into-db)
(org-roam-async--spinner-mode)
(setq org-roam-async--called-from-db-sync t))
(unless modified-files
(message "(org-roam) Synced the DB in total %.2fs"
(float-time (time-since org-roam-async--time-at-start))))))
(defvar org-roam-async--mode-line-refresher
(thread-first (timer-create)
(timer-set-function #'force-mode-line-update)
(timer-set-time 0 0.09)))
(define-minor-mode org-roam-async--spinner-mode
"Mode for showing animation in modeline while subprocesses at work.
Also watches if they take too long, and kills them.
Turns itself off."
:lighter (:eval (format " [%.2fs org-roam-async...]"
(float-time (time-since org-roam-async--time-at-start))))
:global t
:interactive nil
(if org-roam-async--spinner-mode
(let ((cell (assq 'org-roam-async--spinner-mode minor-mode-alist)))
;; Move leftmost for a better chance the user sees the animation
(when cell
(setq minor-mode-alist (assq-delete-all 'org-roam-async--spinner-mode minor-mode-alist))
(push cell minor-mode-alist))
(run-with-timer 1 nil #'org-roam-async--maybe-stop)
(timer-activate org-roam-async--mode-line-refresher))
(cancel-timer org-roam-async--mode-line-refresher)))
(defun org-roam-async--maybe-stop ()
(let ((elapsed (float-time (time-since org-roam-async--time-at-start))))
(if (< elapsed 500)
(if (el-job-ng-busy-p 'org-roam-async)
(run-with-timer 1 nil #'org-roam-async--maybe-stop)
(org-roam-async--spinner-mode 0))
(org-roam-async--spinner-mode 0)
(el-job-ng-kill-keep-bufs 'org-roam-async)
(message "(org-roam) Killed DB-sync because it took %.2fs" elapsed))))
;; NOTE: Cannot yet inject `org-roam-db-node-include-function' thru el-job-ng.
(defun org-roam-async--relevant-user-settings ()
(list (cons 'org-roam-db-extra-links-elements org-roam-db-extra-links-elements)
(cons 'org-roam-db-extra-links-exclude-keys org-roam-db-extra-links-exclude-keys)
(cons 'org-roam-directory org-roam-directory)
(cons 'org-roam-db-gc-threshold org-roam-db-gc-threshold)
(cons 'org-roam-async-file-name-handler-alist org-roam-async-file-name-handler-alist)))
;;;; STAGE 2: Work in child processes
(defvar org-roam-async--stored-queries nil
"List of \((SQL ARGS...) (SQL ARGS...) ...).")
(defun org-roam-async--store-query (&rest arglist)
"Add ARGLIST to `org-roam-async--stored-queries'."
(push arglist org-roam-async--stored-queries))
(defun org-roam-async--store-query! (_handler &rest arglist)
"Add ARGLIST to `org-roam-async--stored-queries'.
Ignore first arg HANDLER because this expects to run in a subprocess."
(apply #'org-roam-async--store-query arglist))
(defvar org-roam-async--hashes-tbl (make-hash-table :test #'equal))
(defvar org-roam-async--attrs-tbl (make-hash-table :test #'equal))
(defun org-roam-async--init-work-buffer (files)
"Read FILES into buffers and return an Org work buffer."
;; Pre-read all files, in case file-names change during parsing.
(save-current-buffer
(dolist (file files)
(set-buffer (get-buffer-create file t))
(cl-assert (and (bobp) (eobp) (eq major-mode 'fundamental-mode)))
(insert-file-contents file)
;; TODO: Profile with the empty string instead.
;; (puthash file "" org-roam-async--hashes-tbl)
(puthash file (org-roam-db--file-hash file) org-roam-async--hashes-tbl)
(puthash file (file-attributes file) org-roam-async--attrs-tbl)))
;; Enable `org-mode' only once for this process.
(with-current-buffer (get-buffer-create "*org-roam-async scratch*" t)
(let ((org-element-cache-persistent nil)
(org-agenda-files nil)
(org-inhibit-startup t))
(delay-mode-hooks
(org-mode)))
(setq-local org-element-cache-persistent nil)
(current-buffer)))
;; Called for every file (by `el-job-ng--child-work')
(defvar org-roam-async--work-buf nil)
(defun org-roam-async--parse-file (file rest)
"Parse FILE and return a list of EmacSQL queries reflecting it.
REST is the remaining files for this subprocess."
(let ((file-name-handler-alist org-roam-async-file-name-handler-alist)
(gc-cons-threshold org-roam-db-gc-threshold))
(unless (eq org-roam-async--work-buf (current-buffer))
(switch-to-buffer
(setq org-roam-async--work-buf
(org-roam-async--init-work-buffer (cons file rest)))))
(erase-buffer)
(org-element-cache-reset) ;; TODO: Profile with cache disabled.
(insert-buffer-substring (get-buffer file))
;; HACK: Simulate a file-visiting buffer.
(let ((buffer-file-name file)
(default-directory (file-name-directory file)))
(org-roam-async--mk-sql-queries))))
(defun org-roam-async--mk-sql-queries ()
"The meat of what was `org-roam-db-update-file'."
(require 'org-ref nil t)
(require 'oc)
(org-set-regexps-and-options 'tags-only)
(setq org-roam-async--stored-queries nil)
(org-roam-async--store-query [:delete :from files :where (= file $s1)]
buffer-file-name)
(org-roam-async--store-file-query)
(cl-letf* (((symbol-function #'org-roam-db-query) #'org-roam-async--store-query)
((symbol-function #'org-roam-db-query!) #'org-roam-async--store-query!))
(org-roam-db-insert-file-node)
(setq org-outline-path-cache nil) ;; REVIEW: Why?
(org-roam-db-map-nodes (list #'org-roam-db-insert-node-data
#'org-roam-db-insert-aliases
#'org-roam-db-insert-tags
#'org-roam-db-insert-refs))
(setq org-outline-path-cache nil) ;; REVIEW: Why?
(let ((info (org-element-parse-buffer))) ;; REVIEW: Why?
(org-roam-db-map-links (list #'org-roam-db-insert-link))
(org-roam-db-map-citations info (list #'org-roam-db-insert-citation))))
;; Put deletion queries before insertion queries.
(nreverse org-roam-async--stored-queries))
;; A work-around because our `org-roam-async--init-work-buffer' pre-reads all
;; files before we begin any parsing, so we should not use
;; `org-roam-db-insert-file' during parsing, as it re-accesses the filesystem.
(defun org-roam-async--store-file-query (&optional _)
"Like `org-roam-db-insert-file', but avoid the filesystem."
(let* ((file (buffer-file-name))
(file-title (org-roam-db--file-title))
(attr (gethash file org-roam-async--attrs-tbl))
(atime (file-attribute-access-time attr))
(mtime (file-attribute-modification-time attr))
(hash (gethash file org-roam-async--hashes-tbl)))
(org-roam-async--store-query
[:insert :into files
:values $v1]
(list (vector file file-title hash atime mtime)))))
;;; STAGE 3: All children returned, process their combined results
(defvar org-roam-async--last-queries nil)
(defun org-roam-async--insert-into-db (outputs)
(setq org-roam-async--last-queries outputs) ;; inspect this for fun
(let ((n-files (length outputs))
(n-queries (apply #'+ (mapcar #'length outputs)))
(ctr 0)
(gc-cons-threshold org-roam-db-gc-threshold))
(emacsql-with-transaction (org-roam-db)
(dolist (arg-sets outputs)
(when org-roam-async--called-from-db-sync
(message "(org-roam) Running %d SQL queries... (for %d/%d files)"
n-queries (cl-incf ctr) n-files))
(dolist (args arg-sets)
(apply #'org-roam-db-query args))))
(when org-roam-async--called-from-db-sync
(message "(org-roam) Synced the DB in total %.2fs"
(float-time (time-since org-roam-async--time-at-start))))
(setq org-roam-async--called-from-db-sync nil)))
;;; AFTER-SAVE-HOOK
(defvar org-roam-async--queue-timer (timer-create))
(defvar org-roam-async--queue nil)
(defun org-roam-async--enqueue (files)
(dolist (file (ensure-list files))
(push file org-roam-async--queue))
(cancel-timer org-roam-async--queue-timer)
(setq org-roam-async--queue-timer
(run-with-timer 1 nil #'org-roam-async--try-update)))
(defun org-roam-async--try-update-on-save-h ()
"An alternative to `org-roam-db-autosync--try-update-on-save-h'."
(let ((file (buffer-file-name (buffer-base-buffer))))
(when (and file org-roam-db-update-on-save)
(org-roam-async--try-update (list file)))))
(defun org-roam-async--try-update (&optional files)
(setq files (delete-dups (append org-roam-async--queue (ensure-list files))))
(setq org-roam-async--queue nil)
(when files
(if (el-job-ng-busy-p 'org-roam-async)
(org-roam-async--enqueue files)
;; (message "Updating files: %S" files)
(el-job-ng-run :id 'org-roam-async
:require '( org-roam-async )
:inject-vars (org-roam-async--relevant-user-settings)
:inputs files
:funcall-per-input #'org-roam-async--parse-file
:callback #'org-roam-async--insert-into-db))))
;;; OPTIONAL STUFF
(defun org-roam-async-open-db ()
"Browse the DB contents."
(interactive)
(require 'sqlite-mode)
(sqlite-mode-open-file org-roam-db-location))
;;;; Fix "finalizer failed: wrong type argument sqlitep nil"
;; It seems to affect nothing, it's just noise.
(defun org-roam-async@emacsql-close (connection &rest _args)
"Prevent calling emacsql-close if connection handle is nil."
(when (oref connection handle)
t))
(advice-add 'emacsql-close :before-while #'org-roam-async@emacsql-close)
;;;; Faster `org-roam-list-files'
;; Because that thing takes 5 full seconds on a SSD...
;; Remove this section if PR gets accepted:
;; https://github.com/org-roam/org-roam/pull/2546
(defvar org-roam-async--suffixes nil)
(defvar org-roam-async--suffixes-re nil)
(defun org-roam-async--recalc-suffixes ()
(setq org-roam-async--suffixes
(cl-loop for ext in org-roam-file-extensions
append (list (concat "." ext)
(concat "." ext ".age")
(concat "." ext ".gpg"))))
(setq org-roam-async--suffixes-re
(rx (regexp (regexp-opt org-roam-async--suffixes)) eos)))
(defun org-roam-async--list-files (dir)
"Replacement for `org-roam--list-files'."
(org-roam-async--recalc-suffixes)
(cl-loop for file in (directory-files-recursively
dir
org-roam-async--suffixes-re
nil
nil
t)
when (and (file-readable-p file)
(org-roam-async--roam-file-p file))
collect file))
(defun org-roam-async--roam-file-p (&optional file)
"Replacement for `org-roam-file-p'."
(and (setq file (or file (buffer-file-name (buffer-base-buffer))))
(cl-loop for suffix in org-roam-async--suffixes
thereis (string-suffix-p suffix file))
(cl-loop for exclude-re in (ensure-list org-roam-file-exclude-regexp)
never (string-match-p exclude-re file))
(let ((file-name-handler-alist org-roam-async-file-name-handler-alist))
(file-in-directory-p file org-roam-directory))))
(provide 'org-roam-async)
;;; org-roam-async.el ends here