# -*- mode: org; orgstrap-cypher: sha256; orgstrap-norm-func-name: orgstrap-norm-func--dprp-1-0; orgstrap-block-checksum: 0fb3361237c1d930ca53055fd40c2d31fcbe309e958e1fcbb24745219be63849; -*-
#+title: ApiNATOMY model RDF export and deployment
# [[orgstrap][jump to the orgstrap block for this file]]
# reminder that num:nil breaks table of contents so if one is nil both should probably be nil
#+options: num:nil toc:nil
#+startup: showall
#+property: header-args:elisp :lexical yes
#+property: header-args :eval no-export

# [[file:./apinatomy.pdf]]
# [[file:./apinatomy.html]]

#+name: orgstrap-shebang
#+begin_src bash :eval never :results none :exports none
set -e "-C" "-e" "-e"
{ null=/dev/null;} > "${null:=/dev/null}"
{ args=;file=;MyInvocation=;__p=$(mktemp -d);touch ${__p}/=;chmod +x ${__p}/=;__op=$PATH;PATH=${__p}:$PATH;} > "${null}"
$file = $MyInvocation.MyCommand.Source
{ file=$0;PATH=$__op;rm ${__p}/=;rmdir ${__p};} > "${null}"
emacs -batch -no-site-file -eval "(let (vc-follow-symlinks) (defun org-restart-font-lock ()) (defun orgstrap--confirm-eval (l _) (not (memq (intern l) '(elisp emacs-lisp)))) (let ((file (pop argv)) enable-local-variables) (find-file-literally file) (end-of-line) (when (eq (char-before) ?\^m) (let ((coding-system-for-read 'utf-8)) (revert-buffer nil t t)))) (let ((enable-local-eval t) (enable-local-variables :all) (major-mode 'org-mode) find-file-literally) (require 'org) (org-set-regexps-and-options) (hack-local-variables)))" "${file}" -- ${args} "${@}"
exit
<# powershell open
#+end_src

* Using this file :noexport:
This is an executable org file! Here is an example of how to use it to build an apinatomy model.
#+begin_src bash
./apinatomy.org --model-id keast-bladder
#+end_src
It +can also+ /will soon be able to/ deploy ttl files.
#+begin_src bash
./apinatomy.org --deploy
#+end_src
* ApiNATOMY to RDF/OWL2 conversion
:PROPERTIES:
:visibility: folded
:END:
** Basic strategy
JSON -> RDF -> OWL2 \\
Conversion from json to rdf should have few if any changes in semantics. \\
Conversion from a direct rdf translation to OWL2 is where the semantic \\
translation of ApiNATOMY structures into OWL2 constructs will happen.
* Server setup
:PROPERTIES:
:visibility: folded
:END:
On the ontology host (read, ttl host, not SciGraph host) you will need the following.
#+begin_src bash :dir /ssh:host-apinat-ttl|sudo:host-apinat-ttl: :eval never
mkdir /var/www/sparc/ApiANTOMY
mkdir /var/www/sparc/ApiANTOMY/archive
mkdir /var/www/sparc/ApiANTOMY/archive/manual
mkdir /var/www/sparc/ApiANTOMY/ontologies
chown -R nginx:nginx /var/www/sparc/ApiANTOMY
#+end_src
* SciGraph pipeline
** Everything
#+begin_src bash :noweb yes :tangle ../bin/apinat-functions.sh
function apinat-full-to-prod () {
    echo this is not ready
    return 1

    apinat-build-all &&
    pushd ~/git/apinatomy-models
    git commit &&
    popd
    echo This would deploy: $(git diff --name-only HEAD~1..HEAD | cut -d'/' -f 1 | sort -u)
    <<&are-you-sure>>
    apinat-deploy-ttls $(git diff --name-only HEAD~1..HEAD | cut -d'/' -f 1 | sort -u)
    ~/git/pyontutils/nifstd/scigraph/bin/run-load-graph-sparc-data &&
    ~/git/pyontutils/nifstd/scigraph/bin/run-deploy-graph-sparc-data
}

function apinat-full-to-dev () {
    apinat-build-all &&
    ~/git/pyontutils/nifstd/scigraph/bin/run-load-graph-sparc-data-dev &&
    echo TODO figure out the dance we have to do for docker may need a separate function
}
#+end_src
** Parallel build
For the time being (until a bit more machinery is in place for
orthauth style configuration in elisp) you can put the path to
=secrets.sxpr= in =~/.emacs.d/orgstrap-init.el= with the following
expression =(setq oa-secrets "/path/to/secrets.sxpr")=.
#+name: &aba
#+begin_src bash :noweb no :tangle ../bin/apinat-functions.sh :mkdirp yes
function apinat-build-all () {
    pushd ~/git/apinatomy-models
    pushd models
    local ids=$(ls -d *)
    popd
    echo $ids | \
    xargs -P10 -r -I {} \
        bash -c 'apinat-build --repo $(pwd) --model-id ${@}'" ${@}" _ {}

    # only add models where the json has changed since the xlsx is opaque
    git add -u $(git status --porcelain | grep json | grep M | cut -d' ' -f3 | cut -d'/' -f1-2)
    popd
}
#+end_src

#+begin_src elisp
(let ((default-directory apinat-model-repo))
  (cl-remove-if-not
   (lambda (s) (string-match ".json$" s))
   (split-string
    (run-command "git" "diff" "--name-only" "HEAD" "HEAD~1" "--" "models"))))
#+end_src

** Export to ttl
#+link: r-apin-mod git:79316499d7987f73a56ce2bc54d07afe91886cd1:

# these link abbreviations should be in the file themselves, or materialized from a common source
# in a way that can be synced, or actually it is probably ok to put them in a setup/startup file
# as long as orgstrap tells you how to get that file
# the gsl local index should not be here in the file, but the local path names can and should be
#+git-share-local: git:79316499d7987f73a56ce2bc54d07afe91886cd1:HEAD: file:~/git/apinatomy-models/
r-apin-mod:HEAD:{model-id}/
#+link: gsx https://docs.google.com/spreadsheets/d/%s/export?format=xlsx
YEAH it DOES support arbitrary locations, with the ~%s~ implicitly at the tail by default

gsx:google-sheet-id

# org
# org-set-regexps-and-options

#+name: flow-to-ttl
#+begin_src elisp :results none
(defvar-local apinat-model-repo "~/git/apinatomy-models/"
  "Path to a local copy of the apinatomy-models repository.")

(defvar-local apinat-converter-command "apinat-converter"
  "Command name or full path to javascript apinatomy converter.")

(defvar-local apinat-exclude-models nil
  "list of model ids is strings to exclude from `all-models'")

(defvar-local apinat-converter-debug nil
  "Dynamic variable used to control debug behavior.")

(defun-local ex-do (&rest nothing)
  "Executor do. Eats the input since it is to be run by the executor."
  ;; maybe use this to resolve the ex-come-from flows?
  ;; while loop would be annoying here
  (yes-or-no-p "Step done? "))

(defun-local ex-label (label) ;; call/cc, prompt, or cl condition handling is likely the abstraction we would want
  "Target acquired!")

(defun-local ex-come-from (label)
  ;; detangling what is going on when using this, there is an implicit assertion
  ;; that a linear set of `ex-do' steps result in the same final state as a series
  ;; of sequential calls, the reason to use come-from is that the conductor program
  ;; can return and go on to the next step as expected (though a post condition)
  ;; measurement ala a contract needs to be confirmed, the conductor continues
  ;; along to the next step, and the extracted and compiled `ex-do' (which is really
  ;; a macro) command uses the come-from to stich the dependency tree back together
  ;; for the real world steps that are most efficient, the problem with this is that
  ;; you have to prove that the ordering for both graphs is compatible, namely that
  ;; for a particular set of sequential calls that there is only a single come-from
  ;; per line, otherwise you have to figure out what it means to have 20 things that
  ;; can all in princilple happen after the completion of a step, which is possible
  ;; but would need a solution
  ;;
  ;; scheduling, queue theory, with the additional notion of affinity, because human
  ;; executors are not interchangable in the same way as a cpu
  ;; I think you model it as generic + specialzed, and you schedule specialized
  ;; first because the pool for those is limited
  "AAAAAAAAA!")

(defmacro apinat--with-model (model-id &rest body)
  (declare (indent defun))
  `(let ((default-directory (expand-file-name
                             (symbol-name ,model-id)
                             (expand-file-name "models" apinat-model-repo)))
         (xlsx    (format "source/%s.xlsx"    model-id))
         (json    (format "source/%s.json"    model-id))
         (jsong   (format "derived/%s.json"   model-id))
         (jsonld  (format "derived/%s.jsonld" model-id))
         (ttl     (format "derived/%s.ttl"    model-id)))
     ,@body))

(defun-local flow-to-ttl (model-id)
  ;; probably need 1 more level of indirection to handle cases where the model-id
  ;; will be put inside the models/ folder which should probably happen sooner rather than later
  ;;(let ((default-directory (expand-file-name model-id (git-share-local r-apin-mod:HEAD:))) ; TODO ))
  '
  (apinat--with-model model-id
    ;; TODO make sure the directories exist
    ;; TODO resume from previous step on failure <- this is critical
    ;; check out `file-newer-than-file-p' as a reasonable approach ala make
    ;; the only issue is how to prevent the xlsx retrieval from notching it up
    ;; every single time, maybe we can compare checksums on the xlsx file?
    ;; TODO push the model id further down the call chain since input
    ;; and output paths are defined by convention
    (funcall (checksum-or #'-mx->)  model-id   xlsx) ; source/{model-id}.xlsx
    ;;; FIXME TODO message about opening the open physiology viewer
    (-xjl-> xlsx json jsonld)
    ;;(funcall (out-or      #'-xj->)  xlsx       json) ; source/{model-id}.xlsx source/{model-id}.json
    ;;(funcall (out-or      #'-jl->)  json     jsonld) ; source/{model-id}.json derived/{model-id}.jsonld
    (funcall (out-or      #'-lt->)  jsonld      ttl) ; derived/{model-id}.jsonld derived/{model-id}.ttl
    )

  (apinat--with-model model-id
    (-mxjjl-> model-id xlsx json jsong jsonld)
    (-lt-> jsonld ttl)))

(defun-local checksum-or (fun)
  (lambda (thing path-out)
    ;;(message "dd: %s" default-directory)
    (if (file-exists-p path-out)
        (let ((path-temp (let ((temporary-file-directory default-directory))
                           ;; keep the temp nearby rather than where ever the tfd is
                           (make-temp-file (concat path-out "._maybe_new_")))))
          (unwind-protect
              (progn
                (funcall fun thing path-temp)
                (let ((checksum-new (securl-path-checksum path-temp))
                      (checksum-old (securl-path-checksum path-out)))
                  (unless (string= checksum-new checksum-old)
                    (rename-file path-temp path-out t))))
            (when (file-exists-p path-temp)
              (delete-file path-temp))))
      (funcall fun thing path-out))))

(defun-local out-or (fun)
  (lambda (path-in path-out)
    (when (and (file-exists-p path-in)
               (or (not (file-exists-p path-out))
                   (and (file-exists-p path-out)
                        (file-newer-than-file-p path-in path-out))))
      (funcall fun path-in path-out))))

;; model id -> some path

(defun-local -mt-> (model-id path-ttl) "`model-id' to `path-ttl'")
(defun-local -ml-> (model-id path-jsonld) "`model-id' to `path-jsonld'")
(defun-local -mxjjl-> (model-id path-xlsx path-json path-jsong path-jsonld)
  "`model-id' to everything except the ttl"
  (let ((path-xlsx (concat default-directory "/" path-xlsx))
        (path-json (concat default-directory "/" path-json))
        (path-jsong (concat default-directory "/" path-jsong))
        (path-jsonld (concat default-directory "/" path-jsonld))
        (path-temp (make-temp-file "apinat-conversion" 'directory)))
    (unwind-protect
        (let* ((default-directory path-temp)
               (google-sheet-id (symbol-name (-ms-> model-id)))
               (path-internal (concat path-temp "/build")) ; should not exist to avoid date suffix
               (_ (let (backtrace-on-error-noninteractive ; we know where the error happens
                        (rc-argv (list
                                  apinat-converter-command
                                  "-f" "id"
                                  "-t" "xlsx"
                                  "-t" "json"
                                  "-t" "json-resources"
                                  "-t" "json-flattened"
                                  (if apinat-converter-debug "-t" "")
                                  (if apinat-converter-debug "json-ld" "")
                                  "-i" google-sheet-id
                                  "-o" path-internal)))
                    (when apinat-converter-debug
                      (message "command: %s" (string-join rc-argv " ")))
                    (apply #'run-command rc-argv))))
          (cl-loop for path in (list path-xlsx path-json path-jsonld) do
                   (let ((parent (file-name-directory path)))
                     (unless (file-directory-p parent)
                       (make-directory parent t))))
          (rename-file (concat path-internal "/" "model.xlsx") path-xlsx t)
          (rename-file (concat path-internal "/" "model.json") path-json t)
          (rename-file (concat path-internal "/" "model-generated.json") path-jsong t)
          (rename-file (concat path-internal "/" "model-flattened.jsonld") path-jsonld t))
      (unless apinat-converter-debug
        (delete-directory path-temp 'recursive)))))

(defun-local -mj-> (model-id path-json) "`model-id' to `path-json'")

;; intermediate steps for model id

(defun-local -m-lt-> (model-id)
  (apinat--with-model model-id
    (funcall (out-or #'-lt->) jsonld ttl)))

(defun-local -m-x-> (model-id)
  (apinat--with-model model-id
    (-mx-> model-id xlsx)))

(defun-local -ms-> (model-id)
  (oa-path :google :sheets (if (keywordp model-id)
                               model-id
                             (intern (format ":%s" model-id)))))

(defun-local -mx-> (model-id path-xlsx)
  ;; automated
  (let* ((google-sheet-id (-ms-> model-id))
         (url (format "https://docs.google.com/spreadsheets/d/%s/export?format=xlsx" google-sheet-id)))
    ' ; it probably makes more sense to implement stuff like this using the condition system?
    ;; in terms of flow control for a DAG you try to do the thing,
    ;; stop at your first error and then go do the dependency? but in
    ;; reality there is often an explicit step where all checks must
    ;; pass before the whole process can continue because of some time
    ;; constraint or similar
    (ex-do (message "Make sure that the permissions are set correctly on %s" url))
    ;; NOTE `url-copy-file' cannot detect login redirects correclty
    ;; google sends a 307 for the download if everything is going to work
    ;; in curl it sends a 302 but never something in the 400 range
    ;; ideally we would be able to (run-command "mimetype" path-xlsx)
    ;; but that requires that users have the mimetype command avaiable
    (url-copy-file url path-xlsx t)))

(defun-local -xj-> (path-xlsx path-json)
  "This is currently a manual step."
  (let (;(open-physiology-viewer "file:///home/tom/git/open-physiology-viewer/dist/test-app/index.html")
        (open-physiology-viewer "https://open-physiology-viewer.surge.sh/"))
    ;; TODO conditional open only if not already
    ;;(browse-url open-physiology-viewer)
    ;;(run-command "google-chrome-unstable" open-physiology-viewer)
    (ex-do (message "open file (left top folder) to upload to viewer from %s" path-xlsx)
           (message "save file (left bottom floppy) to download from viewer to %s" path-json)
           (ex-label 'viewer-after-open))))

(defun-local -jl-> (path-json path-jsonld)
  "Currently a manual step."
  (ex-do (ex-come-from 'viewer-after-open) ; This is amazing.
         ;; Allows decoupling of functional spec from the actual execution in the real world.
         ;; As a bonus we get to use my all time favorite control flow structure.
         (message "export flattened json-ld (right 2nd from bot white doc) to download from viewer to %s"
                  path-jsonld)))

(defun-local -xjl-> (path-xlsx path-json path-jsonld)
  ;; yay automated NOTE requires nodejs and open-physiology-viewer
  (let ((path-xlsx (concat default-directory "/" path-xlsx))
        (path-json (concat default-directory "/" path-json))
        (path-jsonld (concat default-directory "/" path-jsonld))
        (path-temp (make-temp-file "apinat-conversion" 'directory)))
    (unwind-protect
        (let* ((default-directory path-temp)
               (_ (run-command apinat-converter-command "-m" "xlsx" "-i" path-xlsx))
               (output-dir (car (directory-files default-directory nil "converted-*"))))
          ;; '("model-flattened.jsonLD" "model-generated.json" "model.json" "model.jsonLD")
          (rename-file (concat output-dir "/" "model.json") path-json t)
          (rename-file (concat output-dir "/" "model-flattened.jsonLD") path-jsonld t))
      (delete-directory path-temp 'recursive))))

(defun-local -lt-> (path-jsonld path-ttl)
  ;; automated
  (let (backtrace-on-error-noninteractive)
    (run-command (or (executable-find "pypy3") ; beware missing libs
                     (executable-find "python"))
                 "-m" "sparcur.cli" "apinat" path-jsonld path-ttl)))
#+end_src

#+name: all-ttl-models
#+begin_src elisp :results none
(defun-local update-models (model-ids) ; vs &rest model-ids
  ;; FIXME mapcar is inadequate for handling parallel processes that
  ;; might have `ex-do' parts
  (mapcar #'flow-to-ttl model-ids))

(defun-local all-models ()
  ;;(let ((default-directory (git-share-local r-apin-mod:HEAD:)) ; TODO ))
  (let ((default-directory (expand-file-name "models" apinat-model-repo)))
    ;; you could use something like model-repository but then you have to make
    ;; a bunch of concatentations, better just to switch the default directory
    ;; so that the context deals with alignment between name and local referent
    (cl-remove-if (lambda (p)
                    (or (not (file-directory-p p))
                        (string-prefix-p "." p)
                        (member p apinat-exclude-models)
                        (not (file-exists-p (concat p "/source/" p ".json")))))
                  (directory-files default-directory))))

(defun apinat--ttl-newer (model-id)
  (apinat--with-model model-id
    (let ((mtimes
           (mapcar (lambda (p)
                     (string-to-number
                      (format-time-string
                       "%s"
                       (file-attribute-modification-time (file-attributes p)))))
                   (list ttl xlsx))))
      (message "%S" mtimes)
      (and (file-exists-p ttl)
           (apply #'> mtimes)))))

;; TODO apinat--remote-older or something

(defun apinat--all-except (except)
  (cl-remove-if (lambda (id) (memq id except)) (mapcar #'intern (all-models))))

(defun-local filter-recent-models (model-ids)
  (cl-remove-if #'apinat--ttl-newer model-ids))

(defun-local update-all-models (&optional skip-recent)
  (update-models (if skip-recent
                     (filter-recent-models (mapcar #'intern (all-models)))
                   (mapcar #'intern (all-models)))))

(defvar-local apinat-process-results nil)

(defun sentinel (process message &optional stderr-process)
  (when (memq (process-status process) '(exit signal))
    (let ((ex (process-exit-status process))
          (buf (process-buffer process))
          (cmd (process-command process)))
      (setq apinat-process-results
            (cons (list ex (with-current-buffer buf (buffer-string) cmd))
                  apinat-process-results))
      (if (= ex 0)
        (message "completed: %S" process)
        (warn "command failed with %s: %s" ex (string-join cmd " "))
        (warn "stdout: %S stderr: %S"
              (with-current-buffer buf (buffer-string))
              (and stderr-process
                   (with-current-buffer (process-buffer stderr-process)
                     (buffer-string))))))))

(defun-local update-all-models-async ()
  (let ((model-ids (all-models)))
    (message "updating all models %s" model-ids)
    (cl-loop
     for model-id in model-ids
     collect
     ;; TODO consider whether we can somehow use invocation-name invocation-directory so that
     ;; specific versions of emacs are used to run the block instead of always the system version
     (ow-run-command-async "sh" :sentinel #'sentinel (buffer-file-name) "--model-id" model-id "--secrets" oa-secrets "--repo" apinat-model-repo
                           (if apinat-converter-debug "--debug" nil)))
    (while (< (length apinat-process-results) (length model-ids))
      (sleep-for 5)
      (message "complete: %s/%s" (length apinat-process-results) (length model-ids)))))
#+end_src

#+begin_src elisp
;; FIXME do fetch all in one batch so we don't have
;; to wait for the ttl export between each model
(update-all-models t)
' ; or pick your own models
(update-models '(vagus-nerve))
' ; jsonld -> ttl conversion
(-m-lt-> 'vagus-nerve)
#+end_src

if error clone the repo
#+begin_src sh
pushd ~/git
git clone https://github.com/open-physiology/apinatomy-models.git
#+end_src
if model-id error then we need to set the model ids in secrets but in
reality need to overwrite the defniition of ~-ms->~ is easier right
now

missing derived folders
#+begin_src powershell
pushd ~/git/apinatomy-models/
New-Item -Path * -Name derived -ItemType "directory"
#+end_src

#+begin_src bash
pushd ~/git/apinatomy-models/
find -maxdepth 1 -type d -not -path '*.git*' -not -path '.' -exec mkdir {}/derived \;
#+end_src
** Deploy ttl
After running the ttl export define the functions in
ref:deploy-ontology-file and then run ~apinat-deploy-from-ttl
bronchomotor.ttl~. NOTE Both functions need to be defined.

#+name: deploy-ttls
#+begin_src elisp
(defvar apinat--remote-onts-path ; TODO source from config
  "/ssh:cassava|sudo:nginx@cassava:/var/www/sparc/ApiNATOMY/ontologies/")

(defun apinat--write-to-remote (model-id)
  "Deploy a single apinatomy model MODEL-ID to `apinat--remote-onts-path'."
  (apinat--with-model model-id
    (with-current-buffer (find-file-noselect ttl 'no-warn 'rawfile)
      ;; FIXME this has inverted nesting if we want to start depositing
      ;; multiple different models
      (unwind-protect
          (let* (;(version "TODO datetime etc, or better, read it from the ttl file")
                 (version (int-to-string (time-convert nil 'integer)))
                 (version-path-local
                  (concat (symbol-name model-id) "/" version "/" (file-name-nondirectory ttl)))
                 (version-path
                  (expand-file-name
                   version-path-local
                   apinat--remote-onts-path))
                 (latest-path
                  (expand-file-name
                   (file-name-nondirectory ttl)
                   apinat--remote-onts-path))
                 (tramp-histfile-override "/dev/null"))
            ;; if the version already exists, error
            ;; otherwise make it via tramp
            (if (file-directory-p version-path)
                ;; XXX this call to `file-directory-p' outside where
                ;; `tramp-histfile-override' is bound to t is critical
                ;; to prevent the tramp ssh sudo chain from truncating
                ;; your .bash_history file, see
                (error "Version exists!")
              (let ((tramp-histfile-override t))
                ;; system users have no home, tramp will error without override t
                ;; XXX WARNING if `tramp-histfile-override' is set to t and somehow
                ;; the scope of a call affects a local file path or runs on the
                ;; local system it WILL truncate the history file for the current user ;_;
                (make-directory (file-name-directory version-path) 'parents)
                (write-file version-path)
                ;;(message "%S %S" version-path-local latest-path)
                (make-symbolic-link
                 version-path-local
                 latest-path
                 'ok-if-already-exists))))
        (kill-buffer (current-buffer))))))

(defun apinat-deploy-models (model-ids)
  ;; FIXME check already deployed
  (let (fails)
    (cl-loop for model-id in model-ids
             do (condition-case nil
                    (apinat--write-to-remote model-id)
                  (error (push model-id fails))))
    fails))
#+end_src

The current command to deploy all is.
#+begin_src bash
for f in $(ls models/*/derived/*.ttl); do echo apinat-deploy-from-ttl $f; done
#+end_src

Alternately use the following to deploy specific models.

# on deployment server
#+name: apinat-last-deploy-date
#+begin_src bash :dir /ssh:cassava:/var/www/sparc/ApiNATOMY/ontologies :cache yes
date -Is -d "@$(find | awk -F'/' '{ print $3 }' | sort | tail -n 1)"
#+end_src

# on devel
#+header: :var LAST_DEPLOY=apinat-last-deploy-date() REPO=(and (boundp 'apinat-model-repo) (expand-file-name apinat-model-repo))
#+name: apinat-json-changed-since-last-deploy
#+begin_src bash :results drawer
pushd "${REPO}" 2>&1 > /dev/null
git log --name-only --since "${LAST_DEPLOY}" --pretty="format:" | grep json | cut -d'/' -f2 | sort -u
#+end_src

Always run =apinat-deploy-ttls= from inside =apinat-model-repo=
#+begin_src bash
apinat-deploy-ttls $(git diff --name-only HEAD~1..HEAD | cut -d'/' -f 2 | sort -u)
#+end_src

If you add a new model you will need to update the imports in
https://cassava.ucsd.edu/ApiNATOMY/ontologies/sparc-data.ttl.
The update process should be automated as part of the workflows
described here. See also [[file:./../resources/scigraph/ontologies-sparc-data.yaml]].

# [[tramp:/ssh:cassava|sudo:cassava:/var/www/sparc/ApiNATOMY/ontologies/sparc-data.ttl]]

# FIXME it should be possible to implement this whole process
# using OntResIriWrite or something like that
# read the header, lookup the uri -> server file system path
# write the version iri if it doesn exist (otherwise error)
# and symlink it to the remote, I don't have an implementation
# of RemoteUnixPath that could use something like sftp to
# allow direct execution of file operations on a remote path
# from a local python representation of that class so it is
# too big to bite off right now

#+name: deploy-ontology-file
#+begin_src bash :tangle ../bin/apinat-functions.sh
function apinat-remote-operations () {
    local PATH_SOURCE="${1}"
    local PATH_TARGET="${2}"
    local PATH_LINK="${3}"
    local FILE_NAME_TTL=$(basename -- "${PATH_TTL}")
    local DIR_LINK="$(dirname "${PATH_LINK}")"
    local LINK_TARGET="$(realpath -m --relative-to="${DIR_LINK}" "${PATH_TARGET}")"
    mkdir -p "$(dirname "${PATH_TARGET}")"
    chown nginx:nginx "${PATH_SOURCE}"
    mv "${PATH_SOURCE}" "${PATH_TARGET}"
    # FIXME we need to fail if the source path does not exist otherwise we end in broken state
    unlink "${PATH_LINK}"
    ln -s "${LINK_TARGET}" "${PATH_LINK}"
}

function apinat-deploy-from-ttl () {
    # TODO loop over positional argument paths, but retain a single ssh command
    local PATH_TTL="${1}"  # FIXME careful with this, never allow a user to set the source path
    local DATE=$(date +%s)  # FIXME source from the ontology directly? better to spend time implementing OntResIriWrite
    local HOST_APINAT_ONTOLOGY=cassava
    local FILE_NAME_TTL=$(basename -- "${PATH_TTL}")
    local NAME_TTL="${FILE_NAME_TTL%.*}"
    local PATH_REMOTE_TARGET_BASE=/var/www/sparc/ApiNATOMY/ontologies/
    local VERSION_PATH="${NAME_TTL}/${DATE}/${FILE_NAME_TTL}"
    local PATH_REMOTE_SOURCE="/tmp/${FILE_NAME_TTL}"
    local PATH_REMOTE_TARGET="${PATH_REMOTE_TARGET_BASE}${VERSION_PATH}"
    local PATH_REMOTE_LINK="${PATH_REMOTE_TARGET_BASE}${FILE_NAME_TTL}"

    # FIXME also notify host for sudo
    local SUDO_OR_SU='$(command -v sudo 1>& 2 && echo sudo ${0} -c || { echo For su on ${HOSTNAME} 1>& 2; echo su -c; })'

    # TODO ensure that apinat-remote-operations is defined
    rsync --rsh ssh "${PATH_TTL}" ${HOST_APINAT_ONTOLOGY}:"${PATH_REMOTE_SOURCE}"
    ssh -t ${HOST_APINAT_ONTOLOGY} "${SUDO_OR_SU} '$(typeset -f apinat-remote-operations); apinat-remote-operations \
\"${PATH_REMOTE_SOURCE}\" \
\"${PATH_REMOTE_TARGET}\" \
\"${PATH_REMOTE_LINK}\"'"
}

function apinat-deploy-ttls () {
    # TODO do it in batch, derive the timesamps correctly etc.
    for id in $@; do
        apinat-deploy-from-ttl "models/${id}/derived/${id}.ttl"
    done
}
#+end_src

Check [[https://cassava.ucsd.edu/ApiNATOMY/ontologies/]] for success if needed.
# [[tramp:/ssh:cassava|sudo:cassava:/var/www/sparc/ApiNATOMY/ontologies/sparc-data.ttl]]

#+begin_src bash
spc report changes \
--ttl-file https://cassava.ucsd.edu/ApiNATOMY/ontologies/keast-bladder/1620348301/keast-bladder.ttl \
--ttl-compare https://cassava.ucsd.edu/ApiNATOMY/ontologies/keast-bladder/1617055182/keast-bladder.ttl
#+end_src
** Load and deploy graph
Then run
[[file:../../pyontutils/nifstd/scigraph/README.org::#run-load-deploy-graph-sparc-data][run-load-deploy-graph-sparc-data]]
to load and deploy in one shot.

An example run is
#+begin_src bash
~/git/pyontutils/nifstd/scigraph/bin/run-load-graph-sparc-data
~/git/pyontutils/nifstd/scigraph/bin/run-deploy-graph-sparc-data
#+end_src
# TODO consider ob-screen ... for cases like this
# where we aren't really writing bash so much as just
# running commands
** Review query output
[[http://ontology.neuinfo.org/trees/sparc/dynamic/demos/apinat/somas][All somas]]
[[http://ontology.neuinfo.org/trees/sparc/dynamic/demos/apinat/soma-processes][Soma processes]]
[[http://ontology.neuinfo.org/trees/sparc/simple/dynamic/demos/apinat/soma-processes][Soma processes simple]]
* NPO identifiers
:PROPERTIES:
:CUSTOM_ID: npo-identifiers
:END:
** minimal listing
#+begin_src elisp :exports none
(jupyter-repl-restart-kernel) ; and this is why we don't use python kids
#+end_src

Run this and commit the output to the neurons branch of the ontology.

#+name: apinatomy-neuron-populations.py
#+begin_src jupyter-python :session pys
import rdflib
import augpathlib as aug
from pyontutils.core import OntGraph, OntResPath
from pyontutils.config import auth
from pyontutils.namespaces import *
repo_relative_path = 'ttl/generated/neurons/apinatomy-neuron-populations.ttl'
uri_base = 'https://raw.githubusercontent.com/SciCrunch/NIF-Ontology/neurons/'
oid = rdflib.URIRef(uri_base + repo_relative_path)

def _genlabel(oid):
    x = (oid.rsplit("-", 3)[-3]
         if oid.endswith('-prime')
         else (oid.rsplit("-", 3)[-3] if 'unbranched' in oid else oid.rsplit("-", 2)[-2]))
    l = (oid.rsplit("-", 3)[-3]
         if oid.endswith('-prime')
         else (' '.join(oid.rsplit("-", 3)[-3:-1]) if 'unbranched' in oid else oid.rsplit("-", 2)[-2]))
    t = (oid.rsplit("-", 2)[-2] + "'"
         if oid.endswith('-prime')
         else (oid.rsplit("-", 1)[-1] if 'unbranched' in oid else oid.rsplit("-", 1)[-1]))

    # XXX manual fix for consistency
    if x == 'keast':
        x = 'kblad'
    if l == 'keast':
        l = 'kblad'

    label = f'neuron type {l} {t}'
    return label, x

def genlabel(oid):
    return _genlabel(oid)[0]

g = OntGraph()
g.populate_from_triples(
    ((oid, p, o) for p, o in
     ((rdf.type, owl.Ontology),
)))

skip = 'small-intestine',
amr = aug.LocalPath("~/git/apinatomy-models").expanduser()
models = [c for c in (amr / 'models').children if c.name != 'too-map']
mgraphs = []
for m in models:
    if m.name in skip:
        continue
    for p in (m / 'derived' / (m.name + '.ttl'),):
        if p.exists():
            try:
                mgraphs.append(OntResPath(p).graph)
            except Exception as e:
                raise Exception(p) from e

for mg in mgraphs:
    mg.namespace_manager.populate(g)
    for s in mg[:rdf.type:elements.OntologyTerm]:
        if 'readable/neuron-type' in s:  # FIXME haaaack
            g.add((s, rdfs.subClassOf, ilxtr.NeuronEBM))
            g.add((s, rdfs.label, rdflib.Literal(genlabel(s))))

olr = aug.LocalPath(auth.get('ontology-local-repo')).expanduser()
g.write(olr / repo_relative_path)
#+end_src

* Dynamic cypher queries
:PROPERTIES:
:visibility: folded
:END:
NOTE: this section contains temporary instructions.
This should really be done on a development instance of data services.
Sometimes it is faster to edit [[tramp:/ssh:aws-scigraph-data-scigraph:services.yaml]] directly.
Use the following command to restart services to load the updated dynamic queries.
#+begin_src bash :results none
ssh aws-scigraph-data sudo systemctl restart scigraph
#+end_src
When you have a query working as desired add it or update it in
[[file:../resources/scigraph/cypher-resources.yaml][cypher resources]].
# TODO need that local/remote git link ...
See also [[file:../../pyontutils/nifstd/scigraph/README.org::#sparc-data-services-build-deploy][data services build and deploy]].
* Add new ApiNATOMY model to SciGraph load
Edit [[file:../resources/scigraph/sparc-data.ttl][sparc-data.ttl]] and
add a new line to the second =owl:import= statement.
* ApiNATOMY model server specification
:PROPERTIES:
:visibility: folded
:END:
# file is in [[../../pyontutils/nifstd/resolver/apinatomy-resolver.conf]]
See nginx config at https://github.com/tgbugs/pyontutils/blob/master/nifstd/resolver/apinatomy-resolver.conf
** Intro
While an ApiNATOMY server has been on the roadmap for some time, there have not been
clear requirements and use cases to drive the development in a way that is productive.
As the conversion of ApiNATOMY models to RDF has progressed, some of the requirements
and use cases have presented themselves and helped to solidify a set of initial use cases.
The need to integrate knowledge represented in ApiNATOMY into the larger linked data space
provides some initial requirements which are the that the server be able to provide persistent
and resolvable identifiers for ApiNATOMY models, and that it be able to provide high granularity
access to the version history of these models. In addition, we are ultimately aiming for
the server to be able to automatically convert input models or spreadsheets into generated
models and resource maps. We have mapped out three phases for arriving at this end goal.
The first phase is to be able to resolve input models, the second is to be able to upload
and link the generated model and resource map and track which input model they came from.
These two will address our primary short-term needs.

To accomplish this, the plan is to use git (via GitHub) as the primary datastore for the models.
This will allow us to leverage the significant existing infrastructure around GitHub for version
control, collaboration, review, content hosting, and backup. In front of this there will be a
server that provides resolvable persistent identifiers for ApiNATOMY models so that the identifiers
appearing in the linked data graphs will be resolvable and interoperable with the rest of the
NIF-Ontology search and discovery tooling.

In the future as part of the third phase we can work towards automating the conversion of input models,
and it might also be possible to have the server automatically convert and serve the RDF version of the
models as well.

A brief outline of the initial requirements needed to meet the needs of the RDF conversion pipeline
are documented below.
** Architecture diagram
[[file:./images/apinatomy-server-diagram.png]]
Legend.
| Solid lines         | initial  |
| Dashed lines        | soon     |
| Dotted lines        | later    |
| Dashed dotted lines | dataflow |
** https by default
** url structure
*** apinatomy.org
**** /
landing page to maximize engagement

existing index page
links to the various git repositories
youtube introduction
wikipedia page and similar
funding
papers
use cases
**** /viewer
the open-physiology-viewer
a search entry point could also appear here
**** /dashboard
list of anatomical entities that have been used to annotate
number of models etc. from the queries.org file
somehow link this into search?
**** /docs/{page}
***** /docs/source.html
move under /docs
***** /docs/identifiers.html
generated via esdoc, issues with angular components
**** /docs/manual
currently at /manual
out of date
coming from open-physiology-viewer/manual markdown files
**** /uris
alternately https://uri.apinatomy.org
***** /uris/models/{model-id}
need a landing page that has all the model metadata and would allow
users to open the model in the viewer with a single click, maybe even
have a static image of the model rendered on the page
***** /uris/models/{model-id}.{ext}
how to deal with json/ttl and model, generated, map
***** /uris/models/{model-id}/ids/{local-id}
***** /uris/models/{model-id}/snapshot/{snapshot-id}
Implies that snapshots are always associated with a single model, so
that if there are multiple models they should be imported into a
single top level file.

need some way to resolve snapshot files
***** /uris/readable/{string}
***** /uris/elements/{string}
**** TODO json-ld context
*** tests
http://apinatomy.org/docs
http://apinatomy.org/docs/
http://apinatomy.org/docs/manual/usage.html
http://apinatomy.org/viewer
http://apinatomy.org/uris/models/keast-bladder
http://apinatomy.org/uris/ontologies/keast-bladder.ttl
http://apinatomy.org/uris/models/keast-bladder/source/keast-bladder.json
http://apinatomy.org/uris/models/keast-bladder/ref/master/source/keast-bladder.json
http://apinatomy.org/uris/models/keast-bladder/ref/df0dc5f9e96620c8f4deef3727a81868a6606eea/source/keast-bladder.json
http://apinatomy.org/uris/models/keast-bladder/version/1627520229
** transformed models/copies need to be able to point back to the exact commit
for deposition on blackfynn, export to scigraph, etc.
the source model hash needs to be separat
** Serve the JSONLD context
** return authoring metadata
** store the source model
** have endpoint for resource-map and generated
** overlap with loading in the client
*** load all formats from local
*** google sheets import
*** load from a url
* Reporting
#+begin_src python :epilogue "return main()" :exports both
import json
import augpathlib as aug
from pyontutils.core import OntGraph
from pyontutils.namespaces import rdf, owl


def path_json(string):
    with open(string, 'rt') as f:
        return json.load(f)


def main():
    graph = OntGraph()
    apinat_models = aug.RepoPath('~/git/apinatomy-models').expanduser()
    [graph.parse(f) for f in apinat_models.rglob('*.ttl')]
    # rdf
    n_trip = len(graph)
    n_class = len(set(graph[:rdf.type:owl.Class]))
    n_ind = len(set(graph[:rdf.type:owl.NamedIndividual]))
    # json
    js = [path_json(p) for p in apinat_models.rglob('*.json')]
    keys = ('publications', 'nodes', 'links', 'lyphs', 'materials', 'chains', 'groups')
    n_obj = sum([sum([len(j[k])
                      if k in j else 0 for k in keys])
                 for j in js])
    n_pair = sum([sum([sum([len(o) for o in j[k]])
                       if k in j else 0 for k in keys])
                  for j in js])
    print(f'''rdf
trip:  {n_trip}
class: {n_class}
ind:   {n_ind}

json
obj:   {n_obj}
obj:   {n_pair}''')
    return [['Type', 'Authored', 'Expanded'],
            ['Individual', n_obj, n_ind],
            ['Statement', n_pair, n_trip],
            ['owl:Class', 'n/a', n_class],]
#+end_src

#+RESULTS:
| Type       | Authored | Expanded |
|------------+----------+----------|
| Individual |     1714 |    25940 |
| Statement  |     8274 |   318378 |
| owl:Class  |      n/a |      395 |
* External links
https://scicrunch.org/sawg/about/ApiNATOMY
* Bootstrap :noexport:

#+name: orgstrap
#+begin_src elisp :results none :lexical yes :noweb yes
;;; load remote code

(unless (featurep 'reval)
  (defvar reval-cache-directory (concat user-emacs-directory "reval/cache/"))
  (defun reval-minimal (cypher checksum path-or-url &rest alternates)
    "Simplified and compact implementation of reval."
    (let* (done (o url-handler-mode) (csn (symbol-name checksum))
           (cache-path (concat reval-cache-directory (substring csn 0 2) "/" csn
                               "-" (file-name-nondirectory path-or-url))))
      (url-handler-mode)
      (unwind-protect
          (cl-loop for path-or-url in (cons cache-path (cons path-or-url alternates))
                   do (when (file-exists-p path-or-url)
                        (let* ((buffer (find-file-noselect path-or-url))
                               (buffer-checksum (intern (secure-hash cypher buffer))))
                          (if (eq buffer-checksum checksum)
                              (progn
                                (unless (string= path-or-url cache-path)
                                  (let ((parent-path (file-name-directory cache-path))
                                        make-backup-files)
                                    (unless (file-directory-p parent-path)
                                      (make-directory parent-path t))
                                    (with-current-buffer buffer
                                      (write-file cache-path))))
                                (eval-buffer buffer)
                                (setq done t))
                            (kill-buffer buffer) ; kill so cannot accidentally evaled
                            (error "reval: checksum mismatch! %s" path-or-url))))
                   until done)
        (unless o
          (url-handler-mode 0)))))
  (defalias 'reval #'reval-minimal)
  (reval 'sha256 '3620321396c967395913ff19ce507555acb92335b0545e4bd05ec0e673a0b33b 
         "https://raw.githubusercontent.com/tgbugs/orgstrap/300b1d5518af53d76d950097bcbcd7046cfa2285/reval.el"))

(let ((ghost "https://raw.githubusercontent.com/tgbugs/orgstrap/"))
  (unless (featurep 'ow)
    (reval 'sha256 '670c68e5649987fb64a93a7b5610ace0f18a0b71f376faf7499de933247931f2
           (concat ghost "021b66c8f1dd4bf55714a4de889f31741f8460f6" "/ow.el"))))

(unless (fboundp 'run-command)
  ;; ow.el doesn't set the alias because it is doubles as a real package
  (defalias 'run-command #'ow-run-command))

;; (ow-enable-use-package)

;; (ow-use-packages docopt)

;; local function definitions

<<flow-to-ttl>>

<<all-ttl-models>>

<<deploy-ttls>>

(defun apinat---pre-tangle () ; (ref:sure)
  ;; FIXME hardcoded paths issues
  (unless (assq '&are-you-sure org-babel-library-of-babel)
    (org-babel-lob-ingest "~/git/pyontutils/nifstd/scigraph/README.org")))

(add-hook 'org-babel-pre-tangle-hook #'apinat---pre-tangle nil t)

(unless (or noninteractive (and (boundp 'ow-nth-time) ow-nth-time))
  (setq-local ow-nth-time t)
  (ow-hide-section-0-blocks))

;; entry point for batch command line

(when noninteractive
  (unless user-init-file
    ;; FIXME I can't decide whether this approach or the
    ;; ~/.config/app-name/init.el approach is better and whether I
    ;; should use "~/.orgstrap/init.el" or something ... but I think
    ;; the point here is that we just want to provide a place for a
    ;; stripped down init file that will load as fast as the user
    ;; wants and can hold pointers to things like oa-secrets
    (let ((orgstrap-init-file (expand-file-name "orgstrap-init.el" user-emacs-directory)))
      (when (file-exists-p orgstrap-init-file)
        (setq user-init-file orgstrap-init-file)
        (load user-init-file))))
  (ow-cli-gen
      ((:install)
       (:tangle)
       (:deploy) ; FIXME decouple from build
       (:after-commit nil)
       (:after-tag nil)
       (:all)
       (:model-id nil) ; the id of the model to build
       (:exclude nil) ; FIXME handle more than one
       ((:repo apinat-model-repo) apinat-model-repo) ; path to the models repo
       ((:converter apinat-converter-command) apinat-converter-command) ; command or full path to converter
       ((:secrets oa-secrets) oa-secrets) ; path to secrets.sxpr file
       ((:debug) apinat-converter-debug)) ; enable debug mode
    (let ((apinat-exclude-models (or (and exclude (cons exclude apinat-exclude-models)) apinat-exclude-models)))
      (cond
       (tangle
        (let (enable-local-eval)
          ;; this pattern is required when tangling to avoid infinite loops
          (revert-buffer nil t nil)
          (setq-local find-file-literally nil))
        (org-babel-tangle))
       (deploy ; XXX TODO multiple models logic from diffs etc. XXX better yet, diff remote
        (message "argv: %S ac: %S" argv after-commit)
        (cond
         (all (apinat-deploy-models (all-models)))
         (model-id (apinat-deploy-models (list (intern model-id))))))
       ((or all model-id)
        (message "updating %s" (or model-id "all"))
        (if all
            (progn
              (update-all-models-async)
              (let ((fails (cl-remove-if (lambda (l) (= (car l) 0)) apinat-process-results)))
                (when fails
                  (message "some failures %S" fails)
                  (kill-emacs 1))))
          (update-models (list (intern model-id)))))
       ))))
#+end_src

#+begin_src elisp
' ; slow but works
(apinat-deploy-models (apinat--all-except '(keast-bladder bolser-lewis fcomp too-map)))
;; (apinat-deploy-models '(keast-bladder too-map))

(let (password-cache) ; password-data
  ;; (apinat-deploy-models '(too-map))
  (apinat-deploy-models (apinat--all-except '(too-map))))

;; tramp-cleanup-all-buffers ; FIXME nginx sudo password issues
;; (tramp-list-remote-buffers)
#+end_src

[[(sure)]] Ensure that the
[[file:../../pyontutils/nifstd/scigraph/README.org::&are-you-sure][&are-you-sure]]
block can be nowebbed for tangling.

** Local Variables :ARCHIVE:
# close powershell comment #>
# Local Variables:
# eval: (progn (setq-local orgstrap-min-org-version "8.2.10") (let ((a (org-version)) (n orgstrap-min-org-version)) (or (fboundp #'orgstrap--confirm-eval) (not n) (string< n a) (string= n a) (error "Your Org is too old! %s < %s" a n))) (defun orgstrap-norm-func--dprp-1-0 (body) (let ((p (read (concat "(progn\n" body "\n)"))) (m '(defun defun-local defmacro defvar defvar-local defconst defcustom)) print-quoted print-length print-level) (cl-labels ((f (b) (cl-loop for e in b when (listp e) do (or (and (memq (car e) m) (let ((n (nthcdr 4 e))) (and (stringp (nth 3 e)) (or (cl-subseq m 3) n) (f n) (or (setcdr (cddr e) n) t)))) (f e))) p)) (prin1-to-string (f p))))) (unless (boundp 'orgstrap-norm-func) (defvar-local orgstrap-norm-func orgstrap-norm-func-name)) (defun orgstrap-norm-embd (body) (funcall orgstrap-norm-func body)) (unless (fboundp #'orgstrap-norm) (defalias 'orgstrap-norm #'orgstrap-norm-embd)) (defun orgstrap-org-src-coderef-regexp (_fmt &optional label) (let ((fmt org-coderef-label-format)) (format "\\([:blank:]*\\(%s\\)[:blank:]*\\)$" (replace-regexp-in-string "%s" (if label (regexp-quote label) "\\([-a-zA-Z0-9_][-a-zA-Z0-9_ ]*\\)") (regexp-quote fmt) nil t)))) (unless (fboundp #'org-src-coderef-regexp) (defalias 'org-src-coderef-regexp #'orgstrap-org-src-coderef-regexp)) (defun orgstrap--expand-body (info) (let ((coderef (nth 6 info)) (expand (if (org-babel-noweb-p (nth 2 info) :eval) (org-babel-expand-noweb-references info) (nth 1 info)))) (if (not coderef) expand (replace-regexp-in-string (org-src-coderef-regexp coderef) "" expand nil nil 1)))) (defun orgstrap--confirm-eval-portable (lang _body) (not (and (member lang '("elisp" "emacs-lisp")) (let* ((body (orgstrap--expand-body (org-babel-get-src-block-info))) (body-normalized (orgstrap-norm body)) (content-checksum (intern (secure-hash orgstrap-cypher body-normalized)))) (eq orgstrap-block-checksum content-checksum))))) (unless (fboundp #'orgstrap--confirm-eval) (defalias 'orgstrap--confirm-eval #'orgstrap--confirm-eval-portable)) (let (enable-local-eval) (vc-find-file-hook)) (let ((ocbe org-confirm-babel-evaluate) (obs (org-babel-find-named-block "orgstrap"))) (if obs (unwind-protect (save-excursion (setq-local orgstrap-norm-func orgstrap-norm-func-name) (setq-local org-confirm-babel-evaluate #'orgstrap--confirm-eval) (goto-char obs) (org-babel-execute-src-block)) (when (eq org-confirm-babel-evaluate #'orgstrap--confirm-eval) (setq-local org-confirm-babel-evaluate ocbe)) (ignore-errors (org-set-visibility-according-to-property))) (warn "No orgstrap block."))))
# End: