#!/usr/bin/env bash

## mrbones --- a bare-bones static site generator
## mrbones by Dimitri Kokkonis is licensed under a Creative Commons Attribution-ShareAlike 4.0
## International License. See the accompanying LICENSE file for more info.

VERSION="0.3.1-dev"
# Do not edit this field. It is automatically populated during installation.
BUILD="unknown"
# External programs that must be available on `$PATH`.
DEPENDENCIES=(realpath find sort)

set -e

TEMPLATES_DIR_NAME="_templates"
SITE_DIR_NAME="_site"

VERBOSE=0
# The following values are used for color:
#   - 0: never
#   - 1: auto (sensible defaults)
#   - 2: always
USE_COLOR=1

# By default the site lives in the current directory; a positional argument may override this.
WORKING_DIR=$PWD
TEMPLATES_DIR="$WORKING_DIR/$TEMPLATES_DIR_NAME"
SITE_DIR="$WORKING_DIR/$SITE_DIR_NAME"

# Placeholder substituted for literal `\@` while directives are being processed.
ESCAPED_AT="{@@MRBONES@@}"

# A directive target starts with a non-whitespace character and extends up to (but not including)
# the next tab, carriage return, newline, form feed or vertical tab. Plain spaces are allowed
# inside a target.
USE_TARGET_REGEX="[^[:space:]][^"$'\t'$'\r'$'\n'$'\f'$'\v'"]+"
INCLUDE_TARGET_REGEX="$USE_TARGET_REGEX"
PERMALINK_TARGET_REGEX="$USE_TARGET_REGEX"
CONTENT_SECTION_NAME_REGEX="[a-zA-Z0-9_-]+"

PERMALINK_REGEX="@permalink[[:blank:]]+($PERMALINK_TARGET_REGEX)[[:blank:]]*("$'\n'"|$)"
USE_REGEX="@use"
USE_REGEX_FULL="${USE_REGEX}[[:blank:]]+($USE_TARGET_REGEX)"
INCLUDE_REGEX="@include"
INCLUDE_REGEX_FULL="${INCLUDE_REGEX}[[:blank:]]+($INCLUDE_TARGET_REGEX)"
BEGIN_REGEX_FULL="@begin[[:blank:]]+($CONTENT_SECTION_NAME_REGEX)"
# The second group captures the single character (if any) that follows the section name, so that
# it can be re-emitted after the substitution.
CONTENT_REGEX="@content\.($CONTENT_SECTION_NAME_REGEX)([^a-zA-Z0-9_-]?)"

# Check globally if we're attached to a tty. This should be done here because inside a function
# the context at the callsite may change the result.
#
# Every colored message in this script is written to `stderr`, so it is file descriptor 2 (and
# not `stdout`) that has to be a terminal for colors to make sense.
IS_TTY=0
if [ -t 2 ]
then
    IS_TTY=1
fi

# Decide if we should use colors in the output.
#
# This follows the standard described in http://bixense.com/clicolors/.
#
# Globals read: USE_COLOR, NO_COLOR, CLICOLOR_FORCE, IS_TTY.
# Returns: 0 if colors should be used, 1 otherwise. Exits with code 1 if `$USE_COLOR` holds a
# value other than 0, 1 or 2.
should_use_color() {
    case $USE_COLOR in
        0)
            return 1  # False
            ;;
        1)
            # "auto": a non-empty `$NO_COLOR` wins over everything else; otherwise colors are
            # used if they are forced via `$CLICOLOR_FORCE` or if we are writing to a terminal.
            if [[ $NO_COLOR ]]
            then
                return 1  # False
            elif [[ ($CLICOLOR_FORCE) || ($IS_TTY == 1) ]]
            then
                return 0  # True
            else
                return 1  # False
            fi
            ;;
        2)
            return 0  # True
            ;;
        *)
            echo "INTERNAL: invalid value '$USE_COLOR' for \$USE_COLOR." 1>&2
            exit 1
            ;;
    esac
}


# Print an error message to `stderr`.
error_message() {
    # All arguments are joined with spaces into a single message. The message is passed to
    # `printf` as data (`%s`), so backslashes in it (e.g. in file paths) are printed verbatim
    # regardless of whether colors are in use.
    if should_use_color
    then
        printf '\e[1m[mrbones]\e[0m \e[31mERROR: %s\e[0m\n' "$*" 1>&2
    else
        printf '[mrbones] ERROR: %s\n' "$*" 1>&2
    fi
}


# Print an error message to `stderr` and exit with code 1.
#
# The output directory `$SITE_DIR` is removed before exiting, so that no half-generated site is
# left behind. `${SITE_DIR:?}` aborts instead of expanding to "/" should the variable be empty.
error() {
    error_message "$@"
    rm -rf "${SITE_DIR:?}/"

    exit 1
}


# Print an info message to `stderr`.
info_message() {
    if should_use_color
    then
        printf '\e[1m[mrbones]\e[0m \e[32m%s\e[0m\n' "$*" 1>&2
    else
        printf '[mrbones] %s\n' "$*" 1>&2
    fi
}


# Print a verbose message to `stderr`. Does nothing unless `$VERBOSE` is enabled.
verbose_message() {
    if [[ $VERBOSE == 0 ]]
    then
        return
    fi

    if should_use_color
    then
        printf '\e[1m[mrbones]\e[0m \e[38;5;244m%s\e[0m\n' "$*" 1>&2
    else
        printf '[mrbones] %s\n' "$*" 1>&2
    fi
}


# Parse arguments to `mrbones`.
#
# Globals written: USE_COLOR, VERBOSE, WORKING_DIR, TEMPLATES_DIR, SITE_DIR.
# `--help` and `--version` print to `stdout` and exit with code 0; an unrecognized option or an
# invalid `--color` value is reported through `error` (exit code 1).
parse_arguments() {
    while [[ $# -gt 0 ]]
    do
        case $1 in
            --color=*)
                # In case the user passed `--color=WHEN`, rewrite the arguments to the
                # "normal" `--color WHEN` form and fall through to that case. Stripping the
                # prefix (instead of splitting on `=`) keeps an empty WHEN as an empty
                # argument, so `--color=` can never swallow the argument that follows it.
                set -- "--color" "${1#--color=}" "${@:2}"
                ;&
            "--color")
                case "$2" in
                    "always")
                        USE_COLOR=2
                        ;;
                    "auto")
                        USE_COLOR=1
                        ;;
                    "never")
                        USE_COLOR=0
                        ;;
                    *)
                        error "unrecognized value '$2' for option '--color'." \
                            "Try \`mrbones --help\` for more information."
                esac

                # Skip over `--color` and its argument.
                shift
                shift
                ;;
            "-h" | "--help")
                echo -e "mrbones - a barebones static site generator\n" \
                    "\nusage: mrbones [option(s)]" \
                    "\n --color= specify when color should be used:" \
                    "\n - never: color is never used" \
                    "\n - auto: sensible defaults apply" \
                    "\n - always: color is always used" \
                    "\n -h, --help print this help message" \
                    "\n -v, --verbose print more verbose messages" \
                    "\n -V, --version print this program's version number"
                exit 0
                ;;
            "-v" | "--verbose")
                VERBOSE=1

                # Skip over `-v`/`--verbose`.
                shift
                ;;
            "-V" | "--version")
                echo "mrbones $VERSION (build $BUILD)"
                exit 0
                ;;
            --* | -*)
                error "unrecognized option '$1'." \
                    "Try \`mrbones --help\` for more information."
                ;;
            *)
                # A positional argument names the directory of the site to build.
                WORKING_DIR="$(realpath "$1")"
                TEMPLATES_DIR="$WORKING_DIR/$TEMPLATES_DIR_NAME"
                SITE_DIR="$WORKING_DIR/$SITE_DIR_NAME"

                # Skip over the argument.
                shift
                ;;
        esac
    done
}


# Check that the necessary dependencies exist.
#
# Every entry of `$DEPENDENCIES` must be resolvable by `command -v`; the first one that is not
# is reported through `error`.
check_dependencies() {
    local dependency

    for dependency in "${DEPENDENCIES[@]}"
    do
        if ! command -v "$dependency" > /dev/null
        then
            error "missing dependency: $dependency."
        fi
    done
}


# Build the site.
#
# Arguments: the command-line arguments of the script (see `parse_arguments`).
#
# Steps:
#   1. Recreate `$SITE_DIR` from scratch.
#   2. Copy every non-HTML file of `$WORKING_DIR` (templates and the output directory excluded)
#      to the same relative location in `$SITE_DIR`.
#   3. For every `*.html`/`*.htm` page: resolve `@permalink`, expand `@use` (filling the
#      template's `@content.NAME` placeholders from the page's `@begin NAME`/`@end NAME`
#      sections), expand `@include`, and write the result (plus a `NAME/index.html` copy for
#      pages that are not index pages themselves).
#
# Any failure in a page is reported through `error`, which removes `$SITE_DIR` and exits.
main() {
    local file_path rel_file_path parent_dir_path src_page_path rel_src_page_path \
        dest_page_path page_content raw_permalink_target permalink_target permalink_match \
        use_target use_template_content begin_match section_name end_regex_full end_match \
        section_regex section_match section_content content_match content_key post_content \
        content replacement include_match include_target include_template_content \
        rel_dest_page_path dest_page_dir page_filename page_name page_name_dir
    local -a site_content=() src_pages=()
    local -A cache=() use_chain=() content_sections=() include_chain=()

    check_dependencies
    parse_arguments "$@"

    info_message "Setting up output directory '$SITE_DIR'..."
    verbose_message " Removing '$SITE_DIR/'..."
    rm -rf "$SITE_DIR"
    verbose_message " Creating '$SITE_DIR/'..."
    mkdir -p "$SITE_DIR"

    info_message "Copying site content..."
    # Make sure to *not* copy templates and the output site directory.
    # We are also *not* copying HTML files: those are pages, which are generated further below
    # and put where they belong (according to the permalinks).
    mapfile -t site_content < <( \
        find "$WORKING_DIR" -type f \
            -not -name "*.html" \
            -not -name "*.htm" \
            -not -path "$SITE_DIR/*" \
            -not -path "$TEMPLATES_DIR/*" \
    )
    for file_path in "${site_content[@]}"
    do
        # `rel_file_path` keeps its leading '/', e.g. "/css/style.css".
        rel_file_path="${file_path##"$WORKING_DIR"}"
        parent_dir_path="$SITE_DIR/${rel_file_path%/*}"
        mkdir -p "$parent_dir_path"
        cp "$file_path" "$parent_dir_path"
    done

    info_message "Generating pages..."
    mapfile -t src_pages < <( \
        find "$WORKING_DIR" -type f \
            "(" -name "*.html" -or -name "*.htm" ")" \
            -not -path "$SITE_DIR/*" \
            -not -path "$TEMPLATES_DIR/*" \
        | sort \
    )

    # Template contents (with `\@` already escaped), keyed by template path relative to
    # `$TEMPLATES_DIR`. Shared by all pages and by both `@use` and `@include`.
    cache=()
    for src_page_path in "${src_pages[@]}"
    do
        rel_src_page_path="${src_page_path##"$WORKING_DIR/"}"
        dest_page_path="$SITE_DIR/$rel_src_page_path"
        verbose_message " Generating page '$rel_src_page_path'..."

        page_content="$(<"$src_page_path")"
        # We escape any literal `\@`s to avoid mistaking them for directives.
        # We will put them back at the end.
        page_content="${page_content//"\\@"/"$ESCAPED_AT"}"

        # The first `@permalink` found is the one that counts.
        raw_permalink_target=""
        if [[ $page_content =~ $PERMALINK_REGEX ]]
        then
            raw_permalink_target="${BASH_REMATCH[1]}"

            # Remove any `@permalink `s.
            while [[ $page_content =~ $PERMALINK_REGEX ]]
            do
                permalink_match="${BASH_REMATCH[0]}"
                page_content="${page_content//"$permalink_match"}"
            done
        fi

        if [[ -n $raw_permalink_target ]]
        then
            verbose_message " Resolving destination (permalink)..."
            if [[ ${raw_permalink_target:0:1} != "/" ]]
            then
                error "$src_page_path: \`@permalink $raw_permalink_target\`: permalinks must be" \
                    "absolute paths (starting with '/')."
            fi

            permalink_target="$raw_permalink_target"
            # Make sure to normalize the permalink, ensuring that it ends in ".html"/".htm".
            if [[ ($permalink_target != *.htm) && ($permalink_target != *.html) ]]
            then
                permalink_target="$permalink_target.html"
            fi

            # ".." is not allowed in the permalink.
            if [[ $permalink_target == *..* ]]
            then
                error "$src_page_path: \`@permalink $raw_permalink_target\`: '..' is not allowed" \
                    "in permalinks."
            fi

            # Remove the leading '/' of the permalink.
            dest_page_path="$SITE_DIR/${permalink_target#/}"
        fi

        # Handle `@use`s.
        # Keep track of which `@use` targets have been already handled to identify recursion.
        use_chain=()
        while [[ $page_content =~ $USE_REGEX_FULL ]]
        do
            use_target="${BASH_REMATCH[1]}"
            verbose_message " Handling \`@use $use_target\`..."

            # Check that the use target exists.
            if [[ ! -f "$TEMPLATES_DIR/$use_target" ]]
            then
                error "$src_page_path: \`@use\` target '$use_target' not found in $TEMPLATES_DIR."
            fi

            # Check if we've already seen the target.
            if [[ -v use_chain["$use_target"] ]]
            then
                error "$src_page_path: \`@use $use_target\`: recursive \`@use\`."
            else
                use_chain["$use_target"]=1
            fi

            # Look at the cache for the `@use` target.
            use_template_content="${cache[$use_target]}"
            if [[ -z $use_template_content ]]
            then
                use_template_content="$(<"$TEMPLATES_DIR/$use_target")"
                # We escape any literal `\@`s to avoid mistaking them for directives.
                # We will put them back at the end.
                use_template_content="${use_template_content//"\\@"/"$ESCAPED_AT"}"
                cache["$use_target"]="$use_template_content"
            fi

            # Parse the `@use` content from the current file: every `@begin NAME` ... `@end NAME`
            # pair defines the section NAME, and is removed from the page once recorded.
            content_sections=()
            while [[ $page_content =~ $BEGIN_REGEX_FULL ]]
            do
                begin_match="${BASH_REMATCH[0]}"
                section_name="${BASH_REMATCH[1]}"
                end_regex_full="@end $section_name"
                if [[ $page_content =~ $end_regex_full ]]
                then
                    end_match="${BASH_REMATCH[0]}"
                    # The section body is whatever lies between the line of `@begin` and the
                    # line of `@end` (possibly nothing).
                    section_regex="$begin_match"$'\n'"(|.+)"$'\n'"$end_match"
                    if [[ $page_content =~ $section_regex ]]
                    then
                        section_match="${BASH_REMATCH[0]}"
                        section_content="${BASH_REMATCH[1]}"
                        content_sections["$section_name"]="$section_content"
                        # Eat the section.
                        page_content="${page_content//"$section_match"}"
                    else
                        error "INTERNAL: should be able to find section: $section_regex"
                    fi
                else
                    error "$src_page_path: can't find matching \`@end $section_name\`."
                fi
            done

            # Apply the discovered content sections.
            while [[ $use_template_content =~ $CONTENT_REGEX ]]
            do
                content_match="${BASH_REMATCH[0]}"
                content_key="${BASH_REMATCH[1]}"
                # The character right after the placeholder is part of the match, so it has to
                # be re-emitted after the replacement.
                post_content="${BASH_REMATCH[2]}"
                content="${content_sections[$content_key]}"
                # If the key exists in the page, then replace it with the actual contents.
                # Otherwise, just delete the `@content.$key` part.
                if [[ -n $content ]]
                then
                    replacement="$content$post_content"
                else
                    replacement="$post_content"
                fi

                use_template_content="${use_template_content//"$content_match"/"$replacement"}"
            done

            # The filled-in template becomes the page; it may itself contain another `@use`.
            page_content="$use_template_content"
        done

        # We should be done with `@use`s by now. If there are any left over, it means that their
        # targets are empty.
        if [[ $page_content =~ $USE_REGEX ]]
        then
            error "$src_page_path: empty \`@use\`."
        fi

        # Handle `@include`s.
        # Keep track of which `@include` targets have been already handled to identify recursion.
        include_chain=()
        while [[ $page_content =~ $INCLUDE_REGEX_FULL ]]
        do
            include_match="${BASH_REMATCH[0]}"
            include_target="${BASH_REMATCH[1]}"
            verbose_message " Handling \`@include $include_target\`..."

            # Check that the include target exists.
            if [[ ! -f "$TEMPLATES_DIR/$include_target" ]]
            then
                error "$src_page_path: \`@include\` target '$include_target' not found in" \
                    "$TEMPLATES_DIR."
            fi

            # Check if we've already seen the target.
            if [[ -v include_chain["$include_target"] ]]
            then
                error "$src_page_path: \`@include $include_target\`: recursive \`@include\`."
            else
                include_chain["$include_target"]=1
            fi

            # Look at the cache for the `@include` target.
            include_template_content="${cache[$include_target]}"
            if [[ -z $include_template_content ]]
            then
                include_template_content="$(<"$TEMPLATES_DIR/$include_target")"
                # We escape any literal `\@`s to avoid mistaking them for directives.
                # We will put them back at the end.
                include_template_content="${include_template_content//"\\@"/"$ESCAPED_AT"}"
                cache["$include_target"]="$include_template_content"
            fi

            # Every occurrence of this exact directive is replaced in one go.
            page_content="${page_content//"$include_match"/"$include_template_content"}"
        done

        # We should be done with `@include`s by now. If there are any left over, it means that
        # their targets are empty.
        if [[ $page_content =~ $INCLUDE_REGEX ]]
        then
            error "$src_page_path: empty \`@include\`."
        fi

        rel_dest_page_path="${dest_page_path##"$SITE_DIR/"}"
        dest_page_dir="${dest_page_path%/*}"
        verbose_message " Putting page in '$SITE_DIR_NAME/$rel_dest_page_path'..."
        mkdir -p "$dest_page_dir"
        # Since we potentially escaped some literal `\@`s to avoid mistaking them for directives,
        # it is now time to put them back.
        page_content="${page_content//"$ESCAPED_AT"/"\\@"}"
        # `printf` (rather than `echo`) so that a page consisting of e.g. "-n" is written as-is
        # instead of being taken for an option.
        printf '%s\n' "$page_content" > "$dest_page_path"

        # In order to support links that do not end in ".html"/".htm", we should generate dummy
        # `index.html` pages that enable directories to work as pages.
        #
        # For example, if there is a page `/stuff/things.html`, then `/stuff/things` will lead to
        # a 404 by default. By creating a copy of the same page in `/stuff/things/index.html`, we
        # enable that last URL to work as intended.
        #
        # This procedure shall be done on all pages except the ones titled 'index.html', since
        # those have a special meaning anyway (and creating an `index/` subdirectory would be
        # redundant).
        page_filename="${dest_page_path##*/}"
        if [[ $page_filename != "index.html" && $page_filename != "index.htm" ]]
        then
            # Strip only the final extension, so that `v1.2.html` maps to `v1.2/` (and not `v1/`).
            page_name="${page_filename%.*}"
            page_name_dir="$dest_page_dir/$page_name"
            rel_dest_page_path="${page_name_dir##"$SITE_DIR/"}/index.html"
            verbose_message " Creating page copy in '$SITE_DIR_NAME/$rel_dest_page_path'..."
            mkdir -p "$page_name_dir"
            printf '%s\n' "$page_content" > "$page_name_dir/index.html"
        fi
    done

    info_message "Done! Site is ready at '$SITE_DIR'."
}


# If the script is `source`d (as is the case during unit tests), do not run `main()`.
# `return` only succeeds inside a function or a sourced script, which is what the probe in the
# subshell relies on.
if ! (return 0 2>/dev/null)
then
    main "$@"
fi