#!/usr/bin/env bash # --- STANDARD SCRIPT-GLOBAL CONSTANTS kTHIS_NAME=${BASH_SOURCE##*/} kTHIS_HOMEPAGE='https://github.com/mklement0/trl' kTHIS_VERSION='v0.4.0' # NOTE: This assignment is automatically updated by `make version VER=` - DO keep the 'v' prefix. unset CDPATH # To prevent unexpected `cd` behavior. # --- Begin: STANDARD HELPER FUNCTIONS die() { echo "$kTHIS_NAME: ERROR: ${1:-"ABORTING due to unexpected error."}" 1>&2; exit ${2:-1}; } dieSyntax() { echo "$kTHIS_NAME: ARGUMENT ERROR: ${1:-"Invalid argument(s) specified."} Use -h for help." 1>&2; exit 2; } # SYNOPSIS # openUrl # DESCRIPTION # Opens the specified URL in the system's default browser. openUrl() { local url=$1 platform=$(uname) cmd=() case $platform in 'Darwin') # OSX cmd=( open "$url" ) ;; 'CYGWIN_'*) # Cygwin on Windows; must call cmd.exe with its `start` builtin cmd=( cmd.exe /c start '' "$url " ) # !! Note the required trailing space. ;; 'MINGW32_'*) # MSYS or Git Bash on Windows; they come with a Unix `start` binary cmd=( start '' "$url" ) ;; *) # Otherwise, assume a Freedesktop-compliant OS, which includes many Linux distros, PC-BSD, OpenSolaris, ... cmd=( xdg-open "$url" ) ;; esac "${cmd[@]}" || { echo "Cannot locate or failed to open default browser; please go to '$url' manually." >&2; return 1; } } # Prints the embedded Markdown-formatted man-page source to stdout. printManPageSource() { sed -n -e $'/^: <<\'EOF_MAN_PAGE\'/,/^EOF_MAN_PAGE/ { s///; t\np;}' "$BASH_SOURCE" } # Opens the man page, if installed; otherwise, tries to display the embedded Markdown-formatted man-page source; if all else fails: tries to display the man page online. openManPage() { local pager embeddedText if ! 
man 1 "$kTHIS_NAME" 2>/dev/null; then # 2nd attempt: if present, display the embedded Markdown-formatted man-page source embeddedText=$(printManPageSource) if [[ -n $embeddedText ]]; then pager='more' command -v less &>/dev/null && pager='less' # see if the non-standard `less` is available, because it's preferable to the POSIX utility `more` printf '%s\n' "$embeddedText" | "$pager" else # 3rd attempt: open the the man page on the utility's website openUrl "${kTHIS_HOMEPAGE}/doc/${kTHIS_NAME}.md" fi fi } # Prints the contents of the synopsis chapter of the embedded Markdown-formatted man-page source for quick reference. printUsage() { local embeddedText # Extract usage information from the SYNOPSIS chapter of the embedded Markdown-formatted man-page source. embeddedText=$(sed -n -e $'/^: <<\'EOF_MAN_PAGE\'/,/^EOF_MAN_PAGE/!d; /^## SYNOPSIS$/,/^#/{ s///; t\np; }' "$BASH_SOURCE") if [[ -n $embeddedText ]]; then # Print extracted synopsis chapter - remove backticks for uncluttered display. printf '%s\n\n' "$embeddedText" | tr -d '`' else # No SYNOPIS chapter found; fall back to displaying the man page. echo "WARNING: usage information not found; opening man page instead." >&2 openManPage fi } # --- End: STANDARD HELPER FUNCTIONS # --- PROCESS STANDARD, OUTPUT-INFO-THEN-EXIT OPTIONS. case $1 in --version) # Output version number and exit, if requested. echo "$kTHIS_NAME $kTHIS_VERSION"$'\nFor license information and more, visit '"$kTHIS_HOMEPAGE"; exit 0 ;; -h|--help) # Print usage information and exit. printUsage; exit ;; --man) # Display the manual page and exit, falling back to printing the embedded man-page source. openManPage; exit ;; --man-source) # private option, used by `make update-man` # Print raw, embedded Markdown-formatted man-page source and exit printManPageSource; exit ;; --home) # Open the home page and exit. 
openUrl "$kTHIS_HOMEPAGE"; exit ;; esac # --- MAIN BODY # --- BEGIN: functions # Helper function for exiting with error message due to runtime error. # die [errMsg [exitCode]] # Default error message states context and indicates that execution is aborted. Default exit code is 1. # Prefix for context is always prepended. # Note: An error message is *always* printed; if you just want to exit with a specific code silently, use `exit n` directly. die() { echo "$kTHIS_NAME: ERROR: ${1:-"ABORTING due to unexpected error."}" 1>&2 exit ${2:-1} # Note: If the argument is non-numeric, the shell prints a warning and uses exit code 255. } # Helper function for exiting with error message due to invalid arguments. # dieSyntax [errMsg] # Default error message is provided, as is prefix and suffix; exit code is always 2. dieSyntax() { echo "$kTHIS_NAME: ARGUMENT ERROR: ${1:-"Invalid argument(s) specified."} Use -h for help." 1>&2 exit 2 } # Core helper function that transforms the stdin input passed to it, based on the environment variables set by the script after options parsing. # Note that the output by design has NO trailing \n, so we can append output directly to it (closing wrapper string) without a forced line break. transform() { # Note: Thanks to -n, the script passed to -e is executed for *every line* of input. perl -MText::ParseWords -ne ' chomp; # Remove trailing \n. # Apply delimiters sub enclose { my $odelimOpen = $ENV{odelimOpen}; my $odelimClose = $ENV{odelimClose}; # Escape embedded instances of the delimiters if ($odelimOpen or $odelimClose) { s/[\Q$odelimOpen$odelimClose\E]/\\$&/g } return $odelimOpen . $_ . $odelimClose; } # If requested (for multi-line input), \-escape double and single quotes on lines that are not quoted as a whole. if ($ENV{autoEscapeUnquotedLines}) { if ($_ !~ /^".*"$/ and $_ !~ /^\047.*\047$/) { s/["\047]/\\$&/g; } } # Parse into words, ignoring empty words. 
my @flds = grep { $_ =~ /\S/ } Text::ParseWords::parse_line($ENV{sep}, $ENV{keepQuotes}, $_); # Output the result with the requested reformatting. if ($#flds >= 0) { print join($ENV{osep}, map { enclose $_ } @flds); if ($ENV{orsep} ne "") { print $ENV{orsep} unless eof; } }' } # Trims (strips off) *trailing* whitespace from a *single line* (any mix of spaces and tabs); pass text as a single (and only) argument. # E.g.: trimmed=$(rtrimSingleLine ' a b ') # -> $trimmed == ' a b' rtrimSingleLine() { [[ $1 =~ ^(.*[^[:blank:]])([[:blank:]]+)$ ]] && printf '%s' "${BASH_REMATCH[1]}" || printf '%s' "$1" } # Trims (strips off) *leading* whitespace from a *single line* (any mix of spaces and tabs); pass text as a single (and only) argument. # E.g.: trimmed=$(ltrimSingleLine $' \t \t a b ') # -> $trimmed == 'a b ' ltrimSingleLine() { [[ $1 =~ ^[[:blank:]]+([^[:blank:]].*)$ ]] && printf '%s' "${BASH_REMATCH[1]}" || printf '%s' "$1" } # --- END: functions # Make sure that Perl is available. [[ -n $(command -v perl) ]] || die "This utility requires Perl. Executable \`perl\` cannot be located." # NOTE: We explicitly UNSET the variables corresponding to options, # so that we can distinguish between never having set a value and # deliberately having assigned the empty string. unset odelim sep osep rsep orsep keepQuotes outWrapper noAutoEscapeOfUnquotedLines inputIsSingleLine # ----- BEGIN: OPTIONS PARSING: This is MOSTLY generic code, but: # - SET allowOptsAfterOperands AFTER THIS COMMENT TO 1 to ALLOW OPTIONS TO BE MIXED WITH OPERANDS rather than requiring all options to come before the 1st operand, as POSIX mandates. # - The SPECIFIC OPTIONS MUST BE HANDLED IN A CASE ... ESAC STATEMENT BELOW; look for "BEGIN: CUSTOMIZE HERE ... END: CUSTOMIZE HERE" # - Assumes presence of function dieSyntax(); if not present, define as: dieSyntax() { echo "$(basename -- "$BASH_SOURCE"): ARGUMENT ERROR: ${1:-"Invalid argument(s) specified."} Use -h for help." 
#    >&2; exit 2; }   (tail of the fallback dieSyntax() definition quoted in the preceding comment line)
#  - After the end of options parsing, $@ only contains the operands (non-option arguments), if any.
allowOptsAfterOperands=1 operands=() i=0 optName= isLong=0 prefix= optArg= haveOptArgAttached=0 haveOptArgAsNextArg=0 acceptOptArg=0 needOptArg=0 optsSpecified=0
while (( $# )); do
  if [[ $1 =~ ^(-)[a-zA-Z0-9]+.*$ || $1 =~ ^(--)[a-zA-Z0-9]+.*$ ]]; then # an option: either a short option / multiple short options in compressed form or a long option
    prefix=${BASH_REMATCH[1]}; [[ $prefix == '--' ]] && isLong=1 || isLong=0
    # A long option is processed in a single pass; a short-option cluster is walked char. by char.
    for (( i = 1; i < (isLong ? 2 : ${#1}); i++ )); do
      acceptOptArg=0 needOptArg=0 haveOptArgAttached=0 haveOptArgAsNextArg=0 optArgAttached= optArgOpt= optArgReq=
      if (( isLong )); then # long option: parse into name and, if present, argument
        optName=${1:2}
        [[ $optName =~ ^([^=]+)=(.*)$ ]] && { optName=${BASH_REMATCH[1]}; optArgAttached=${BASH_REMATCH[2]}; haveOptArgAttached=1; }
      else # short option: *if* it takes an argument, the rest of the string, if any, is by definition the argument.
        optName=${1:i:1}; optArgAttached=${1:i+1}; (( ${#optArgAttached} >= 1 )) && haveOptArgAttached=1
      fi
      # Tentatively determine the option-argument: the attached one, if any; otherwise, the next argument, if any.
      (( haveOptArgAttached )) && optArgOpt=$optArgAttached optArgReq=$optArgAttached || { (( $# > 1 )) && { optArgReq=$2; haveOptArgAsNextArg=1; }; }
      optsSpecified=1
      # ---- BEGIN: CUSTOMIZE HERE
      case $optName in
        # ---- trl options
        k|keep) # keep input quoting
          keepQuotes=1
          ;;
        s|separator|separator=*) # input item separator
          needOptArg=1
          sep=$optArgReq
          ;;
        S|out-separator|out-separator=*) # output item separator
          needOptArg=1
          osep=$optArgReq
          ;;
        R|out-rec-separator|out-rec-separator=*) # output record separator
          needOptArg=1
          orsep=$optArgReq
          ;;
        D|out-delim|out-delim=*) # output delims - note that the only *input* delims supported are single and double quotes
          needOptArg=1
          odelim=$optArgReq
          ;;
        W|out-wrapper|out-wrapper=*) # text to wrap the entire output list in
          needOptArg=1
          outWrapper=$optArgReq
          ;;
        x|as-is) # do not auto-escape quotes on lines of multi-line input that aren't quoted as a whole
          noAutoEscapeOfUnquotedLines=1
          ;;
        *)
          dieSyntax "Unknown option: ${prefix}${optName}."
          ;;
      esac
      # ---- END: CUSTOMIZE HERE
      # If the option requires an argument: fail, if there is none; consume the next argument, if that is where it came from.
      (( needOptArg )) && { (( ! haveOptArgAttached && ! haveOptArgAsNextArg )) && dieSyntax "Option ${prefix}${optName} is missing its argument." || (( haveOptArgAsNextArg )) && shift; }
      # An option that takes an argument ends a short-option cluster, because the rest of the cluster *is* the argument.
      (( acceptOptArg || needOptArg )) && break
    done
  else # an operand
    if [[ $1 == '--' ]]; then # explicit end of options: everything that follows is an operand
      shift; operands+=( "$@" ); break
    elif (( allowOptsAfterOperands )); then
      operands+=( "$1" ) # continue
    else
      operands=( "$@" )
      break
    fi
  fi
  shift
done
if (( ${#operands[@]} > 0 )); then set -- "${operands[@]}"; fi
unset allowOptsAfterOperands operands i optName isLong prefix optArgAttached haveOptArgAttached haveOptArgAsNextArg acceptOptArg needOptArg
# ----- END: OPTIONS PARSING: "$@" now contains all operands (non-option arguments).

# Gather the input: remaining operand(s) OR stdin:
# We also determine if the input is single- or multi-line, as that fundamentally governs the default behavior.
inputIsSingleLine=1
if (( $# > 0 )); then
  if (( $# == 1 )); then
    txt=$1
    # Determine if the input - the one and only operand - is single-line or not.
    [[ $txt =~ $'\n' ]] && inputIsSingleLine=0
  else # if multiple operands were specified, we always consider the input single-line, but convert them to 1-item-per-line presentation so as to preserve item boundaries
    txt=$(printf '%s\n' "$@")
  fi
else # read from stdin
  # NOTE(review): this branch was truncated in the source at hand and has been
  #               reconstructed: read all of stdin into memory (as the man page
  #               states), then classify it the same way as a single operand.
  txt=$(cat)
  [[ $txt =~ $'\n' ]] && inputIsSingleLine=0
fi

# With multi-line input, quotes on lines that aren't quoted as a whole are
# auto-escaped, unless suppressed with -x (see the man page). transform() reads
# this flag from the environment; always assign it, so that a value inherited
# from the caller's environment cannot leak in.
if (( ! inputIsSingleLine && ! noAutoEscapeOfUnquotedLines )); then
  autoEscapeUnquotedLines=1
else
  autoEscapeUnquotedLines=0
fi

# Nothing to do, if there is no input.
(( ${#txt} > 0 )) || exit 0

# !! If we blindly escape ' and " instances in the input, we lose the ability to treat them as delimiters, so we don't do it.
# !! Unfortunately, conversely, unbalanced instances cause this script to malfunction (empty or incomplete output, with no error message).
# txt=${txt//\'/\\\'}
# txt=${txt//\"/\\\"}

# Check for incompatible options: -k cannot be combined with -D, even if the
# latter was given the empty string (${odelim+set} is non-empty whenever
# odelim has been assigned at all).
(( keepQuotes )) && [[ -n ${odelim+set} ]] && dieSyntax "Incompatible options specified."

# Determine the default option values, based on whether the input is single- or multi-line.
if (( inputIsSingleLine )); then # single-line input
  # recognize whitespace, a comma, or a comma followed by whitespace as the input-field separator
  # strip quotes, put each field on its own line
  # (With multiple operands, the items were placed on separate lines, so a newline is the separator.)
  if (( $# > 1 )); then
    sep=${sep-$'\n'}
  else
    sep=${sep-',?\s+|,'}
  fi
  osep=${osep-$'\n'}
  orsep=${orsep-$osep}
  keepQuotes=${keepQuotes-0}
else # multi-line input
  # recognize newlines as input-field separators
  # double-quote fields on output (irrespective of whether they were quoted or not), and separate them with ', '
  sep=${sep-$'\n'}
  osep=${osep-', '}
  orsep=${orsep-$osep}
  keepQuotes=${keepQuotes-0}
  (( ! keepQuotes )) && odelim=${odelim-\"}
fi

# Splits a string into an opening and a closing part and stores them in the
# variables whose names are passed.
#   splitInHalf <str> <openVarName> <closeVarName>
# A single char. is used as both the opening and the closing part; otherwise,
# the first half (rounded down) becomes the opening part and the rest the
# closing part. An empty string yields two empty parts.
splitInHalf() {
  local _sihStr=$1 _sihHalf
  if (( ${#_sihStr} <= 1 )); then
    printf -v "$2" '%s' "$_sihStr"
    printf -v "$3" '%s' "$_sihStr"
  else
    _sihHalf=$(( ${#_sihStr} / 2 ))
    printf -v "$2" '%s' "${_sihStr:0:_sihHalf}"
    printf -v "$3" '%s' "${_sihStr:_sihHalf}"
  fi
}

# If a delimiter was specified, split into opening and closing delimiter, if necessary.
odelimOpen= odelimClose=
if [[ -n $odelim ]]; then
  splitInHalf "$odelim" odelimOpen odelimClose
fi

# If wrapper string was specified, split into prologue and epilogue.
outWrapperOpen= outWrapperClose=
if [[ -n $outWrapper ]]; then
  splitInHalf "$outWrapper" outWrapperOpen outWrapperClose
  # Adjust the wrapper strings, if the output is multi-line.
  if [[ $osep =~ $'\n' || $orsep =~ $'\n' ]]; then
    # Put the opening string on its own line, unless it already contains a newline.
    [[ $outWrapperOpen =~ $'\n' ]] || outWrapperOpen="$(rtrimSingleLine "$outWrapperOpen")"$'\n'
    # The list itself is printed without a trailing \n, so the closing string
    # needs a leading one, unless it already starts with a newline.
    [[ $outWrapperClose =~ ^$'\n' ]] || outWrapperClose=$'\n'"$(ltrimSingleLine "$outWrapperClose")"
  fi
fi

# Print opening wrapper string, if any.
if [[ -n $outWrapperOpen ]]; then
  printf %s "$outWrapperOpen"
fi

# (Debugging aid: pv odelimOpen odelimClose sep osep orsep keepQuotes txt)

# Hand the behavior-controlling variables to the Perl command inside
# transform() via the environment, then run the transformation on the
# gathered input; abort with the default error message, if it fails.
export odelimOpen odelimClose sep osep rsep orsep keepQuotes autoEscapeUnquotedLines
if ! transform < <(printf %s "$txt"); then
  die
fi

# Finish the output: the closing wrapper string, if there is one, and in any
# case a terminating \n, because transform() - by design - emits none.
if [[ -n $outWrapperClose ]]; then
  printf '%s\n' "$outWrapperClose"
else
  printf '\n'
fi

exit 0

####
# MAN PAGE MARKDOWN SOURCE
#  - Place a Markdown-formatted version of the man page for this script
#    inside the here-document below.
#    The document must be formatted to look good in all 3 viewing scenarios:
#     - as a man page, after conversion to ROFF with marked-man
#     - as plain text (raw Markdown source)
#     - as HTML (rendered Markdown)
#  Markdown formatting tips:
#  - GENERAL
#    To support plain-text rendering in the terminal, limit all lines to 80 chars.,
#    and, for similar rendering as HTML, *end every line with 2 trailing spaces*.
#  - HEADINGS
#    - For better plain-text rendering, leave an empty line after a heading
#      marked-man will remove it from the ROFF version.
#    - The first heading must be a level-1 heading containing the utility
#      name and very brief description; append the manual-section number
#      directly to the CLI name; e.g.:
#        # foo(1) - does bar
#    - The 2nd, level-2 heading must be '## SYNOPSIS' and the chapter's body
#      must render reasonably as plain text, because it is printed to stdout
#      when `-h`, `--help` is specified:
#        Use 4-space indentation without markup for both the syntax line and the
#        block of brief option descriptions; represent option-arguments and operands
#        in angle brackets; e.g., '<foo>'
#    - All other headings should be level-2 headings in ALL-CAPS.
#  - TEXT
#    - Use NO indentation for regular chapter text; if you do, it will
#      be indented further than list items.
#    - Use 4-space indentation, as usual, for code blocks.
#    - Markdown character-styling markup translates to ROFF rendering as follows:
#       `...` and **...** render as bolded (red) text
#       _..._ and *...* render as word-individually underlined text
#  - LISTS
#    - Indent list items by 2 spaces for better plain-text viewing, but note
#      that the ROFF generated by marked-man still renders them unindented.
#    - End every list item (bullet point) itself with 2 trailing spaces too so
#      that it renders on its own line.
#    - Avoid associating more than 1 paragraph with a list item, if possible,
#      because it requires the following trick, which hampers plain-text readability:
#      Use '&nbsp;' in lieu of an empty line.
####
: <<'EOF_MAN_PAGE'
# trl(1) - transform lists of strings

## SYNOPSIS

Transforms lists of unquoted and/or quoted strings.

    trl [<options>] [<operand>...]

    -s <sep>        input list separator
    -S <sep>        output list separator
    -k              keep input item delimiters
    -D <delim>      output item delimiter (cannot be combined with -k)
    -W <wrapper>    text to wrap the result list in
    -R <sep>        output record separator (multi-line + multi-item-per-line
                    input only)
    -x              do not auto-escape quotes on lines not quoted as a whole

By default,

 * a multi-line list is transformed to a single-line list with double-quoted
   items separated by a comma followed by a space.
 * a single-line list is transformed to a multi-line list with unquoted items.

Standard options: `--help`, `--man`, `--version`, `--home`

## DESCRIPTION

trl *tr*ansforms *l*ists of unquoted and/or quoted strings, by default between
single- and multi-line forms.
Separators, delimiters, and an optional wrapper string are configurable.

Both single- and double-quotes are recognized as input item delimiters, and
embedded literal quotes of the same type must be backslash-escaped.
**Note**: * In the input, for embedded quotes of the same type to be properly recognized as literals inside quoted tokens, they must be backslash-escaped. * However, with multi-line input, if a given line is not quoted as a whole, backslash-escaping is implicitly applied to any single- or double-quotes on the line, allowing lines with imbalanced quotes, such as: Ten o'clock By contrast, if your input lines each contain multiple, individually quoted tokens, use -x to suppress this behavior; otherwise, such lines will be treated as a single token each, with embedded quotes escaped on output. * CAVEAT: Malformed input can result in LOSS OF TOKENS on output. * Similarly, on output, embedded instances of the output delimiters are backslash-escaped. Input is provided via one or more operands or, in their absence, via stdin. Note that stdin input is read into memory as a whole. To disambiguate operands from options, precede operands with `--` as a separate argument. Sensible defaults are used for options that aren't explicitly specified; their values depend on whether the input is single- or multi-line: * Single-line input: List items are assumed to be separated with whitespace, a comma, or a comma followed by whitespace. They are transformed into multi-line output with each item on its own line, stripped of surrounding quotes. * Multi-line input: List items are assumed to be each on a separate line. They are transformed into a single-line list with items double-quoted and separated by a comma followed by a space. Explicitly specified options override the behavior described. ## OPTIONS Some options described below come in two flavors: * Lowercase options such as `-s` describe the *input*. * [Corresponding] uppercase options such as `-S` control the *output*. 
With the exception of `-s`, which accepts a Perl regular expression to describe
the separators between input items, options with arguments expect literals
rather than escape sequences; if needed, use ANSI C-quoted strings in Bash,
Ksh, or Zsh to create literal control characters; e.g., `$'\n'` and `$'\t'`
create a literal newline and tab, respectively.

 * `-k`, `--keep`  
   Keeps all items in their input form, whether quoted or unquoted;
   the default is to remove quoting on input processing; double-quoting is
   by default applied on *output* when transforming a multi-line list to a
   single-line list; to unconditionally produce unquoted items, use `-D ''`,
   to enforce a uniform quoting character, use `-D <delim>`.

 * `-s <sep>`, `--separator <sep>`  
   Specifies the input item separator, i.e., what separates items in the
   input, using a Perl regular expression.
   Note that *runs* of matches (i.e., one or more contiguous instances) are
   invariably considered a *single* separator.

 * `-S <sep>`, `--out-separator <sep>`  
   Specifies the output item separator, i.e., how to separate the items on
   output, irrespective of what separated them in the input.

 * `-D <delim>`, `--out-delim <delim>`  
   Specifies the delimiter to put around the output items; this may be any
   string, but note that on *input* only single- and double-quotes are
   recognized; cannot be combined with `-k`.
   Use `-D ''` to make all output items unquoted.
   If a single char. is specified, it is used as both the opening and closing
   delimiter; otherwise, the first *half* of the specified string is used as
   the opening delimiter, and the second half as the closing one.

 * `-W <wrapper>`, `--out-wrapper <wrapper>`  
   Specifies output wrapper text to place around the entire output list,
   including desired whitespace, if any:
   if a single char. is specified, that char. is used both before and after
   the output list; otherwise, the first *half* of the text is placed before,
   and the second one after; if the output list is multi-line, a newline is
   appended / prepended to the opening/closing string.
 * `-R <sep>`, `--out-rec-separator <sep>`  
   Specifies the output record separator, which only applies to multi-line
   input whose lines contain multiple items each:
   by default, the output record separator is set to the same value as the
   output item separator, so that a multi-line list results in a uniformly
   separated single-line list.
   Specify `-R $'\n'` to instead retain the line breaks from the input (while
   transforming the items on each line), or pass an alternative separator to
   replace them.

 * `-x`, `--as-is`  
   Disables the auto-escaping behavior for lines of multi-line input that
   aren't quoted as a whole.
   Use this if your input lines may contain multiple, (possibly selectively)
   individually quoted tokens; otherwise, such lines will be treated as a
   single token each, with embedded quotes escaped on output.

## STANDARD OPTIONS

All standard options provide information only.

 * `-h, --help`  
   Prints the contents of the synopsis chapter to stdout for quick reference.

 * `--man`  
   Displays this manual page, which is a helpful alternative to using `man`,
   if the manual page isn't installed.

 * `--version`  
   Prints version information.

 * `--home`  
   Opens this utility's home page in the system's default web browser.

## PREREQUISITES

Perl 5 must be installed.

## LICENSE

For license information, bug reports, and more, visit this utility's home page
by running `trl --home`

## EXAMPLES

The examples in part use ANSI C-quoted input strings (`$'...'`) for brevity,
which are supported in Bash, Ksh, and Zsh.

    # Default single-line to multi-line transformation.
    $ trl '"one", "two", "three"'
    one
    two
    three

    # Default multi-line to single-line transformation.
$ trl <<<$'one\ntwo\nthree\n3 " of rain' "one", "two", "three", "3 \" of rain" # Transform the argument list to a C-style array: $ trl -S ', ' -D \" -W '{ }' one two three 'four (4)' { "one", "two", "three", "four (4)" } # Transform a list to multi-line form using a regex to identify items; # US-format telephone number to CSV: $ trl -s '[() -]' -S , '(789) 123-456' 789,123,456 # Transform a multi-line list with multiple items per line. $ trl -s ' ' -R ' <-> ' <<<$'one two\nthree four' "one", "two" <-> "three", "four" EOF_MAN_PAGE