#!/usr/bin/env python
# sedsed - Debugger and code formatter for sed scripts
# Since 27 November 2001, by Aurelio Jargas
# pylint: disable=invalid-name
# pylint: disable=redefined-outer-name
# pylint: disable=too-many-branches
# pylint: disable=too-many-lines
# pylint: disable=too-many-locals
# pylint: disable=too-many-statements
from __future__ import print_function # pylint: disable=unused-variable
import sys
import re
import os
import getopt
import tempfile
# sedparse is a translation to Python of the GNU sed parser C code
# https://github.com/aureliojargas/sedparse
import sedparse
# Program identity: version, name used in messages, and project website
__version__ = "2.0.0"
myname = "sedsed"
myhome = "https://aurelio.net/projects/sedsed/"
# Default config
# These module-level values are the defaults; parse_command_line() rebinds
# most of them according to the options informed by the user.
# fmt: off
sedbin = "sed" # name (or full path) of the sed program (--sedbin)
color = 1 # colored output or not? (--color, --nocolor)
dump_debug = 0 # dump debug script to screen? (--dump-debug)
indent_prefix = " "*4 # default indent prefix for blocks (--prefix)
debug_prefix = "\t\t" # default prefix for debug commands
action = "indent" # default action if none specified (-d,-i,-t,-H)
DEBUG = 0 # set developer's debug level [0-3] (--_debuglevel)
# fmt: on
# HTML data for --htmlize
# You may edit here to change the defaults
#
# Keys in lowercase are sed script elements (the same names used as fields
# of the sedsed AST, plus a few painting-only kinds). An empty value means
# "do not paint this element". Keys in uppercase are attributes of the
# <body> tag of the generated page.
html_colors = {
    # fmt: off
    "addr1": "#8080ff",
    "addr1flag": "#ff6060",
    "addr2": "#8080ff",
    "addr2flag": "#ff6060",
    "lastaddr": "",
    "modifier": "#ff6060",
    "id": "#ffff00",
    "content": "#ff00ff",
    "delimiter": "#ff6060",
    "pattern": "#8080ff",
    "replace": "",
    "flag": "#00ff00",
    "comment": "#00ffff",
    "escape": "#ff6060",
    "special": "#00ff00",
    "pattmeta": "#ff00ff",
    "plaintext": "",
    "branch": "",
    "BGCOLOR": "#000000",
    "TEXT": "#ffffff",
    "LINK": "#ff00ff",
    "ALINK": "#ff00ff",
    "VLINK": "#ff00ff"
}
# Page header. The {NAME} fields are filled right here, from html_colors.
# Note that the %s will be expanded later (with the script file name)
html_header = """\
<html>
<head><meta name="Generator" content="sedsed --htmlize">
<title>Colorized %s</title></head>
<body bgcolor="{BGCOLOR}" text="{TEXT}"
      link="{LINK}" alink="{ALINK}" vlink="{VLINK}">
<pre>\
""".format(
    **html_colors
)
# Page footer: a credits line painted with the "comment" color and linking
# to the project website, followed by the closing tags.
html_footer = """
<font color="{}"><b>### colorized by <a \
href="{}">sedsed</a>, \
a debugger and code formatter for sed scripts</b></font>

</pre></body></html>\
""".format(
    html_colors["comment"], myhome
)
# sedsed expects multiline text (aic text, s/// replacement) to have this
# odd string instead of inner \n's in the string. It is converted back to a
# real line break when the command is composed for output.
linesep = "@#linesep#@"
# Data holders that will be set by command line options
# fmt: off
action_modifiers = [] # --hide contents and others
sedscript = [] # join all scripts found here
script_file = "" # last sedscript filename for --htmlize
quiet_flag = 0 # tell if the #n is needed or not
textfiles = [] # input files to be processed by sed when debugging
# fmt: on
# Color-related variables, will be set in set_colors()
color_YLW = ""
color_RED = ""
color_REV = ""
color_NO = ""
# Color-dependent variable, will be set after the command line parsing
newlineshow = ""
# Debug-related variables, will be set by set_debug_commands()
showpatt = ""
showhold = ""
save_t = ""
showcomm = ""
nullcomm = ""
# Regex to match the shebang, grouping the sed options
# Only the 'n' and 'f' option letters are captured.
topopts_regex = r"#!\s*/[^\s]+\s+-([nf]+)"
# All sed commands grouped by kind
# Each value is a string of one-letter command ids, so membership is tested
# with the `in` operator.
sedcmds = {
    "file": "rw" + "RW", # standard + GNU sed
    "multi": "sy",
    "solo": "=dDgGhHnNpPx" + "Fz", # standard + GNU sed
    "text": "aci" + "e", # standard + GNU sed
    "jump": ":bt" + "T", # standard + GNU sed
    "block": "{}",
    "int": "qQlL", # standard (q, l), GNU sed (q, l, Q, L)
    "misc": "v", # GNU sed
}
# All fields used by the sedsed AST dictionary
# parse() creates one dictionary per command, having all these keys.
cmdfields = [
    "linenr",
    "addr1",
    "addr1flag",
    "addr2",
    "addr2flag",
    "lastaddr",
    "modifier",
    "id",
    "content",
    "delimiter",
    "pattern",
    "replace",
    "flag",
    "comment",
]
# -----------------------------------------------------------------------------
# Special adjustments
# -----------------------------------------------------------------------------
# The identifier recognized by sed as STDIN
# - BSD sed does not support '-'
# - Windows, Termux and others do not have /dev/stdin
# So prefer /dev/stdin whenever it exists and fall back to '-' otherwise.
stdin_id = "/dev/stdin" if os.path.exists("/dev/stdin") else "-"
# Turn color OFF on Windows because ANSI.SYS is not installed by default.
# Windows users who have ANSI.SYS configured, can use the --color option
# or comment the following line.
# ANSI.SYS resources:
# http://www.evergreen.edu/biophysics/technotes/program/ansi_esc.htm#notes
# http://www3.sympatico.ca/rhwatson/dos7/v-ansi-escseq.html
if os.name == "nt":
    color = 0
# -----------------------------------------------------------------------------
# General Functions
# -----------------------------------------------------------------------------
def print_usage(exitcode=1):
    """Show the usage text and the project website, then quit.

    The program exits with `exitcode` (1 by default; --help uses 0).
    """
    usage_text = """
Usage: sedsed OPTION [-e sedscript] [-f sedscriptfile] [inputfile]
OPTIONS:
-f, --file add file contents to the commands to be parsed
-e, --expression add the script to the commands to be parsed
-n, --quiet suppress automatic printing of pattern space
--silent alias to --quiet
-d, --debug debug the sed script
--hide hide some debug info (options: PATT,HOLD,COMM)
--color shows debug output in colors (default: ON)
--nocolor no colors on debug output
--dump-debug dumps to screen the debugged sed script
-i, --indent script beautifier, prints indented and
one-command-per-line output do STDOUT
--prefix indent prefix string (default: 4 spaces)
--sedbin specify sed executable (name or full path)
-t, --tokenize script tokenizer, prints extensive
command by command information
-H, --htmlize converts sed script to a colorful HTML page
-V, --version prints the program version and exit
-h, --help prints this help message and exit
"""
    print(usage_text)
    print("Website: %s\n" % myhome)
    sys.exit(exitcode)
def fatal_error(msg):
    """Report `msg` on STDERR, prefixed by the program name, and exit 1.

    Every error message of the program goes through here.
    """
    report = "ERROR: %s: %s" % (myname, msg)
    print(report, file=sys.stderr)
    sys.exit(1)
def echo(msg):  # pylint: disable=unused-variable
    """Print `msg` between the bold yellow and the reset terminal escapes."""
    painted = "\033[33;1m%s\033[m" % msg
    print(painted)
def devdebug(msg, level=1):  # pylint: disable=unused-variable
    """Print a developer's debug message.

    Nothing is shown when debugging is off (DEBUG is zero) or when the
    current DEBUG level is lower than the `level` of this message.
    """
    if not DEBUG or DEBUG < level:
        return
    print("+++ DEBUG%d: %s" % (level, msg))
def read_file(file_path):
    """Read a file and return its lines as a list, without line breaks.

    When `file_path` is the STDIN identifier (or "-"), the lines are read
    from STDIN. A ^C while reading STDIN exits the program with status 1;
    a file that cannot be read aborts with an error message.
    """
    if file_path not in (stdin_id, "-"):
        try:
            with open(file_path) as f:
                data = f.readlines()
        except IOError as e:
            fatal_error("Cannot read file: %s\n%s" % (file_path, e))
    else:
        try:
            data = sys.stdin.readlines()
        except KeyboardInterrupt:  # ^C
            # Ideally the exit code should be 128+signal.SIGINT in Unix, but
            # I'm not sure about other platforms. So I'll keep it simple.
            sys.exit(1)

    # Remove any trailing \n and \r from every line
    stripped = []
    for line in data:
        stripped.append(re.sub("[\n\r]+$", "", line))
    return stripped
def write_file(file_path, lines):
    """Save the `lines` list into a file, one item per line.

    Every item is written with exactly one line break at its end, no matter
    if it already came with one or not. A file that cannot be written aborts
    the program with an error message.
    """
    try:
        with open(file_path, "w") as f:
            # TODO maybe use os.linesep? - all this is really necessary?
            # ensuring line break: remove the current one (if any), add ours
            normalized = []
            for line in lines:
                normalized.append(re.sub("\n$", "", line) + "\n")
            f.writelines(normalized)
    except IOError as e:
        fatal_error("Cannot write file: %s\n%s" % (file_path, e))
def system_command(cmd):
    """Run `cmd` in the system shell and capture its STDOUT.

    Returns a (exit_code, program_output[]) tuple:

    - exit_code is None when the command succeeded, otherwise it is the
      integer exit code of the command.
    - program_output is the list of output lines, with the trailing
      whitespace (including the line break) removed.
    """
    fd = os.popen(cmd)
    output = [line.rstrip() for line in fd.readlines()]  # stripping \s*\n

    # close() returns None on success. On failure, Windows gives the exit
    # code itself, while the other systems give a 16-bit wait status where
    # the exit code lives in the high byte.
    ret = fd.close()
    if ret and os.name != "nt":
        exit_code = ret >> 8
        if exit_code == 0:
            # High byte is empty: the command was killed by a signal.
            # Use the shell convention (128 + signal number) to make sure
            # the failure is still reported as a non-zero code.
            exit_code = 128 + (ret & 0x7F)
        ret = exit_code
    return ret, output
def validate_script_syntax(script_text):
    """Validate a sed script using system's sed.

    `script_text` is the script as a list of lines. It is saved to a
    temporary file and executed by the configured sed (`sedbin`) against an
    empty temporary file. If sed fails, the program is aborted.
    """
    # Using tmpfile2 because "sed -f script /dev/null" won't work in Windows
    #
    # mkstemp() creates the files atomically, so nobody can hijack their
    # names between the name generation and the file creation. We only need
    # the paths here: write_file() reopens the files by name.
    fd1, tmpfile1 = tempfile.mkstemp()
    os.close(fd1)
    fd2, tmpfile2 = tempfile.mkstemp()
    os.close(fd2)

    try:
        write_file(tmpfile1, script_text)
        write_file(tmpfile2, "")

        # Note that even when running against an empty file, there could be
        # consequences on the system, such as a 'w' command writing files.

        # sed -f sed_script empty_file
        ret, _ = system_command("%s -f '%s' '%s'" % (sedbin, tmpfile1, tmpfile2))
    finally:
        # Always clean up, even when something above has aborted
        os.remove(tmpfile1)
        os.remove(tmpfile2)

    # The sed command will fail when there's something wrong:
    # - syntax error
    # - unknown command
    # - permission denied for file read/write commands (r, w, s///w)
    # Example: touch a; chmod 000 a; sedsed -d -e 'w a'
    if ret:
        # At this point, the sed error message was already shown to the user,
        # explaining the reason for the failure. So now we abort giving some
        # context of what we were trying to do.
        fatal_error(
            "%d: Failed validating your script using system's sed: %s" % (ret, sedbin)
        )
def set_colors():
    """Set the global color_* variables according to the `color` config.

    With colors ON they hold the terminal escapes for: yellow text, red
    text, reverse video and back to default. With colors OFF, they are all
    empty strings.
    """
    # pylint: disable=global-statement
    global color_YLW, color_RED, color_REV, color_NO

    if not color:
        color_YLW = color_RED = color_REV = color_NO = ""
        return

    color_YLW = "\033[33;1m"
    color_RED = "\033[31;1m"
    color_REV = "\033[7m"
    color_NO = "\033[m"
# -----------------------------------------------------------------------------
# Command line
# -----------------------------------------------------------------------------
def parse_command_line(arguments=None):
    """Parse the command line and set the global config variables.

    `arguments` is the list of command line arguments. When it is not
    informed (or is empty), sys.argv[1:] is used.

    Options are processed in the order they were typed, so a later option
    may override what an earlier one has set (for example, the color).
    After all the options are read, this function also: makes sure there
    is a sed script to parse, sets the input files (or STDIN), inserts the
    #n line when in quiet mode and sets the color-related globals.

    Invalid options, --help and --version make the program exit.
    """
    # pylint: disable=global-statement, global-variable-not-assigned
    global action
    global action_modifiers
    global color
    global DEBUG
    global dump_debug
    global indent_prefix
    global newlineshow
    global quiet_flag
    global script_file
    global sedscript
    global sedbin
    global textfiles
    arguments = arguments or sys.argv[1:]
    # Here's all the valid command line options
    short_options = "he:f:ditVHn"
    long_options = [
        # actions
        "debug",
        "tokenize",
        "htmlize",
        "indent",
        # sed-like
        "version",
        "help",
        "file=",
        "expression=",
        "silent",
        "quiet",
        # misc
        "nocolor",
        "color",
        "hide=",
        "prefix=",
        "sedbin=",
        # other
        "dump-debug",
        # admin
        "_debuglevel=",
        "_stdout-only",
        "dumpcute",
    ]
    # Check it!
    try:
        opt, args = getopt.getopt(arguments, short_options, long_options)
    except getopt.error as errmsg:
        fatal_error("%s (try --help)" % errmsg)
    # Command Line is OK, now let's parse its values
    # Each item in `opt` is an (option, value) pair
    for o in opt:
        if o[0] in ("-d", "--debug"):
            action = "debug"
        elif o[0] in ("-i", "--indent"):
            action = "indent"
            color = 0
        elif o[0] in ("-t", "--tokenize"):
            action = "token"
            color = 0
        elif o[0] in ("-H", "--htmlize"):
            action = "html"
            color = 0
        elif o[0] in ("-n", "--quiet", "--silent"):
            quiet_flag = 1
        elif o[0] in ("-e", "--expression"):
            # one item per script line
            sedscript.extend(o[1].split("\n"))
        elif o[0] in ("-f", "--file"):
            sedscript.extend(read_file(o[1]))
            script_file = o[1]
        elif o[0] in ("-h", "--help"):
            print_usage(0)
        elif o[0] in ("-V", "--version"):
            print("%s v%s" % (myname, __version__))
            sys.exit(0)
        elif o[0] == "--dump-debug":
            action = "debug"
            dump_debug = 1
            color = 0
        elif o[0] == "--nocolor":
            color = 0
        elif o[0] == "--color":
            color = 1
        elif o[0] == "--hide":
            # --hide=comm,hold ==> action_modifiers = ['nocomm', 'nohold']
            for hide in o[1].split(","):
                hide_me = hide.strip().lower()
                action_modifiers.append("no" + hide_me)
        elif o[0] == "--prefix":
            # Is the prefix valid?
            # (anything left after removing the whitespace is invalid)
            if re.sub(r"\s", "", o[1]):
                fatal_error("--prefix: must be spaces and/or TABs")
            indent_prefix = o[1]
        elif o[0] == "--sedbin":
            sedbin = o[1]
        # Undocumented admin options
        elif o[0] == "--_debuglevel":
            DEBUG = int(o[1])
        elif o[0] == "--_stdout-only":
            action = "debug"
            # saved without the leading dashes: _stdout-only
            action_modifiers.append(o[0][2:])
        elif o[0] == "--dumpcute":
            action = "dumpcute"
            DEBUG = 0
            color = 1
    # There's a SED script?
    if not sedscript:
        if args:
            # the script is the only argument (echo | sed 's///')
            sedscript.append(args.pop(0))
        else:
            fatal_error("there's no SED script to parse! (try --help)")
    # Get all text files, if none, use STDIN
    textfiles = args or [stdin_id]
    # All command line arguments were read and parsed. Now we need to do some
    # adjustments in the data, based on the current config state.
    # Add the leading #n to the sed script, when using -n
    if quiet_flag:
        sedscript.insert(0, "#n")
    # At this point we know if colors are configured or not
    set_colors()
    # When showing the inner \n's to the user use this red \N
    newlineshow = "%s\\N%s" % (color_RED, color_NO)
# The SED debugger magic lines
# ----------------------------
#
# Here is where the 'magic' lives. The heart of this program are the
# following lines, which are the special SED commands responsible for
# the DEBUG behaviour. For *each* command of the original script,
# several commands are added before, to show buffers and command
# contents. Some tricks are needed to preserve script's original
# behaviour, they are explained ahead.
#
# 1. Show PATTERN SPACE contents:
# The 'PATT:' prefix is added, then the 'l' command shows the
# buffer contents, then the prefix is removed.
#
# 2. Show HOLD SPACE contents:
# Similar to PATTERN SPACE, but use the 'x' command to access and
# restore the HOLD buffer contents. The prefix used is 'HOLD:'.
#
# 3. Show current SED COMMAND:
# Uses a single 'i' command to show the full 'COMM:' line, as it
# does not depend on execution data. The color codes are added or
# not, depending on user options.
#
# 4. 'Last Address' trick:
# On SED, the empty address // refers to the last address matched.
# As this behaviour can be affected when several DEBUG lines are
# inserted before the command, sedsed uses a trick to force it.
# The last address used on the original script is repeated with a
# null command (/last-address/ y/!/!/). This way sedsed repeats the
# addressing, ensuring the next command will have it as the right
# 'last' address.
#
# 5. 't Status' trick:
# The 't' command behaviour, from SED manual page:
#
# If a s/// has done a successful substitution since the last
# input line was read and since the last t command, then branch
# to label
#
# As all the DEBUG commands use lots of 's///' commands, the 't'
# status is always true. The trick here is to add fake labels
# between *any* command and fake 't' commands to jump to them:
#
#
# t zzset001
# ... debug commands ...
# t zzclr001
# : zzset001
# ... debug commands ...
# : zzclr001
#
#
# The DEBUG commands are repeated and placed into two distinct
# blocks: 'zzset' and 'zzclr', which represents the 't' status
# of the last command. The execution order follows:
#
# zzset: 1st jump (t), then debug (s///), t status is ON
# zzclr: 1st debug (s///), then jump (t), t status is OFF
#
# The 001 count is incremented on each command to have unique
# labels.
#
# For the GNU sed 'T' command, the behaviour is the opposite: it only
# branches when there was *no* successful substitution. Luckily, the
# trick used for 't' applies to 'T' with no changes, because we can
# save and restore the correct last 's///' status.
#
#
# --- THANK YOU VERY MUCH ---
#
# - Paolo Bonzini (GNU sed 4.x maintainer) for the idea of the
# 't status' trick.
#
# - Thobias Salazar Trevisan for the idea of using the 'i'
# command for the COMM: lines.
#
def set_debug_commands():
    """Compose the sed commands used by the debugger and save them as globals.

    Sets: showpatt (show pattern space), showhold (show hold space),
    showcomm (show the current sed command), nullcomm (null sed command to
    restore the last address) and save_t (the 't' and 'T' status trick).

    The \\a char and the #DEBUG# text are placeholders, replaced later for
    each command of the script being debugged.
    """
    # pylint: disable=global-statement
    global showpatt
    global showhold
    global save_t
    global showcomm
    global nullcomm

    # fmt: off
    patt_cmds = ["s/^/PATT:/", "l", "s/^PATT://"]
    hold_cmds = ["x", "s/^/HOLD:/", "l", "s/^HOLD://", "x"]
    comm_cmds = ["i\\", "COMM:%s\a%s" % (color_YLW, color_NO)]
    t_status_cmds = ["t zzset\a\n#DEBUG#", "t zzclr\a",
                     ":zzset\a\n#DEBUG#", ":zzclr\a"]
    # fmt: on

    line_break = "\n" + debug_prefix

    def one_per_line(cmds):
        "One per line, with prefix (spaces)"
        return debug_prefix + line_break.join(cmds) + "\n"

    # If user specified --hide, the respective DEBUG commands are unset
    hidden = action_modifiers
    showpatt = "" if "nopatt" in hidden else one_per_line(patt_cmds)
    showhold = "" if "nohold" in hidden else one_per_line(hold_cmds)
    save_t = one_per_line(t_status_cmds)

    # In the 'i' command, only the first line gets the prefix
    if "nocomm" in hidden:
        showcomm = ""
    else:
        showcomm = debug_prefix + "\n".join(comm_cmds) + "\n"

    nullcomm = "y/!/!/"
# -----------------------------------------------------------------------------
# Auxiliary Functions - Tools
# -----------------------------------------------------------------------------
def escape_text_commands_specials(text):
    """Return `text` with every backslash doubled (escape the escape)."""
    return "\\\\".join(text.split("\\"))
def paint_html(element, txt=""):
    """Return `txt` converted to HTML, painted as the `element` kind.

    `element` is a key of html_colors or one of the painting-only kinds
    ("branch" and "target"). The HTML special chars found in `txt` are
    escaped, some elements get extra highlights or links, and finally the
    text is wrapped by a <font> tag if there is a color configured for the
    element. An empty `txt` is returned untouched.
    """
    if not txt:
        return txt  # nothing to paint

    # Escape HTML special chars
    # The & must be the first, otherwise the & from the other entities
    # would be escaped again.
    txt = txt.replace("&", "&amp;")
    txt = txt.replace(">", "&gt;")
    txt = txt.replace("<", "&lt;")

    # Some color adjustments and emphasis
    if element == "id" and txt in sedcmds["block"]:
        element = "delimiter"
    elif element == "id" and txt == ":":
        element = "content"
    elif element == "replace":
        # highlight \n, & and \$
        newtxt = paint_html("special", "\\" + linesep)
        txt = txt.replace("\\" + linesep, newtxt)
        # At this point every original & is an &amp; entity. Matching the
        # full entity avoids breaking the &gt; and &lt; just inserted.
        txt = re.sub("(\\\\[1-9]|&amp;)", paint_html("special", "\\1"), txt)
    elif element == "pattern":
        # highlight ( and |
        txt = re.sub("(\\\\)([(|])", "\\1" + paint_html("pattmeta", "\\2"), txt)
    elif element == "plaintext":
        # highlight \$
        newtxt = paint_html("special", "\\" + linesep)
        txt = txt.replace("\\" + linesep, newtxt)
    elif element == "branch":
        # nice link to the label
        txt = '<a href="#%s">%s</a>' % (txt, txt)
    elif element == "target":
        # link target
        txt = '<a name="%s">%s</a>' % (txt, txt)
        element = "content"

    # Paint it!
    if html_colors.get(element) and txt:
        font_color = html_colors[element]
        txt = '<font color="%s"><b>%s</b></font>' % (font_color, txt)
    return txt
# -----------------------------------------------------------------------------
# Hardcore Address/Command Composer Functions
# -----------------------------------------------------------------------------
def compose_sed_address(data):
    """Format the full sed address as plain text or HTML.

    `data` is the dictionary of a single command. Returns an empty string
    when the command has no address. Otherwise returns "addr1" or
    "addr1,addr2" (flags included), followed by a space.
    """
    if not data["addr1"]:
        return ""  # no address

    # (address, flag) pairs for both addresses
    if action == "html":
        pairs = [
            (data["addr1html"], paint_html("addr1flag", data.get("addr1flag"))),
            (data.get("addr2html"), paint_html("addr2flag", data.get("addr2flag"))),
        ]
    else:
        pairs = [
            (data.get("addr1"), data.get("addr1flag")),
            (data.get("addr2"), data.get("addr2flag")),
        ]
    first, second = ["%s%s" % pair for pair in pairs]

    # address, space, (command)
    if not data["addr2"]:
        return first + " "
    return "%s,%s" % (first, second) + " "
def compose_sed_command(data):
    """Format a sed command (without the address) as plain text or HTML.

    `data` is the dictionary of a single command. Commands having a
    delimiter (s///, y///) are composed from their pattern, replace and
    flag parts. All the others are composed from the id and the content.
    The internal line separator is converted back to a real line break.
    """
    as_html = action == "html"

    if data["delimiter"]:  # s///
        keys = (
            "modifier",
            "id",
            "delimiter",
            "pattern",
            "delimiter",
            "replace",
            "delimiter",
            "flag",
        )
        if as_html:
            parts = tuple(paint_html(key, data[key]) for key in keys)
        else:
            parts = tuple(data[key] for key in keys)
        cmd = "%s%s%s%s%s%s%s%s" % parts

        if data["content"]:  # s///w filename
            if as_html:
                cmd = "%s %s" % (cmd, paint_html("content", data["content"]))
            else:
                cmd = cmd + " " + data["content"]
    else:
        # spacer on r,w,b,t,v commands only
        spaceme = sedcmds["file"] + sedcmds["jump"] + sedcmds["int"] + "v"
        spaceme = spaceme.replace(":", "")  # : label (no space!)
        idsep = " " if (data["id"] in spaceme and data["content"]) else ""

        if not as_html:
            cmd = "%s%s%s%s" % (data["modifier"], data["id"], idsep, data["content"])
        else:
            # The kind of the content depends on the command
            if data["id"] in sedcmds["text"]:
                content_type = "plaintext"
            elif data["id"] in ("b", "t", "T"):
                content_type = "branch"
            elif data["id"] == ":":
                content_type = "target"
            else:
                content_type = "content"
            cmd = "%s%s%s%s" % (
                paint_html("modifier", data["modifier"]),
                paint_html("id", data["id"]),
                idsep,
                paint_html(content_type, data["content"]),
            )

    return cmd.replace(linesep, "\n")
# -----------------------------------------------------------------------------
# The dump* Functions - They 4mat 4you!
# -----------------------------------------------------------------------------
def dump_key_value_pair(datalist):
    """Return the command data as "field:value" lines (lots of lines!).

    All the fields listed in the header (datalist[0]["fields"]) are shown
    for every command, followed by an empty line. Blank lines of the script
    are skipped. Note that the "replace" field of the command is updated
    in place, to show the inner line breaks.
    """
    report = []
    for entry in datalist[1:]:  # skip headers at 0
        if entry["id"]:  # not a blank line
            for field in datalist[0]["fields"]:
                if field == "replace":
                    entry[field] = entry[field].replace(linesep, newlineshow)
                report.append("%10s:%s" % (field, entry[field]))
            report.append("")
    return report
# Format: line:ad1:ad1f:ad2:ad2f:mod:cmd:content:delim:patt:rplc:flag:comment
def dump_oneliner(datalist, fancy=0):  # pylint: disable=unused-variable
    """Return one command per line, elements separated by : (looooong lines).

    Every line starts with the line number, followed by all the other
    fields listed in the header (datalist[0]["fields"]). For the blank
    lines of the script, only the line number is shown. When `fancy` is
    on, each value is shown in reverse video.

    The returned list is always composed of strings.
    """
    outlist = []
    r = n = ""
    if fancy:
        r = "\033[7m"
        n = "\033[m"
    for data in datalist[1:]:  # skip headers at 0
        # Always a string, so the blank lines are not saved as numbers
        parts = [str(data["linenr"])]
        if data["id"]:
            for key in datalist[0]["fields"][1:]:  # skip linenr
                parts.append("%s%s%s" % (r, data[key], n))
        outlist.append(":".join(parts))
    return outlist
def dump_cute(datalist):
    """Return a strange representation of SED commands. Use --dumpcute.

    Blank lines and comments take one output line each. All the other
    commands take three: a separator, the addresses and the command itself
    (with its comment), using reverse video to highlight the values.
    """
    outlist = []
    r = color_REV
    n = color_NO
    for data in datalist[1:]:  # skip headers at 0
        if not data["id"]:
            outlist.append("%40s" % "[blank]")
            continue
        if data["id"] == "#":
            outlist.append(data["comment"])
            continue

        idsep = " " if data["id"] in ("b", "t", "T") else ""
        cmd = "%s%s%s%s" % (data["modifier"], data["id"], idsep, data["content"])
        if data["delimiter"]:
            cmd = "%s%s%s%s%s%s%s" % (
                cmd,
                data["delimiter"],
                data["pattern"],
                data["delimiter"],
                data["replace"],
                data["delimiter"],
                data["flag"],
            )
        # The inner line breaks are shown outside the reverse video
        cmd = cmd.replace(linesep, n + newlineshow + r)

        address = "adr: %s%s%s%s ::: %s%s%s%s" % (
            r,
            data["addr1"],
            data["addr1flag"],
            n,
            r,
            data["addr2"],
            data["addr2flag"],
            n,
        )
        outlist.extend(
            [
                "-" * 40,
                address,
                "cmd: %s%s%s [%s]" % (r, cmd, n, data["comment"]),
            ]
        )
    return outlist
# dump_script: This is a handy function, used by --indent AND --htmlize
# It formats the SED script in a human-friendly way, with one command
# per line and adding spaces on the right places. If --htmlize, it
# also adds the HTML code to the script.
#
def dump_script(datalist, indent_prefix):
    """Return the indented script in plain text or HTML.

    The result is a list of lines, one command per line. `indent_prefix` is
    the string repeated once for each block nesting level. In the HTML
    action, the commands are painted and the page header and footer are
    added.
    """
    as_html = action == "html"
    depth = 0
    outlist = []

    if as_html:
        outlist.append(html_header % os.path.basename(script_file))

    for data in datalist[1:]:  # skip headers at 0
        cmd_id = data["id"]

        if not cmd_id:  # blank line
            outlist.append("")
            continue

        if cmd_id == "#":
            comment = data["comment"]
            if as_html:
                comment = paint_html("comment", comment)
            outlist.append(indent_prefix * depth + comment)
            continue

        # The closing } is already shown one level back. The opening {
        # is shown in the current level: only its contents go deeper.
        if cmd_id == "}":
            depth = depth - 1
        indentstr = indent_prefix * depth
        if cmd_id == "{":
            depth = depth + 1

        cmd = compose_sed_command(data)
        addr = compose_sed_address(data)

        # saving full line
        line = "%s%s%s" % (indentstr, addr, cmd)
        if data["comment"]:
            # Inline comments are aligned at column 40
            # The leading ; before # is required by non-GNU seds
            line = "%-39s;%s" % (line, data["comment"])
        outlist.append(line)

    if as_html:
        outlist.append(html_footer)
    return outlist
# -----------------------------------------------------------------------------
# do_debug - Here is where the fun begins
# -----------------------------------------------------------------------------
#
# This function performs the --debug action.
#
# After the SED script was parsed by the parser (below), this function
# is called with the script data found. It loops, shouts and screams,
# inserting the nice DEBUG lines between the SED script commands.
#
# After all lines are composed, it calls the system's SED to run the
# script, and SED will do its job, but this time showing you all the
# secrets that the PATTERN SPACE and HOLD SPACE buffers hold.
#
def do_debug(datalist):
    """Perform the --debug action: compose and run the debugged script.

    `datalist` is the sedsed AST: the header dictionary followed by one
    dictionary per command. The DEBUG commands are inserted before each
    command of the original script. Then the resulting script is either
    printed (--dump-debug) or saved to a temporary file and executed by the
    system's sed over the input files.
    """
    outlist = []
    cmdlineopts = "f"
    t_count = 0
    hideregisters = 0

    set_debug_commands()

    if "topopts" in datalist[0]:
        cmdlineopts = datalist[0]["topopts"]

    # If we have at least one 't' or 'T' command on the script, we need
    # to save the t command status between debug commands. As they perform
    # s/// commands, the t status of the "last substitution" is lost.
    # So, we save the status doing a nice loop trick before *every*
    # command (necessary overhead). This loops uses the :zzsetNNN and
    # zzclrNNN labels, where NNN is the label count.
    # TIP: t status resets: line read, t call
    if datalist[0]["has_t"]:
        t_count = 1

    for data in datalist[1:]:  # skip headers at 0
        if not data["id"]:
            continue  # ignore blank line

        if data["id"] == "#":
            outlist.append("%s\n" % (data["comment"]))
        else:
            cmd = compose_sed_command(data)
            addr = compose_sed_address(data)

            cmdshow = cmd.replace("\n", newlineshow + color_YLW)
            cmdshow = escape_text_commands_specials(addr + cmdshow)
            showsedcmd = showcomm.replace("\a", cmdshow)

            registers = showpatt + showhold
            if hideregisters:
                registers = ""

            showall = "%s%s" % (registers, showsedcmd)

            # Add the 't status' trick to commands.
            # Exception: read-next-line commands (n,d,q)
            # Exception: no PATT/HOLD registers to show (no s///)
            if t_count and showall:
                if data["id"] not in ("n", "d", "q") and registers:
                    tmp = save_t.replace("\a", "%03d" % t_count)
                    showall = tmp.replace("#DEBUG#", showall)
                    t_count = t_count + 1

            # null cmd to restore last addr: /addr/y/!/!/
            # Bug: https://github.com/aureliojargas/sedsed/issues/15
            if data["lastaddr"]:
                showall = showall + debug_prefix + data["lastaddr"] + nullcomm + "\n"

            # after jump, block or void commands don't show
            # registers, because they're not affected.
            # exception: after b or t without target
            # (read next line)
            hideregisters = 0
            if data["id"] in sedcmds["jump"] and data["content"]:
                hideregisters = 1
            elif data["id"] in sedcmds["block"]:
                hideregisters = 1
            elif data["id"] == "v":
                hideregisters = 1

            outlist.append("%s#%s\n%s\n" % (showall, "-" * 50, addr + cmd))

    outlist.append(showpatt + showhold)  # last line status

    # executing sed script
    cmdextra = ""
    if "_stdout-only" in action_modifiers:
        # cmdextra = "| egrep -v '^PATT|^HOLD|^COMM|\$$|\\$'" # sed
        cmdextra = "-l 9999 | egrep -v '^PATT|^HOLD|^COMM'"  # gsed
    inputfiles = " ".join(textfiles)

    if dump_debug:
        for line in [re.sub("\n$", "", x) for x in outlist]:
            print(line)
        print(
            "\n# Debugged SED script generated by %s-%s (%s)"
            % (myname, __version__, myhome)
        )
    else:
        # mkstemp() creates the file atomically, so nobody can hijack its
        # name between the name generation and the file creation. We only
        # need the path here: write_file() reopens the file by name.
        fd, tmpfile = tempfile.mkstemp()
        os.close(fd)
        try:
            write_file(tmpfile, outlist)
            os.system(
                "%s -%s %s %s %s" % (sedbin, cmdlineopts, tmpfile, inputfiles, cmdextra)
            )
        finally:
            # Always clean up, even when something above has aborted
            os.remove(tmpfile)
###############################################################################
# #
# SED Script Parser #
# ------------------------- #
# Extract Every Info of Every Command #
# #
###############################################################################
#
# Here we used to have a custom brute force buggy parser.
# Now we are using sedparse, a direct translation of the GNU sed C code.
#
# To avoid having to adapt the whole sedsed code to the sedparse AST, the
# following `parse()` function will convert the sedparse AST into the same AST
# used by the old parser: a list having a flat dictionary for each command.
#
# TODO properly document sedsed and sedparse AST's
def parse(sedscript):
    """
    Parse the sedscript (a list of strings) and return a sedsed AST (a flat list
    of dictionaries).

    The first item of the returned list is the header dictionary, with the
    keys 'fields' (the list of all field names), 'has_t' (1 when the script
    has a 't' or 'T' command) and, only when options were found in the
    shebang line, 'topopts'. Each one of the following items is a dictionary
    for a single command, having all the keys from `cmdfields`. Commands
    with an address also get the 'addr1html' (and 'addr2html') key.
    """
    the_program = []
    ret = []
    ret.append({})  # for header
    # Parse the sed script and save the output to `the_program`
    sedparse.compile_string(the_program, "\n".join(sedscript) + "\n")
    ### Translate from GNU sed struct_sed_cmd objects to sedsed ZZ objects
    # Flag to detect if there's at least one 't' or 'T' command in the script.
    # If so, some special treatment is required in the debugger.
    has_t = 0
    # Stores the latest address. When an empty address command such as //p or
    # s//foo/ is found, this value will be saved into `cmddict['lastaddr']`.
    lastaddr = ""

    def set_address(gsed_data, sedsed_data, prefix="addr1"):
        """
        Save the address from `gsed_data` (a sedparse address object, or a
        false value when there is no address) into the `sedsed_data`
        dictionary, using the keys: prefix, prefix + 'html' and, for regex
        addresses only, prefix + 'flag'.
        """
        if not gsed_data:
            return
        if gsed_data.addr_regex:
            # set cmddict['addr1'] = /foo/
            sedsed_data[prefix] = "%s%s%s%s" % (
                gsed_data.addr_regex.escape(),
                gsed_data.addr_regex.slash,
                gsed_data.addr_regex.pattern,
                gsed_data.addr_regex.slash,
            )
            # set cmddict['addr1html']
            sedsed_data[prefix + "html"] = "%s%s%s%s" % (
                paint_html("escape", gsed_data.addr_regex.escape()),
                paint_html("delimiter", gsed_data.addr_regex.slash),
                paint_html("pattern", gsed_data.addr_regex.pattern),
                paint_html("delimiter", gsed_data.addr_regex.slash),
            )
            # set cmddict['addr1flag'] = I
            sedsed_data[prefix + "flag"] = gsed_data.addr_regex.flags
        else:
            # set cmddict['addr1'] = 99 | $
            sedsed_data[prefix] = str(gsed_data)
            sedsed_data[prefix + "html"] = paint_html("pattern", str(gsed_data))

    # For each sed command found by the parser
    for xx in the_program:
        # Set empty dict with all the keys
        cmddict = {}
        for key in cmdfields:
            cmddict[key] = ""
        cmddict["id"] = xx.cmd
        cmddict["linenr"] = xx.line
        if xx.addr_bang:
            cmddict["modifier"] = "!"
        set_address(xx.a1, cmddict, "addr1")
        set_address(xx.a2, cmddict, "addr2")
        # Special case for regex addresses
        # Set cmddict['lastaddr'] for the current command when the address is
        # empty //. Otherwise just update the lastaddr holder.
        # TODO investigate bug in sedsed if both addresses are regexes, the
        # 'reset' address command should involve both addresses again, and
        # not only `lastaddr`
        if xx.a1 and xx.a1.addr_regex:
            if not xx.a1.addr_regex.pattern:
                cmddict["lastaddr"] = lastaddr
            else:
                lastaddr = cmddict["addr1"] + cmddict["addr1flag"]
        if xx.a2 and xx.a2.addr_regex:
            if not xx.a2.addr_regex.pattern:
                cmddict["lastaddr"] = lastaddr
            else:
                lastaddr = cmddict["addr2"] + cmddict["addr2flag"]
        # Collect the command-specific data, according to the command kind
        if xx.cmd == "\n":
            # blank line: represented by an empty id
            cmddict["id"] = ""
        elif xx.cmd == "#":
            cmddict["comment"] = "#" + xx.x.comment
            # 1st line, try to find #!/...
            if cmddict["linenr"] == 1:
                m = re.match(topopts_regex, cmddict["comment"])
                if m:  # we have options!
                    ret[0]["topopts"] = m.group(1)  # saved on list header
                del m
        elif xx.cmd in sedcmds["solo"] + sedcmds["block"]:
            pass  # nothing else to collect
        elif xx.cmd in sedcmds["text"]:
            if str(xx.x.cmd_txt):  # command "e" is allowed to be empty
                # The inner line breaks are saved as the `linesep` string
                cmddict["content"] = "\\%s%s" % (
                    linesep,
                    str(xx.x.cmd_txt).replace("\n", linesep),
                )
        elif xx.cmd in sedcmds["jump"] + "v":
            cmddict["content"] = xx.x.label_name
        elif xx.cmd in sedcmds["file"]:
            cmddict["content"] = xx.x.fname
        elif xx.cmd in sedcmds["int"]:
            # NOTE(review): -1 looks like the "no argument" value set by
            # sedparse - confirm
            if xx.x.int_arg > -1:
                cmddict["content"] = str(xx.x.int_arg)
        elif xx.cmd in sedcmds["multi"]:  # s/// & y///
            cmddict["delimiter"] = xx.x.cmd_subst.regx.slash
            cmddict["pattern"] = str(xx.x.cmd_subst.regx.pattern)
            cmddict["replace"] = str(xx.x.cmd_subst.replacement.text).replace(
                "\n", linesep
            )
            cmddict["flag"] = "".join(xx.x.cmd_subst.regx.flags)
            if "w" in cmddict["flag"]:
                cmddict["content"] = xx.x.cmd_subst.outf.name
        ## save sedsed specific data
        # saving last address content
        # A non-empty s/// pattern becomes the new last address. An empty
        # one (s//foo/) refers to the last address.
        if xx.cmd == "s":
            if cmddict["pattern"]:
                lastaddr = (
                    xx.x.cmd_subst.regx.escape()
                    + cmddict["delimiter"]
                    + cmddict["pattern"]
                    + cmddict["delimiter"]
                )
            else:
                cmddict["lastaddr"] = lastaddr
        if xx.cmd in ("t", "T"):
            has_t = 1
        ret.append(cmddict)
    # populating list header
    ret[0]["fields"] = cmdfields
    ret[0]["has_t"] = has_t
    return ret
def fix_partial_comments(commands):
    """
    Scan all commands and move comments to the previous command, if necessary.

    When a source line has a single command followed by a comment, preceded
    by a ';' or not, this comment is "tied" to the command: it is moved to
    the 'comment' field of the command and removed from the list.

    Examples:
        /foo/ d # remove foo
        /bar/ d ;# remove bar

    The --indent output for both will be:
        /foo/ d ;# remove foo
        /bar/ d ;# remove bar

    Returns a new list: the header, followed by the remaining commands.
    """
    # Commands that can have a comment tied to them
    commentable = (
        "".join(sedcmds[kind] for kind in ("solo", "block", "jump", "multi", "int"))
        + "v"
    )

    # Fake items at both ends, so it's always safe to look at the
    # neighbors: two items before and one after.
    sentinel = {"linenr": 0}
    cmds = [sentinel] + commands[1:] + [sentinel]

    # 0 is the fake and 1 is the first command (nothing previous to append),
    # so the first possible partial comment is at 2. The last is the fake.
    pos = 2
    while pos < len(cmds) - 1:
        before, previous, current, following = cmds[pos - 2 : pos + 2]

        is_partial = (
            current["id"] == "#"
            # ...previous command accepts comments
            and previous["id"] in commentable
            # ...there's only *one* previous command in the same source line
            and current["linenr"] != before["linenr"]
            and current["linenr"] == previous["linenr"]
            and current["linenr"] != following["linenr"]
        )
        if not is_partial:
            pos += 1
            continue

        # Move solo comment to previous command
        # Since we're removing, the position won't be incremented
        previous["comment"] = current["comment"]
        del cmds[pos]

    return [commands[0]] + cmds[1:-1]  # remove fakes
def entrypoint():
    """Run the program: read the command line, parse the script, do the action."""
    parse_command_line()

    # When debugging, the system's sed will be used to run the modified script.
    # So it's mandatory that the original script is runnable in that specific
    # sed version (i.e., no syntax errors and no unknown commands or flags).
    if action == "debug":
        validate_script_syntax(sedscript)

    # Parse the script and process/fix the resulting data.
    # AST is sedsed's internal data structure to represent a sed script.
    try:
        AST = fix_partial_comments(parse(sedscript))
    except sedparse.ParseError as err:
        fatal_error(err.message)

    # The debugger runs sed by itself, all the other actions just print
    if action == "debug":
        do_debug(AST)
        return

    if action in ("indent", "html"):
        lines = dump_script(AST, indent_prefix)
    elif action == "token":
        lines = dump_key_value_pair(AST)
    elif action == "dumpcute":
        lines = dump_cute(AST)
    else:
        return  # unknown action, nothing to do
    print("\n".join(lines))
# Run the program only when executed as a script, not when imported
if __name__ == "__main__":
    entrypoint()