#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# vim: ts=4 sw=4 expandtab
#
# pylint: disable=invalid-name
# pylint: disable=too-many-statements, too-many-branches, too-many-locals
# pylint: disable=global-statement
#
# pre-commit-exec-exempt
#
"""
xyz
~~~
Plot a (X, Y, Z) heatmap, polar, scatter, or bubble chart from a dataset.

Parameters:
    - Dataset_file (csv)
    - X-axis column (input feature) name - mapped to X-axis
    - Y-axis column (input feature) name - mapped to Y-axis
    - Z-axis column (target feature) name - mapped to Z-axis (e.g. color)

Optional parameters/modifiers:
    name=value
    These will be passed to the plotting func and if supported,
    will modify the appearance of the graph.

Can be called with a different name (e.g. via a link/symlink)
    xyz:    will generate a contoured heat-map (default)
    xyzs:   will generate a scatter-plot only (no heatmap)
    xyzb:   will generate a bubble-chart (Z is bubble-size & color)
    xyzp:   polar projection: X-dimension is theta

Call xyz without parameters to get a usage() message.
"""
import sys
import os
import re
from pprint import pformat
import signal

import numpy as np
import pandas as pd     # type: ignore
import scipy
import scipy.interpolate
from scipy.ndimage import gaussian_filter

import matplotlib.pyplot as plt
from matplotlib import colors

# Don't print stack trace on CTRL-C
signal.signal(signal.SIGINT, lambda x, y: sys.exit(0))

ARGV0 = os.path.basename(sys.argv[0])

# Debug level: d= on command line
DBG = 0

#
# A better dark red to dark blue (trimmed rainbow-spectrum)
# cdict with yellow centered exactly at 0 and around it
# (note: rgb->bgr order is inverted)
ariel_cdict = {
    'blue': [[0.0, None, 0.0],
             [0.25, 0.0, 0.0],
             [0.48, 0.0, 0.0],
             [0.52, 0.0, 0.2],
             [0.75, 0.6666666666666666, 0.8],
             [1.0, 1.0, None]],
    'green': [[0.0, None, 0.0],
              [0.25, 0.0, 0.2],
              # This is the yellow range:
              [0.48, 0.9333333333333333, 1.0],
              [0.52, 1.0, 1.0],
              [0.75, 0.8666666666666667, 0.8666666666666667],
              [1.0, 0.0, None]],
    'red': [[0.0, None, 0.6666666666666666],
            [0.25, 1.0, 1.0],
            [0.48, 1.0, 1.0],
            [0.52, 1.0, 0.8666666666666667],
            [0.75, 0.0, 0.0],
            [1.0, 0.0, None]]
}
ariel_cmap = colors.LinearSegmentedColormap('ariel', ariel_cdict)
plt.colormaps.register(cmap=ariel_cmap)
# -- Old syntax, matplotlib deprecation warnings
# plt.register_cmap(name='ariel', cmap=ariel_cmap)
# plt.register_cmap(name='a', cmap=ariel_cmap)   # short alias

MyFont = {
    'family': ['FreeSans', 'Bitstream Vera Sans', 'Arial'],
    'weight': 'bold',
    'style': 'italic',
    # 'weight': 'medium',
}


def err(msg):
    """Print msg to stderr w/o terminating newline"""
    sys.stderr.write(msg)
    sys.stderr.flush()


def myusage(msg=None):
    """
    Print an optional message followed by the usage text to stderr,
    then exit with status 1.
    """
    if msg:
        err(msg)

    err(f"Usage: {ARGV0}[sb] [param=value ...] "
        "data_file x_name y_name z_name\n\n"
        " data_file can be *.csv or *tsv\n"
        " If data_file is '-' read data from stdin\n"
        " mode is determined by argv[0]:\n"
        " xyz: 3D heatmap w/ contours (default)\n"
        " xyzs: scatter-plot\n"
        " xyzb: bubble-plot (like scatter, but bigger dots)\n"
        " xyzp: polar projection (X-dimension is angle (theta))\n"
        " x_name y_name z_name can be numeric (0-based indices)\n"
        " Supported params:\n"
        "\ttitle= # Set chart title\n"
        "\tcmap= # Colormap/colorscheme name (e.g. 'jet_r')\n"
        "\tstyle= # Style name (e.g. 'seaborn', 'ggplot')\n"
        "\t[xyz]label= # [XYZ]-label (defaults to [xyz]name)\n"
        "\t[xyz]min= # [XYZ]-axis bottom clipping/trimming\n"
        "\t[xyz]max= # [XYZ]-axis top clipping/trimming\n"
        "\t[xyz]res= # [XYZ]-axis resolution\n"
        "\t[xyz]blur= # [XYZ]-axis gaussian blur\n"
        "\t[xyz]cs= # [XYZ]-axis PDF -> CDF via cumsum()\n"
        "\t[xy]scale= # [XY]-axis scale: linear|log|symlog\n"
        "\tds= # Scatter-plot dot size multiplier\n"
        "\talpha= # Alpha transparency for dots/bubbles\n"
        "\tgrid= # Add grid with specified alpha\n"
        "\tgriddata=0 # Do NOT gridify (interpolate) the data\n"
        "\tcl= # Add contour-lines (0 to disable)\n"
        "\tedgecolor= # Add edge to scatter-points/bubbles\n"
        "\tlinewidth= # Set linewidth in scatter-points/bubbles\n"
        "\tdbg= # Set debug level\n"
        "\thelp=[sc] # Help on avail styles & color schemes\n")
    sys.exit(1)


def dbg(level, msg):
    """Print message to stderr iff debug level is at least the 1st arg"""
    if int(DBG) >= int(level):
        err(msg)


def myget(dat, pat, default_value):
    """
    User-friendly get-value from kw-dict (user supplied name=value
    on command line), with flexible regex/prefix matching.

    Lookup order:
      1. `pat` used verbatim as a key of `dat`
      2. the first key of `dat` that `re.match(pat, key)` accepts
         (re.match anchors at the start of the key only, so an
         unanchored pattern acts as a prefix match); a warning is
         written to stderr when several keys match
      3. `default_value`
    """
    if pat in dat:
        # simplest case, exact match, done
        return dat[pat]

    # No exact match, try regex/prefix matches
    match_keys = [k for k in dat if re.match(pat, k)]
    if match_keys:
        if len(match_keys) > 1:
            err(f"multiple matches for pat:'{pat}': "
                f"({match_keys}): using 1st match only\n")
        return dat[match_keys[0]]

    # nothing worked, use the default value
    return default_value


def _select_column(df, name):
    """Return one column of df as a numpy array.

    An all-digits `name` is taken as a 0-based positional index,
    anything else as a column label.
    """
    if re.match(r'^\d+$', name):
        return df.iloc[:, int(name)].to_numpy()
    return df[name].to_numpy()


def _axis_bound(kw, pat, fallback):
    """Return the user-supplied bound matching `pat` as a float,
    or the result of calling `fallback()` when the user gave none."""
    val = myget(kw, pat, None)
    if val is None:
        return fallback()
    return float(val)


def data2df(datafile, xname, yname, zname, kw):
    """
    Load the data from a file into memory
    Select only the wanted/needed columns
    Trim rows to the [xyz]min/[xyz]max ranges given in kw (inclusive);
    a missing bound defaults to the column's own min/max.
    Return as DataFrame with columns named xname, yname, zname
    """
    dbg(1, f"| data2df: read_csv({datafile})...")
    raw = pd.read_csv(datafile, sep='[\t,]', engine='python')
    dbg(1, f" done: df.shape: {raw.shape}\n")

    # X, Y, Z columns: by either name or by integer index
    names = (xname, yname, zname)
    df = pd.DataFrame({name: _select_column(raw, name) for name in names})

    # Axis range trimming from command-line: [xyz]{min,max}
    masks = []
    for axis, name in zip('xyz', names):
        col = df[name]
        lo = _axis_bound(kw, f'^(?:{axis}m?i?n)$', col.min)
        hi = _axis_bound(kw, f'^(?:{axis}m?a?x)$', col.max)
        masks.append((lo <= col) & (col <= hi))

    return df.loc[masks[0] & masks[1] & masks[2]]


def grid_set_style() -> None:
    """Set our preferred style for the grid"""
    plt.grid(which='major', linestyle='-',
             alpha=0.7, color='black', linewidth=0.2)
    plt.grid(which='minor', linestyle=':',
             alpha=0.8, color='black', linewidth=0.2)


def my_style_tweaks():
    """
    Override matplotlib rcParams on top of the selected style.
    Mostly to make the labels nicer
    """
    style_dict = {
        'font.sans-serif': 'FreeSans',
        'font.family': 'sans-serif',
        'font.weight': 'bold',
        'font.style': 'italic',
        # 'font.size': 14.0,

        'text.color': 'white',
        'figure.facecolor': '#000',

        'axes.grid': True,
        'axes.grid.axis': 'both',
        'axes.grid.which': 'both',

        'grid.linestyle': '-',
        'grid.alpha': 1.0,
        'grid.color': 'black',
        'grid.linewidth': 0.5,

        'axes.facecolor': 'black',
        'axes.labelcolor': 'white',
        'axes.edgecolor': 'white',
        'axes.linewidth': 1.0,     # external frame around the chart
        # 'axes.titlesize': 20.0,

        # 'xtick.minor.visible': True,  # matplotlib bug? ghost vertical lines
        # 'ytick.minor.visible': True,
        'xtick.color': 'white',
        'ytick.color': 'white',

        # Small little ticks/wicks (not the grid)
        'xtick.major.size': 4,      # length of tick
        'ytick.major.size': 4,
        'xtick.major.width': 2,     # width of tick
        'ytick.major.width': 2,
        'xtick.major.pad': 4.0,     # distance of label from end of tick
        'ytick.major.pad': 4.0,

        # We don't want minor ticks, have to force to 0, or they show up
        'xtick.minor.size': 0,
        'ytick.minor.size': 0,
        'xtick.minor.width': 0,
        'ytick.minor.width': 0,

        'xtick.labelsize': 14.0,
        'ytick.labelsize': 14.0,
        'legend.fontsize': 13.0,
    }
    plt.rcParams.update(style_dict)


def _mode_from_argv0(argv0):
    """
    Map the invocation name to a plot mode:
        'b' bubble-chart, 's' scatter plot, 'p' polar, 'h' heatmap.

    The file extension is dropped before looking for the mode letter,
    so running the script as e.g. 'xyz.py' is not mistaken for polar
    mode because of the 'p' in '.py'.
    """
    stem = os.path.splitext(argv0)[0]
    for letter in ('b', 's', 'p'):
        if letter in stem:
            return letter
    return 'h'  # default: heatmap (gridded & interpolated)


def xyz(kw):
    """
    Plot a heatmap/bubble-chart/scatterplot
    of column name Z vs column names (X, Y)

    kw is the dict built by process_args(): it must hold 'datafile',
    'xname', 'yname' and 'zname' (otherwise the usage text is printed
    and the process exits), plus any optional name=value modifiers.
    The chart is displayed with plt.show(); nothing is returned.
    """
    global DBG

    datafile = kw.get('datafile')
    xname = kw.get('xname')
    yname = kw.get('yname')
    zname = kw.get('zname')

    if not all((datafile, xname, yname, zname)):
        myusage()

    # Parse the debug level first so that every dbg() call below,
    # including the very first one, honors it.
    DBG = int(float(myget(kw, r'^(?:debug|dbg|db?)$', 0)))
    dbg(1, "+--- xyz: param init...")

    mode = _mode_from_argv0(ARGV0)

    # good style are:
    #   ggplot fast dark_background
    #   seaborn-notebook
    #   seaborn-paper
    #   seaborn-bright
    #   seaborn-darkgrid
    wanted_style = myget(kw, '(?:style|st)', 'ggplot')
    plt.style.use(wanted_style)

    my_style_tweaks()

    # plt.rc('font', **MyFont)
    plt.rc('grid', c='0.5', ls='-', lw=0.5)     # midgray, line-width
    plt.rc('lines', c='0.5', lw=0.5, aa=True)   # midgray, line-width

    # cmapstr = myget(kw, '(?:col|cm)', 'bwr_r')
    cmapstr = myget(kw, '(?:col|cm)', 'ariel')
    # -- Old syntax, matplotlib deprecation warnings
    # cmap = plt.cm.get_cmap(cmapstr)
    cmap = plt.colormaps[cmapstr]

    # grid: whether to add a grid (alpha)
    grid = float(myget(kw, '(?:gr(?:id)?$)', 0.33))

    # gd (griddata): whether to linearly 'grid' (interpolate) the data
    # NOTE(review): the value is only reported in the debug output below,
    # the interpolation always runs; the pattern also accepts the key
    # 'grid' -- confirm the intended behavior before wiring it in.
    gd = int(float(myget(kw, '(?:g(ri?d?)?d)', 1)))

    # Whether to plot contour lines in contour-plot (alpha)
    contourlines = float(myget(kw, '^(?:cl|con)', 1.0))

    xres = int(float(myget(kw, 'xr', 60)))
    yres = int(float(myget(kw, 'yr', 60)))
    zres = int(float(myget(kw, 'zr', 50)))

    # Transparency of bubbles
    alpha = float(myget(kw, 'al', 1.0))

    # PDF => CDF via cumsum
    xcs = int(float(myget(kw, 'xc', 0)))
    ycs = int(float(myget(kw, 'yc', 0)))
    zcs = int(float(myget(kw, 'zc', 0)))

    # Apply a gaussian-blur (gaussian filter) w/ sigma on any dimension
    xblur = float(myget(kw, '^(?:xg?[bf])', 0.0))
    yblur = float(myget(kw, '^(?:yg?[bf])', 0.0))
    zblur = float(myget(kw, '^(?:zg?[bf])', 0.0))

    xscale = myget(kw, 'xs', 'linear')
    yscale = myget(kw, 'ys', 'linear')
    dbg(1, " done\n")

    # Load the data into a DataFrame
    df = data2df(datafile, xname, yname, zname, kw)
    if df.empty:
        # Nothing to interpolate or plot: fail with a clear message
        # instead of an obscure error from the interpolation step.
        err(f"{ARGV0}: no data rows left to plot "
            "(check the [xyz]min/[xyz]max trimming)\n")
        sys.exit(1)

    x = df[xname]
    y = df[yname]
    z = df[zname]

    # Default axis descriptions; annotated below with every transformation
    # applied to the axis, then used as the default labels.
    xdesc, ydesc, zdesc = xname, yname, zname

    if xcs:
        x = np.cumsum(x)
        xdesc = f'cumsum({xdesc})'
    if ycs:
        y = np.cumsum(y)
        ydesc = f'cumsum({ydesc})'
    if zcs:
        z = np.cumsum(z)
        zdesc = f'cumsum({zdesc})'

    dbg(3, f"| xyz: [xyz]blur=({xblur},{yblur},{zblur}): blur x,y,z...")
    if xblur:
        x = gaussian_filter(x, sigma=xblur, order=0)
    if yblur:
        y = gaussian_filter(y, sigma=yblur, order=0)
    if zblur:
        z = gaussian_filter(z, sigma=zblur, order=0)
    dbg(3, " done\n")

    dbg(3, f"| xyz: (xscale={xscale} yscale={yscale}): scale x,y...")
    if xscale != 'linear':
        plt.xscale(xscale)
        xdesc = f'{xdesc} ({xscale} scale)'
    if yscale != 'linear':
        plt.yscale(yscale)
        ydesc = f'{ydesc} ({yscale} scale)'
    dbg(3, " done\n")

    # Labels: explicit [xyz]label=/title= win, otherwise use the
    # (possibly annotated) axis descriptions.
    xlab = myget(kw, '^(?:xl)', xdesc)
    ylab = myget(kw, '^(?:yl)', ydesc)
    zlab = myget(kw, '^(?:zl)', zdesc)
    default_title = f'f({xlab}, {ylab}) -> {zlab}'
    title = myget(kw, '^(?:ti)', default_title)

    # find limits min/max on each dimension; taken from the values that
    # are actually plotted, i.e. after cumsum/blur were applied
    xmin = x.min()
    xmax = x.max()
    ymin = y.min()
    ymax = y.max()
    zmin = z.min()
    zmax = z.max()

    # point/dot size multiplier (for scatter-plot)
    default_dotsize = {
        'h': 0,
        's': 4.0,
        'b': 10.0,
        'p': 2.0,
    }
    dotsize = float(myget(kw, '(?:(?:dot|point|d|p)s)',
                          default_dotsize[mode]))

    dbg(1, "Data Ranges: "
        f"X:[{xmin} {xmax}] Y:[{ymin} {ymax}] Z:[{zmin} {zmax}]\n")
    dbg(1, f"Data resolutions: X:{xres} Y:{yres} Z:{zres}\n")

    # define grid.
    xi = np.linspace(xmin, xmax, xres)
    yi = np.linspace(ymin, ymax, yres)
    dbg(2, f"xi vector (shape={xi.shape}) after linspace: {xi}\n")
    dbg(2, f"yi vector (shape={yi.shape}) after linspace: {yi}\n")

    # grid the data.
    dbg(3, f"| xyz: gd={gd}: grid x,y -> zi...")
    zi = scipy.interpolate.griddata((x, y), z,
                                    (xi[None, :], yi[:, None]),
                                    method='linear')
    dbg(3, f"Z vector after gridding: {zi}\n")

    dbg(3, "| xyz: contour...")
    if mode == 'p':
        fig = plt.figure()
        _ax = fig.add_subplot(111, projection='polar')
        # ax.set_rorigin(-2.5)     # hollow center
        # ax.set_theta_zero_location('W', offset=10)    # angular rotation

    elif mode == 'h':
        # Contour lines
        if contourlines:
            plt.contour(xi, yi, zi, zres, colors='k',
                        alpha=contourlines, extend='neither')

        # Color fill between contours
        plt.contourf(xi, yi, zi, zres, cmap=cmap)   # , vmin=zmin, vmax=zmax)
    dbg(3, " done\n")

    plt.tight_layout(rect=(0.00, 0.0, 1, 1))
    if grid:
        plt.grid(alpha=grid)
    # grid_set_style()     # no effect...

    # plot data points.
    dotareas = 0.0
    if dotsize > 0:
        dotareas = np.pi * (1.0 + abs(z)*dotsize)**2.0

    # scatter/bubble point edge color
    ec = myget(kw, '^(?:e(?:dge)?c)', None)
    elw = float(myget(kw, '^(?:e?l(?:ine)?w)', 0.0))

    dbg(3,
        f"| xyz: cmap={cmapstr} dotsize={dotsize} edgecolor={ec}: scatter...")
    plt.scatter(x, y, s=dotareas,           # X, Y, size
                c=z, alpha=alpha,           # color & alpha
                marker=('o' if mode == 'h' else None),
                cmap=cmap,
                edgecolors=ec, linewidths=elw,
                zorder=10)
    dbg(3, " done\n")

    # Axis flipping
    if 'yinv' in kw:
        ymin, ymax = ymax, ymin
    if 'xinv' in kw:
        xmin, xmax = xmax, xmin

    plt.xlim(xmin, xmax)
    plt.ylim(ymin, ymax)

    # draw a colorbar
    dbg(3, "| xyz: colorbar...")
    # NOTE(review): this call also applies to the current axes, so it
    # presumably hides the grid enabled above -- confirm that is intended.
    plt.grid(False)     # quiet MatplotlibDeprecationWarning
    plt.colorbar(alpha=1.0)
    dbg(3, " done\n")

    if mode == 'p':
        plt.xlabel(f"theta:{xlab} radius:{ylab} color+size:{zlab}")
    else:
        plt.title(title, **MyFont, size=22)
        plt.xlabel(xlab, **MyFont, size=18)
        plt.ylabel(ylab, **MyFont, size=18)

    dbg(2, "| xyz: plt.show()...")
    plt.show()
    dbg(2, " done\n+--- xyz: END!\n")


def process_args():
    """
    Process CLI: assign all keyword params & positional args

    Each command line argument is classified as, in this order:
      - name=value       -> params[name] = value
      - an existing path -> params['datafile']
      - '-'              -> params['datafile'] = '/dev/stdin'
      - anything else    -> the next free of xname, yname, zname
                            (surplus positional args are ignored)

    help=[sc] prints the available styles and/or colormaps to stderr.
    Returns the params dict.
    """
    params = {}
    free_slots = iter(('xname', 'yname', 'zname'))

    for arg in sys.argv[1:]:
        m = re.match('^([^=]+)=(.*)$', arg)
        if m:
            pname, pval = m.group(1, 2)
            params[pname] = pval
        elif os.path.exists(arg):
            params['datafile'] = arg
        elif arg == '-':
            params['datafile'] = '/dev/stdin'
        else:
            slot = next(free_slots, None)
            if slot is not None:
                params[slot] = arg

    if 'help' in params:
        topics = params['help']
        if 's' in topics:     # styles
            err("=== Avaliable styles:\n%s\n\n" % re.sub(
                r"(?:u'|[',\n])", '', pformat(plt.style.available)
            ))
        if 'c' in topics:     # colormaps
            # plt.cm.cmap_d is gone from the matplotlib releases that
            # provide the plt.colormaps registry used above; iterating
            # the registry yields the registered colormap names.
            err("=== Avaliable colormaps:\n%s\n\n" % re.sub(
                r"(?:u'|[',\n])", '', pformat(list(plt.colormaps))
            ))

    return params


def main():
    """
    Display the (X, Y, Z) heatmap from the data
    """
    # process command line args
    params = process_args()

    # Create the chart from the DataFrame according to requested
    # args (column-selection and visualization params)
    xyz(params)

    return 0


if __name__ == '__main__':
    sys.exit(main())