{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "8a46bf54", "metadata": {}, "outputs": [], "source": [ "#| default_exp xml" ] }, { "cell_type": "markdown", "id": "3e03bb3d", "metadata": {}, "source": [ "# XML\n", "\n", "> Concise generation of XML." ] }, { "cell_type": "code", "execution_count": null, "id": "f6c9a7f5", "metadata": {}, "outputs": [], "source": [ "#| export\n", "from fastcore.utils import *\n", "\n", "import types,json\n", "\n", "from dataclasses import dataclass, asdict\n", "from typing import Mapping\n", "from functools import partial\n", "from html import escape" ] }, { "cell_type": "code", "execution_count": null, "id": "42d18e5c", "metadata": {}, "outputs": [], "source": [ "from IPython.display import Markdown\n", "from pprint import pprint\n", "\n", "from fastcore.test import test_eq" ] }, { "cell_type": "markdown", "id": "a7448678", "metadata": {}, "source": [ "## FT functions" ] }, { "cell_type": "code", "execution_count": null, "id": "e1b62c2c", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def _fix_k(k): return k if k=='_' else k.lstrip('_').replace('_', '-')" ] }, { "cell_type": "code", "execution_count": null, "id": "159d3560", "metadata": {}, "outputs": [], "source": [ "#| export\n", "_specials = set('@.-!~:[](){}$%^&*+=|/?<>,`')\n", "\n", "def attrmap(o):\n", " if _specials & set(o): return o\n", " o = dict(htmlClass='class', cls='class', _class='class', klass='class',\n", " _for='for', fr='for', htmlFor='for').get(o, o)\n", " return _fix_k(o)" ] }, { "cell_type": "code", "execution_count": null, "id": "6f000a63", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def valmap(o):\n", " if is_listy(o): return ' '.join(map(str,o)) if o else None\n", " if isinstance(o, dict): return '; '.join(f\"{k}:{v}\" for k,v in o.items()) if o else None\n", " return o" ] }, { "cell_type": "code", "execution_count": null, "id": "ddc7d705", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def _flatten_tuple(tup):\n", " if not any(isinstance(item, tuple) for item in tup): return tup\n", " result = []\n", " for item in tup:\n", " if isinstance(item, tuple): result.extend(item)\n", " else: result.append(item)\n", " return tuple(result)" ] }, { "cell_type": "code", "execution_count": null, "id": "df5d12c7", "metadata": {}, "outputs": [], "source": [ "#|export\n", "def _preproc(c, kw, attrmap=attrmap, valmap=valmap):\n", " if len(c)==1 and isinstance(c[0], (types.GeneratorType, map, filter)): c = tuple(c[0])\n", " attrs = {attrmap(k.lower()):valmap(v) for k,v in kw.items() if v is not None}\n", " return _flatten_tuple(c),attrs" ] }, { "cell_type": "code", "execution_count": null, "id": "b06c10f6", "metadata": {}, "outputs": [], "source": [ "#|export\n", "class FT:\n", " \"A 'Fast Tag' structure, containing `tag`,`children`,and `attrs`\"\n", " def __init__(self, tag:str, cs:tuple, attrs:dict=None, void_=False, **kwargs):\n", " assert isinstance(cs, tuple)\n", " self.tag,self.children,self.attrs,self.void_ = tag,cs,attrs,void_\n", " self.listeners_ = []\n", " \n", " def on(self, f): self.listeners_.append(f)\n", " def changed(self):\n", " [f(self) for f in self.listeners_]\n", " return self\n", "\n", " def __setattr__(self, k, v):\n", " if len(k)>1 and k.startswith('__') or k[-1]=='_' or k in ('tag','children','attrs','void_'): return super().__setattr__(k,v)\n", " self.attrs[_fix_k(k)] = v\n", " self.changed()\n", "\n", " def __getattr__(self, k):\n", " if k.startswith('__'): raise AttributeError(k)\n", " return self.get(k)\n", "\n", " @property\n", " def list(self): return [self.tag,self.children,self.attrs]\n", " def get(self, k, default=None): return self.attrs.get(_fix_k(k), default)\n", " \n", " def __repr__(self): return f'{self.tag}({self.children},{self.attrs})'\n", " def __iter__(self): return iter(self.children)\n", " def __getitem__(self, idx): return self.children[idx]\n", " \n", " def __setitem__(self, i, o):\n", " self.children = self.children[:i] + (o,) + self.children[i+1:]\n", " self.changed()\n", "\n", " def __call__(self, *c, **kw):\n", " c,kw = _preproc(c,kw)\n", " if c: self.children = self.children+c\n", " if kw: self.attrs = {**self.attrs, **kw}\n", " return self.changed()\n", "\n", " def set(self, *c, **kw):\n", " \"Set children and/or attributes (chainable)\"\n", " c,kw = _preproc(c,kw)\n", " if c: self.children = c\n", " if kw:\n", " self.attrs = {k:v for k,v in self.attrs.items() if k in ('id','name')}\n", " self.attrs = {**self.attrs, **kw}\n", " return self.changed()" ] }, { "cell_type": "code", "execution_count": null, "id": "06718948", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def ft(tag:str, *c, void_:bool=False, attrmap:callable=attrmap, valmap:callable=valmap, ft_cls=FT, **kw):\n", " \"Create an `FT` structure for `to_xml()`\"\n", " return ft_cls(tag.lower(),*_preproc(c,kw,attrmap=attrmap, valmap=valmap), void_=void_)" ] }, { "cell_type": "code", "execution_count": null, "id": "45489975", "metadata": {}, "outputs": [], "source": [ "#| export\n", "voids = set('area base br col command embed hr img input keygen link meta param source track wbr !doctype'.split())\n", "_g = globals()\n", "_all_ = ['Head', 'Title', 'Meta', 'Link', 'Style', 'Body', 'Pre', 'Code',\n", " 'Div', 'Span', 'P', 'H1', 'H2', 'H3', 'H4', 'H5', 'H6', 'Strong', 'Em', 'B',\n", " 'I', 'U', 'S', 'Strike', 'Sub', 'Sup', 'Hr', 'Br', 'Img', 'A', 'Link', 'Nav',\n", " 'Ul', 'Ol', 'Li', 'Dl', 'Dt', 'Dd', 'Table', 'Thead', 'Tbody', 'Tfoot', 'Tr',\n", " 'Th', 'Td', 'Caption', 'Col', 'Colgroup', 'Form', 'Input', 'Textarea',\n", " 'Button', 'Select', 'Option', 'Label', 'Fieldset', 'Legend', 'Details',\n", " 'Summary', 'Main', 'Header', 'Footer', 'Section', 'Article', 'Aside', 'Figure',\n", " 'Figcaption', 'Mark', 'Small', 'Iframe', 'Object', 'Embed', 'Param', 'Video',\n", " 'Audio', 'Source', 'Canvas', 'Svg', 'Math', 'Script', 'Noscript', 'Template', 'Slot']\n", "\n", "for o in _all_: _g[o] = partial(ft, o.lower(), void_=o.lower() in voids)" ] }, { "cell_type": "markdown", "id": "732e44ab", "metadata": {}, "source": [ "The main HTML tags are exported as `ft` partials.\n", "\n", "Attributes are passed as keywords. Use 'klass' and 'fr' instead of 'class' and 'for', to avoid Python reserved word clashes." ] }, { "cell_type": "code", "execution_count": null, "id": "39834fcb", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def Html(*c, doctype=True, **kwargs)->FT:\n", " \"An HTML tag, optionally preceeded by `!DOCTYPE HTML`\"\n", " res = ft('html', *c, **kwargs)\n", " if not doctype: return res\n", " return (ft('!DOCTYPE', html=True, void_=True), res)" ] }, { "cell_type": "code", "execution_count": null, "id": "9a8b4ddb", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(!doctype((),{'html': True}),\n", " html((head((title(('Some page',),{}),),{}), body((div(('Some text\\nanother line', input((),{'name': \"jph's\"}), img((),{'src': 'filename', 'data': 1})),{'class': 'myclass another', 'style': 'padding:1; margin:2'}),),{})),{}))\n" ] } ], "source": [ "samp = Html(\n", " Head(Title('Some page')),\n", " Body(Div('Some text\\nanother line', (Input(name=\"jph's\"), Img(src=\"filename\", data=1)),\n", " cls=['myclass', 'another'],\n", " style={'padding':1, 'margin':2}))\n", ")\n", "pprint(samp)" ] }, { "cell_type": "code", "execution_count": null, "id": "5c6c57e9", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "p\n", "('Some text',)\n", "{'id': 'myid'}\n" ] } ], "source": [ "elem = P('Some text', id=\"myid\")\n", "print(elem.tag)\n", "print(elem.children)\n", "print(elem.attrs)" ] }, { "cell_type": "markdown", "id": "bb61f88e", "metadata": {}, "source": [ "You can get and set attrs directly:" ] }, { "cell_type": "code", "execution_count": null, "id": "5c7f175d", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "newid newid missing\n" ] }, { "data": { "text/plain": [ "p(('Some text',),{'id': 'newid'})" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "elem.id = 'newid'\n", "print(elem.id, elem.get('id'), elem.get('foo', 'missing'))\n", "elem" ] }, { "cell_type": "code", "execution_count": null, "id": "116c886e", "metadata": {}, "outputs": [], "source": [ "#| export\n", "class Safe(str):\n", " def __html__(self): return self" ] }, { "cell_type": "markdown", "id": "99c9e1cc", "metadata": {}, "source": [ "## Conversion to XML/HTML" ] }, { "cell_type": "code", "execution_count": null, "id": "254c8ff3", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def _escape(s): return '' if s is None else s.__html__() if hasattr(s, '__html__') else escape(s) if isinstance(s, str) else s\n", "def _noescape(s): return '' if s is None else s.__html__() if hasattr(s, '__html__') else s" ] }, { "cell_type": "code", "execution_count": null, "id": "0255b96f", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def _to_attr(k,v):\n", " if isinstance(v,bool):\n", " if v==True : return str(k)\n", " if v==False: return ''\n", " if isinstance(v,str): v = escape(v, quote=False)\n", " elif isinstance(v, Mapping): v = json.dumps(v)\n", " else: v = str(v)\n", " qt = '\"'\n", " if qt in v:\n", " qt = \"'\"\n", " if \"'\" in v: v = v.replace(\"'\", \"'\")\n", " return f'{k}={qt}{v}{qt}'" ] }, { "cell_type": "code", "execution_count": null, "id": "ea224c94", "metadata": {}, "outputs": [], "source": [ "#| export\n", "_block_tags = {'div', 'p', 'ul', 'ol', 'li', 'table', 'thead', 'tbody', 'tfoot',\n", " 'html', 'head', 'body', 'meta', 'title', '!doctype', 'input', 'script', 'link', 'style',\n", " 'tr', 'th', 'td', 'section', 'article', 'nav', 'aside', 'header',\n", " 'footer', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'blockquote'}\n", "_inline_tags = {'a', 'span', 'b', 'i', 'u', 'em', 'strong', 'img', 'br', 'small',\n", " 'big', 'sub', 'sup', 'label', 'input', 'select', 'option'}\n", "\n", "def _is_whitespace_significant(elm):\n", " return elm.tag in {'pre', 'code', 'textarea', 'script'} or elm.get('contenteditable') == 'true'" ] }, { "cell_type": "code", "execution_count": null, "id": "a1ed9c01", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def _to_xml(elm, lvl=0, indent=True, do_escape=True):\n", " \"Convert `FT` element tree into an XML string\"\n", " esc_fn = _escape if do_escape else _noescape\n", " if elm is None: return ''\n", " if hasattr(elm, '__ft__'): elm = elm.__ft__()\n", " if isinstance(elm, tuple):\n", " return ''.join(_to_xml(o, lvl=lvl, indent=indent, do_escape=do_escape) for o in elm)\n", " if isinstance(elm, bytes): return elm.decode('utf-8')\n", " if not isinstance(elm, FT): return f'{esc_fn(elm)}'\n", "\n", " tag, cs, attrs = elm.list\n", " is_void = getattr(elm, 'void_', False)\n", " is_block = tag in _block_tags\n", " if _is_whitespace_significant(elm): indent = False\n", "\n", " sp,nl = (' ' * lvl,'\\n') if indent and is_block else ('','')\n", " nl_end = nl\n", "\n", " stag = tag\n", " if attrs:\n", " sattrs = ' '.join(_to_attr(k, v) for k, v in attrs.items() if v not in (False, None, '') and (k=='_' or k[-1]!='_'))\n", " if sattrs: stag += f' {sattrs}'\n", "\n", " cltag = '' if is_void else f''\n", "\n", " if not cs:\n", " if is_void: return f'{sp}<{stag}>{nl_end}'\n", " else: return f'{sp}<{stag}>{cltag}{nl_end}'\n", " if len(cs) == 1 and not isinstance(cs[0], (list, tuple, FT)) and not hasattr(cs[0], '__ft__'):\n", " content = esc_fn(cs[0])\n", " return f'{sp}<{stag}>{content}{cltag}{nl_end}'\n", "\n", " res = f'{sp}<{stag}>{nl}'\n", " for c in cs:\n", " res += _to_xml(c, lvl=lvl+2 if indent else 0, indent=indent, do_escape=do_escape)\n", " if not is_void: res += f'{sp}{cltag}{nl_end}'\n", " return Safe(res)" ] }, { "cell_type": "code", "execution_count": null, "id": "dd054392", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def to_xml(elm, lvl=0, indent=True, do_escape=True):\n", " \"Convert `ft` element tree into an XML string\"\n", " return Safe(_to_xml(elm, lvl, indent, do_escape=do_escape))\n", "\n", "FT.__html__ = to_xml" ] }, { "cell_type": "code", "execution_count": null, "id": "c2a921a2", "metadata": {}, "outputs": [], "source": [ "#| hide\n", "test_eq(to_xml(Div(\"Hello\")), '
Hello
\\n')\n", "test_eq(to_xml(P(\"Text\", Class=\"test\")), '

Text

\\n')\n", "test_eq(to_xml(Div(P(\"Nested\"))), '
\\n

Nested

\\n
\\n')\n", "test_eq(to_xml(Pre(\" Whitespace\\n Significant \")), '
  Whitespace\\n  Significant  
')\n", "test_eq(to_xml(Img(src=\"image.jpg\")), '')\n", "test_eq(to_xml(Div(\"Text\", contenteditable=\"true\")), '
Text
')\n", "test_eq(to_xml(None), '')\n", "test_eq(to_xml((\"Text\", P(\"Paragraph\"))), 'Text

Paragraph

\\n')\n", "test_eq(to_xml(b\"Bytes\"), 'Bytes')\n", "test_eq(to_xml(Div(P(\"Text\"), B(\"Bold\")), indent=False), '

Text

Bold
')\n", "test_eq(to_xml(Div(\"\"), do_escape=True),\n", " '
<script>alert('XSS')</script>
\\n')\n", "test_eq(to_xml(Div(\"\"), do_escape=False),\n", " \"
\\n\")\n", "test_eq(to_xml(Div(foo=False), indent=False), '
')" ] }, { "cell_type": "code", "execution_count": null, "id": "eef16b38", "metadata": {}, "outputs": [], "source": [ "#| hide\n", "test_eq(to_xml(B('Bold Text')), 'Bold Text')\n", "test_eq(to_xml(Div(P('Paragraph Text'))), '
\\n

Paragraph Text

\\n
\\n')\n", "test_eq(to_xml(Pre(' Preformatted\\n Text')), '
   Preformatted\\n   Text
')\n", "editable_div = Div('Editable Content', contenteditable='true')\n", "test_eq(to_xml(editable_div), '
Editable Content
')\n", "test_eq(to_xml(Div(Span('Inline Text'), P('Paragraph'))),\n", " '
\\nInline Text

Paragraph

\\n
\\n')\n", "test_eq(to_xml(Br()), '
')\n", "test_eq(to_xml(P(None)), '

\\n')\n", "test_eq(to_xml(Div()), '
\\n')\n", "test_eq(to_xml(Input(type='text', disabled=True)), '\\n')\n", "special_attr_tag = Div(id='main\"div', data_info=\"Some 'info'\")\n", "expected_special_attr = \"
\\n\"\n", "test_eq(to_xml(special_attr_tag), expected_special_attr)" ] }, { "cell_type": "code", "execution_count": null, "id": "d3d23c48", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", " \n", " Some page\n", " \n", " \n", "
\n", "Some text\n", "another line \n", "
\n", " \n", "\n", "\n" ] } ], "source": [ "h = to_xml(samp, do_escape=False)\n", "print(h)" ] }, { "cell_type": "code", "execution_count": null, "id": "85a16341", "metadata": {}, "outputs": [], "source": [ "class PageTitle:\n", " def __ft__(self): return H1(\"Hello\")\n", "\n", "class HomePage:\n", " def __ft__(self): return Div(PageTitle(), Div('hello'))\n", "\n", "h = to_xml(Div(HomePage()))\n", "expected_output = \"\"\"
\n", "
\n", "

Hello

\n", "
hello
\n", "
\n", "
\n", "\"\"\"\n", "assert h == expected_output" ] }, { "cell_type": "code", "execution_count": null, "id": "61d63a79", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "
\n", "
\n", "

Hello

\n", "
hello
\n", "
\n", "
\n", "\n" ] } ], "source": [ "print(h)" ] }, { "cell_type": "code", "execution_count": null, "id": "51e58821", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Some page
Some text\n", "another line
\n" ] } ], "source": [ "h = to_xml(samp, indent=False)\n", "print(h)" ] }, { "cell_type": "markdown", "id": "4713bd8d", "metadata": {}, "source": [ "Interoperability both directions with Django and Jinja using the [__html__() protocol](https://jinja.palletsprojects.com/en/3.1.x/templates/#jinja-filters.escape):" ] }, { "cell_type": "code", "execution_count": null, "id": "798ae1d2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "
Hello from Django
\n", "\n", "
\n", "

Hello from fastcore <3

\n", "
\n", "\n" ] } ], "source": [ "def _esc(s): return s.__html__() if hasattr(s, '__html__') else Safe(escape(s))\n", "\n", "r = Safe('Hello from Django')\n", "print(to_xml(Div(r)))\n", "print(_esc(Div(P('Hello from fastcore <3'))))" ] }, { "cell_type": "markdown", "id": "5ad30d7c", "metadata": {}, "source": [ "## Display" ] }, { "cell_type": "code", "execution_count": null, "id": "5f0e91e0", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def highlight(s, lang='html'):\n", " \"Markdown to syntax-highlight `s` in language `lang`\"\n", " return f'```{lang}\\n{to_xml(s)}\\n```'" ] }, { "cell_type": "code", "execution_count": null, "id": "39fab735", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def showtags(s):\n", " return f\"\"\"
\n",
    "{escape(to_xml(s))}\n",
    "
\"\"\"\n", "\n", "FT._repr_markdown_ = highlight" ] }, { "cell_type": "markdown", "id": "32b75e87", "metadata": {}, "source": [ "You can also reorder the children to come *after* the attrs, if you use this alternative syntax for `FT` where the children are in a second pair of `()` (behind the scenes this is because `FT` implements `__call__` to add children)." ] }, { "cell_type": "code", "execution_count": null, "id": "16c9bd71", "metadata": {}, "outputs": [ { "data": { "text/markdown": [ "```html\n", "\n", "
\n", "Some text 1<2in italics \n", "
\n", "\n", "\n", "```" ], "text/plain": [ "body((div(('Some text 1<2', i(('in italics',),{'spurious': True}), input((),{'name': 'me'}), img((),{'src': 'filename', 'data': 1})),{'style': 'padding:3px'}),),{'class': 'myclass'})" ] }, "execution_count": null, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Body(klass='myclass')(\n", " Div(style='padding:3px')(\n", " 'Some text 1<2',\n", " I(spurious=True)('in italics'),\n", " Input(name='me'),\n", " Img(src=\"filename\", data=1)\n", " )\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "1d8a28b1", "metadata": {}, "outputs": [], "source": [ "#| export\n", "def __getattr__(tag):\n", " if tag.startswith('_') or tag[0].islower(): raise AttributeError\n", " tag = _fix_k(tag)\n", " def _f(*c, target_id=None, **kwargs): return ft(tag, *c, target_id=target_id, **kwargs)\n", " return _f" ] }, { "cell_type": "markdown", "id": "df973d4e", "metadata": {}, "source": [ "# Export -" ] }, { "cell_type": "code", "execution_count": null, "id": "ad32b076", "metadata": {}, "outputs": [], "source": [ "#|hide\n", "import nbdev; nbdev.nbdev_export()" ] }, { "cell_type": "code", "execution_count": null, "id": "84fe290c", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "python3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 5 }