{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## algorithm" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def first_and_follow(grammar):\n", " # first & follow sets, epsilon-productions\n", " first = {i: set() for i in grammar.nonterminals}\n", " first.update((i, {i}) for i in grammar.terminals)\n", " follow = {i: set() for i in grammar.nonterminals}\n", " epsilon = set()\n", "\n", " while True:\n", " updated = False\n", " \n", " for nt, expression in grammar.rules:\n", " # FIRST set w.r.t epsilon-productions\n", " for symbol in expression:\n", " updated |= union(first[nt], first[symbol])\n", " if symbol not in epsilon:\n", " break\n", " else:\n", " updated |= union(epsilon, {nt})\n", " \n", " # FOLLOW set w.r.t epsilon-productions\n", " aux = follow[nt]\n", " for symbol in reversed(expression):\n", " if symbol in follow:\n", " updated |= union(follow[symbol], aux)\n", " if symbol in epsilon:\n", " aux = aux.union(first[symbol])\n", " else:\n", " aux = first[symbol]\n", " \n", " if not updated:\n", " return first, follow, epsilon" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def union(first, begins):\n", " n = len(first)\n", " first |= begins\n", " return len(first) != n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": true }, "outputs": [], "source": [ "class Grammar:\n", " \n", " def __init__(self, *rules):\n", " self.rules = tuple(self._parse(rule) for rule in rules)\n", "\n", " def _parse(self, rule):\n", " return tuple(rule.replace(' ', '').split('::='))\n", " \n", " def __getitem__(self, nonterminal):\n", " yield from [rule for rule in self.rules if rule[0] == nonterminal]\n", " \n", " @staticmethod\n", " def is_nonterminal(symbol):\n", " return symbol.isalpha() and symbol.isupper()\n", " \n", " @property\n", " def nonterminals(self):\n", " return set(nt for nt, _ in self.rules)\n", " \n", " @property\n", " def terminals(self):\n", " return set(\n", " symbol\n", " for _, expression in self.rules\n", " for symbol in expression\n", " if not self.is_nonterminal(symbol)\n", " )" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## left-recursive grammar w/ epsilon-production" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "first, follow, epsilon = first_and_follow(Grammar(\n", " '^ ::= A $',\n", " 'A ::= ABBC',\n", " 'A ::= B',\n", " 'A ::= 1',\n", " 'B ::= C',\n", " 'B ::= 2',\n", " 'C ::= 3',\n", " 'C ::= ',\n", "))" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'$': {'$'},\n", " '1': {'1'},\n", " '2': {'2'},\n", " '3': {'3'},\n", " 'A': {'1', '2', '3'},\n", " 'B': {'2', '3'},\n", " 'C': {'3'},\n", " '^': {'$', '1', '2', '3'}}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'A': {'$', '2', '3'}, 'B': {'$', '2', '3'}, 'C': {'$', '2', '3'}, '^': set()}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "follow" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'A', 'B', 'C'}" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "epsilon" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## arithmetic expressions" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "first, follow, epsilon = first_and_follow(Grammar(\n", " '^ ::= E $',\n", " 'E ::= E + T',\n", " 'E ::= T',\n", " 'T ::= T * F',\n", " 'T ::= F',\n", " 'F ::= ( E )',\n", " 'F ::= x',\n", "))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'$': {'$'},\n", " '(': {'('},\n", " ')': {')'},\n", " '*': {'*'},\n", " '+': {'+'},\n", " 'E': {'(', 'x'},\n", " 'F': {'(', 'x'},\n", " 'T': {'(', 'x'},\n", " '^': {'(', 'x'},\n", " 'x': {'x'}}" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "{'E': {'$', ')', '+'},\n", " 'F': {'$', ')', '*', '+'},\n", " 'T': {'$', ')', '*', '+'},\n", " '^': set()}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "follow" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "set()" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "epsilon" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }