{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def first_and_follow(grammar):\n",
    "    # first & follow sets, epsilon-productions\n",
    "    first = {i: set() for i in grammar.nonterminals}\n",
    "    first.update((i, {i}) for i in grammar.terminals)\n",
    "    follow = {i: set() for i in grammar.nonterminals}\n",
    "    epsilon = set()\n",
    "\n",
    "    while True:\n",
    "        updated = False\n",
    "        \n",
    "        for nt, expression in grammar.rules:\n",
    "            # FIRST set w.r.t epsilon-productions\n",
    "            for symbol in expression:\n",
    "                updated |= union(first[nt], first[symbol])\n",
    "                if symbol not in epsilon:\n",
    "                    break\n",
    "            else:\n",
    "                updated |= union(epsilon, {nt})\n",
    "                \n",
    "            # FOLLOW set w.r.t epsilon-productions\n",
    "            aux = follow[nt]\n",
    "            for symbol in reversed(expression):\n",
    "                if symbol in follow:\n",
    "                    updated |= union(follow[symbol], aux)\n",
    "                if symbol in epsilon:\n",
    "                    aux = aux.union(first[symbol])\n",
    "                else:\n",
    "                    aux = first[symbol]\n",
    "        \n",
    "        if not updated:\n",
    "            return first, follow, epsilon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def union(first, begins):\n",
    "    n = len(first)\n",
    "    first |= begins\n",
    "    return len(first) != n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "class Grammar:\n",
    "    \n",
    "    def __init__(self, *rules):\n",
    "        self.rules = tuple(self._parse(rule) for rule in rules)\n",
    "\n",
    "    def _parse(self, rule):\n",
    "        return tuple(rule.replace(' ', '').split('::='))\n",
    "        \n",
    "    def __getitem__(self, nonterminal):\n",
    "        yield from [rule for rule in self.rules if rule[0] == nonterminal]\n",
    "        \n",
    "    @staticmethod\n",
    "    def is_nonterminal(symbol):\n",
    "        return symbol.isalpha() and symbol.isupper()\n",
    "        \n",
    "    @property\n",
    "    def nonterminals(self):\n",
    "        return set(nt for nt, _ in self.rules)\n",
    "        \n",
    "    @property\n",
    "    def terminals(self):\n",
    "        return set(\n",
    "            symbol\n",
    "            for _, expression in self.rules\n",
    "            for symbol in expression\n",
    "            if not self.is_nonterminal(symbol)\n",
    "        )"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## left-recursive grammar w/ epsilon-production"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "first, follow, epsilon = first_and_follow(Grammar(\n",
    "    '^ ::= A $',\n",
    "    'A ::= ABBC',\n",
    "    'A ::= B',\n",
    "    'A ::= 1',\n",
    "    'B ::= C',\n",
    "    'B ::= 2',\n",
    "    'C ::= 3',\n",
    "    'C ::= ',\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'$': {'$'},\n",
       " '1': {'1'},\n",
       " '2': {'2'},\n",
       " '3': {'3'},\n",
       " 'A': {'1', '2', '3'},\n",
       " 'B': {'2', '3'},\n",
       " 'C': {'3'},\n",
       " '^': {'$', '1', '2', '3'}}"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A': {'$', '2', '3'}, 'B': {'$', '2', '3'}, 'C': {'$', '2', '3'}, '^': set()}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "follow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'A', 'B', 'C'}"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "epsilon"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## arithmetic expressions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "first, follow, epsilon = first_and_follow(Grammar(\n",
    "    '^ ::= E $',\n",
    "    'E ::= E + T',\n",
    "    'E ::= T',\n",
    "    'T ::= T * F',\n",
    "    'T ::= F',\n",
    "    'F ::= ( E )',\n",
    "    'F ::= x',\n",
    "))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'$': {'$'},\n",
       " '(': {'('},\n",
       " ')': {')'},\n",
       " '*': {'*'},\n",
       " '+': {'+'},\n",
       " 'E': {'(', 'x'},\n",
       " 'F': {'(', 'x'},\n",
       " 'T': {'(', 'x'},\n",
       " '^': {'(', 'x'},\n",
       " 'x': {'x'}}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "first"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'E': {'$', ')', '+'},\n",
       " 'F': {'$', ')', '*', '+'},\n",
       " 'T': {'$', ')', '*', '+'},\n",
       " '^': set()}"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "follow"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "set()"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "epsilon"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}