{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Exercises" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## #1: Display the total number of watchers per language (ignore repos without a language)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Your output should look something like this:" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```bash\n", "C# 32\n", "C++ 63\n", "HTML 349\n", "JavaScript 3881\n", "Jupyter Notebook 5481\n", "PHP 201\n", "Python 37007\n", "R 18\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use the URL `https://api.github.com/search/repositories?q=data` for source data." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Your answer" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hint" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from urllib.request import urlopen\n", "from itertools import groupby\n", "from operator import itemgetter\n", "from ijson import items\n", "\n", "url2 = 'https://api.github.com/search/repositories?q=data'\n", "f = '???'\n", "repos = '???'\n", "\n", "# ...\n", "\n", "kwargs = {}\n", "grouped = groupby([], **kwargs)\n", "\n", "for key, group in grouped:\n", " cnt = '???'\n", " print(key, cnt)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## #2: Language with the most watchers, per `owner_type` per `has_pages`" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Your output should look something like this:" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "```python\n", "{'has_pages': True,\n", " 'language': 'JavaScript',\n", " 'owner_type': 'Organization',\n", " 'watchers': 128605}\n", "```" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Use the URL 
`https://api.github.com/search/repositories?q=data&sort=stars&order=desc` for source data." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Your answer" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hint" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from urllib.request import urlopen\n", "from operator import itemgetter\n", "from functools import partial\n", "from meza import process as pr, fntools as ft\n", "from meza.io import read_json\n", "\n", "url4 = 'https://api.github.com/search/repositories?q=data&sort=stars&order=desc'\n", "f = '???'\n", "records = '???'\n", "\n", "# Some of the functions you will use are `ft.flatten`, `pr.pivot`, `pr.normalize`,\n", "# `pr.group`, `pr.fillempty`, and `pr.aggregate`. You can view documentation for\n", "# these functions in the docstrings at the links below:\n", "# \n", "# https://github.com/reubano/meza/blob/master/meza/process.py\n", "# https://github.com/reubano/meza/blob/master/meza/fntools.py\n", "\n", "# ...\n", "\n", "keyfunc = lambda x: True\n", "kwargs = {}\n", "grouped = pr.group([], keyfunc, **kwargs)\n", "\n", "for key, group in grouped:\n", " # ...\n", " pass" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.1" } }, "nbformat": 4, "nbformat_minor": 1 }