{ "metadata": { "name": "", "signature": "sha256:ef0598935d79793355c9fc0e192fb6ce64f52dac330e3c59a1021598baf9592b" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "from elasticsearch import Elasticsearch\n", "from elasticsearch_dsl import Search\n", "from datetime import datetime\n", "\n", "client = Elasticsearch([\"http://127.0.0.1:9200\"])\n", "\n", "#s = Search(using=client, index=\"git-apachecon\")" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 130 }, { "cell_type": "markdown", "metadata": {}, "source": [ "Introduction\n", "------------\n", "\n", "The metrics usually needed for reports are either basic ones that give context, or advanced ones that provide extra points of view that are helpful to managers, developers and others.\n", "\n", "This is an example of how to produce some basic development metrics using elasticsearch_dsl." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Commits\n", "--------\n", "\n", "Some examples:\n", "* Number of commits\n", "* Number of commits filtered by an initial and final date\n", "* Evolution of commits on a monthly basis\n", "* Evolution of commits on a quarterly basis\n", "* Main authors by number of commits\n", "* Main repositories by number of commits\n", "* Main authors by number of commits and filtered by an initial date\n", "* Main repositories by number of commits and filtered by an initial date" ] }, { "cell_type": "code", "collapsed": false, "input": [ "# COUNTING UNIQUE COMMITS\n", "s = Search(using=client, index=\"git-apachecon\")\n", "#s.count()\n", "#but we need to count unique commits:\n", "s.aggs.metric('commits', 'cardinality', field='hash')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]\n", "#TODO: need to check where to add the size = 0 to avoid using 'aggregations' in the dict." 
], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 131, "text": [ "{u'commits': {u'value': 185332}}" ] } ], "prompt_number": 131 }, { "cell_type": "code", "collapsed": false, "input": [ "# COUNTING UNIQUE COMMITS AND IGNORING 'MERGES'\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', files={'gt':0})\n", "s.aggs.metric('commits', 'cardinality', field='hash')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]\n", "\n" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 143, "text": [ "{u'commits': {u'value': 180608}}" ] } ], "prompt_number": 143 }, { "cell_type": "code", "collapsed": false, "input": [ "# COUNTING UNIQUE COMMITS FILTERED BY DATE\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', author_date={'gt': datetime(2015, 12, 12)})\n", "s.aggs.metric('commits', 'cardinality', field='hash')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 132, "text": [ "{u'commits': {u'value': 27682}}" ] } ], "prompt_number": 132 }, { "cell_type": "code", "collapsed": false, "input": [ "# COUNTING COMMITS FILTERED BY DATE AND GROUPED BY QUARTER\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', author_date={'gt': datetime(2015, 1, 1)})\n", "#s = s.filter('range', created={'gte': datetime(1999, 1, 1)})\n", "s.aggs.bucket('histogram', 'date_histogram', field='author_date', interval='quarter')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 133, "text": [ "{u'histogram': {u'buckets': [{u'doc_count': 8019,\n", " u'key': 1420070400000,\n", " u'key_as_string': u'2015-01-01T00:00:00.000Z'},\n", " {u'doc_count': 10515,\n", " u'key': 
1427846400000,\n", " u'key_as_string': u'2015-04-01T00:00:00.000Z'},\n", " {u'doc_count': 8213,\n", " u'key': 1435708800000,\n", " u'key_as_string': u'2015-07-01T00:00:00.000Z'},\n", " {u'doc_count': 8660,\n", " u'key': 1443657600000,\n", " u'key_as_string': u'2015-10-01T00:00:00.000Z'},\n", " {u'doc_count': 7238,\n", " u'key': 1451606400000,\n", " u'key_as_string': u'2016-01-01T00:00:00.000Z'},\n", " {u'doc_count': 9142,\n", " u'key': 1459468800000,\n", " u'key_as_string': u'2016-04-01T00:00:00.000Z'},\n", " {u'doc_count': 6970,\n", " u'key': 1467331200000,\n", " u'key_as_string': u'2016-07-01T00:00:00.000Z'},\n", " {u'doc_count': 3159,\n", " u'key': 1475280000000,\n", " u'key_as_string': u'2016-10-01T00:00:00.000Z'}]}}" ] } ], "prompt_number": 133 }, { "cell_type": "code", "collapsed": false, "input": [ "# COUNTING COMMITS FILTERED BY DATE, GROUPED BY QUARTER AND THE UNIQUE ONES\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', author_date={'gt': datetime(2015, 1, 1)})\n", "#s = s.filter('range', created={'gte': datetime(1999, 1, 1)})\n", "s.aggs.bucket('histogram', 'date_histogram', field='author_date', interval='quarter').metric('commits', 'cardinality', field='hash')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 134, "text": [ "{u'histogram': {u'buckets': [{u'commits': {u'value': 8096},\n", " u'doc_count': 8019,\n", " u'key': 1420070400000,\n", " u'key_as_string': u'2015-01-01T00:00:00.000Z'},\n", " {u'commits': {u'value': 10250},\n", " u'doc_count': 10515,\n", " u'key': 1427846400000,\n", " u'key_as_string': u'2015-04-01T00:00:00.000Z'},\n", " {u'commits': {u'value': 8052},\n", " u'doc_count': 8213,\n", " u'key': 1435708800000,\n", " u'key_as_string': u'2015-07-01T00:00:00.000Z'},\n", " {u'commits': {u'value': 8583},\n", " u'doc_count': 8660,\n", " u'key': 1443657600000,\n", " u'key_as_string': 
u'2015-10-01T00:00:00.000Z'},\n", " {u'commits': {u'value': 7166},\n", " u'doc_count': 7238,\n", " u'key': 1451606400000,\n", " u'key_as_string': u'2016-01-01T00:00:00.000Z'},\n", " {u'commits': {u'value': 9466},\n", " u'doc_count': 9142,\n", " u'key': 1459468800000,\n", " u'key_as_string': u'2016-04-01T00:00:00.000Z'},\n", " {u'commits': {u'value': 7012},\n", " u'doc_count': 6970,\n", " u'key': 1467331200000,\n", " u'key_as_string': u'2016-07-01T00:00:00.000Z'},\n", " {u'commits': {u'value': 3161},\n", " u'doc_count': 3159,\n", " u'key': 1475280000000,\n", " u'key_as_string': u'2016-10-01T00:00:00.000Z'}]}}" ] } ], "prompt_number": 134 }, { "cell_type": "code", "collapsed": false, "input": [ "# COUNTING UNIQUE COMMITS FILTERED BY DATE AND GROUPED BY REPOSITORY\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', author_date={'gt': datetime(2015, 12, 12)})\n", "s.aggs.bucket('by_repo', 'terms', field='repo_name').metric('commits', 'cardinality', field='hash')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 135, "text": [ "{u'by_repo': {u'buckets': [{u'commits': {u'value': 7583},\n", " u'doc_count': 7644,\n", " u'key': u'https://github.com/apache/ambari.git'},\n", " {u'commits': {u'value': 6474},\n", " u'doc_count': 6559,\n", " u'key': u'https://github.com/apache/hadoop.git'},\n", " {u'commits': {u'value': 5950},\n", " u'doc_count': 6057,\n", " u'key': u'https://github.com/apache/spark.git'},\n", " {u'commits': {u'value': 3897},\n", " u'doc_count': 3718,\n", " u'key': u'https://github.com/apache/hbase.git'},\n", " {u'commits': {u'value': 2299},\n", " u'doc_count': 2339,\n", " u'key': u'https://github.com/apache/hive.git'},\n", " {u'commits': {u'value': 556},\n", " u'doc_count': 533,\n", " u'key': u'https://github.com/apache/tez.git'},\n", " {u'commits': {u'value': 293},\n", " u'doc_count': 274,\n", " u'key': 
u'https://github.com/apache/pig.git'},\n", " {u'commits': {u'value': 260},\n", " u'doc_count': 268,\n", " u'key': u'https://github.com/apache/mahout.git'},\n", " {u'commits': {u'value': 216},\n", " u'doc_count': 210,\n", " u'key': u'https://github.com/apache/avro.git'},\n", " {u'commits': {u'value': 208},\n", " u'doc_count': 200,\n", " u'key': u'https://github.com/apache/zookeeper.git'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 33}}" ] } ], "prompt_number": 135 }, { "cell_type": "code", "collapsed": false, "input": [ "# COUNTING UNIQUE COMMITS FILTERED BY DATE AND GROUPED BY AUTHOR\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', author_date={'gt': datetime(2015, 12, 12)})\n", "s.aggs.bucket('by_repo', 'terms', field='author_name').metric('commits', 'cardinality', field='hash')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 137, "text": [ "{u'by_repo': {u'buckets': [{u'commits': {u'value': 809},\n", " u'doc_count': 760,\n", " u'key': u'tedyu'},\n", " {u'commits': {u'value': 685}, u'doc_count': 671, u'key': u'Andrew Onishuk'},\n", " {u'commits': {u'value': 543}, u'doc_count': 542, u'key': u'Akira Ajisaka'},\n", " {u'commits': {u'value': 428}, u'doc_count': 414, u'key': u'stack'},\n", " {u'commits': {u'value': 413}, u'doc_count': 400, u'key': u'Alex Antonenko'},\n", " {u'commits': {u'value': 394}, u'doc_count': 387, u'key': u'Jason Lowe'},\n", " {u'commits': {u'value': 347}, u'doc_count': 365, u'key': u'Reynold Xin'},\n", " {u'commits': {u'value': 388}, u'doc_count': 362, u'key': u'Kihwal Lee'},\n", " {u'commits': {u'value': 357}, u'doc_count': 360, u'key': u'Sumit Mohanty'},\n", " {u'commits': {u'value': 333},\n", " u'doc_count': 332,\n", " u'key': u'Jonathan Hurley'}],\n", " u'doc_count_error_upper_bound': 165,\n", " u'sum_other_doc_count': 23242}}" ] } ], "prompt_number": 137 }, { 
"cell_type": "code", "collapsed": false, "input": [ "# COUNTING UNIQUE COMMITS FILTERED BY DATE AND GROUPED BY REPOSITORY AND AUTHOR\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', author_date={'gt': datetime(2015, 12, 12)})\n", "s.aggs.bucket('by_repo', 'terms', field='repo_name').bucket('by_author', 'terms', field='author_name').metric('commits', 'cardinality', field='hash')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 139, "text": [ "{u'by_repo': {u'buckets': [{u'by_author': {u'buckets': [{u'commits': {u'value': 673},\n", " u'doc_count': 671,\n", " u'key': u'Andrew Onishuk'},\n", " {u'commits': {u'value': 553},\n", " u'doc_count': 400,\n", " u'key': u'Alex Antonenko'},\n", " {u'commits': {u'value': 333},\n", " u'doc_count': 360,\n", " u'key': u'Sumit Mohanty'},\n", " {u'commits': {u'value': 459},\n", " u'doc_count': 332,\n", " u'key': u'Jonathan Hurley'},\n", " {u'commits': {u'value': 350},\n", " u'doc_count': 330,\n", " u'key': u'Oliver Szabo'},\n", " {u'commits': {u'value': 178}, u'doc_count': 294, u'key': u'Dmytro Sen'},\n", " {u'commits': {u'value': 247},\n", " u'doc_count': 268,\n", " u'key': u'Aravindan Vijayan'},\n", " {u'commits': {u'value': 271},\n", " u'doc_count': 261,\n", " u'key': u'Lisnichenko Dmitro'},\n", " {u'commits': {u'value': 349},\n", " u'doc_count': 249,\n", " u'key': u'Dipayan Bhowmick'},\n", " {u'commits': {u'value': 172},\n", " u'doc_count': 233,\n", " u'key': u'Jayush Luniya'}],\n", " u'doc_count_error_upper_bound': 15,\n", " u'sum_other_doc_count': 4246},\n", " u'doc_count': 7644,\n", " u'key': u'https://github.com/apache/ambari.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 237},\n", " u'doc_count': 542,\n", " u'key': u'Akira Ajisaka'},\n", " {u'commits': {u'value': 225}, u'doc_count': 362, u'key': u'Kihwal Lee'},\n", " {u'commits': {u'value': 253}, 
u'doc_count': 339, u'key': u'Jason Lowe'},\n", " {u'commits': {u'value': 296},\n", " u'doc_count': 296,\n", " u'key': u'Chris Nauroth'},\n", " {u'commits': {u'value': 379}, u'doc_count': 278, u'key': u'Andrew Wang'},\n", " {u'commits': {u'value': 192},\n", " u'doc_count': 218,\n", " u'key': u'Steve Loughran'},\n", " {u'commits': {u'value': 193}, u'doc_count': 194, u'key': u'Wangda Tan'},\n", " {u'commits': {u'value': 248},\n", " u'doc_count': 192,\n", " u'key': u'Varun Saxena'},\n", " {u'commits': {u'value': 188}, u'doc_count': 187, u'key': u'arp'},\n", " {u'commits': {u'value': 204}, u'doc_count': 185, u'key': u'junping_du'}],\n", " u'doc_count_error_upper_bound': 22,\n", " u'sum_other_doc_count': 3766},\n", " u'doc_count': 6559,\n", " u'key': u'https://github.com/apache/hadoop.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 441},\n", " u'doc_count': 365,\n", " u'key': u'Reynold Xin'},\n", " {u'commits': {u'value': 306},\n", " u'doc_count': 272,\n", " u'key': u'Dongjoon Hyun'},\n", " {u'commits': {u'value': 187},\n", " u'doc_count': 238,\n", " u'key': u'Shixiong Zhu'},\n", " {u'commits': {u'value': 259}, u'doc_count': 236, u'key': u'Wenchen Fan'},\n", " {u'commits': {u'value': 277}, u'doc_count': 227, u'key': u'Davies Liu'},\n", " {u'commits': {u'value': 253}, u'doc_count': 221, u'key': u'gatorsmile'},\n", " {u'commits': {u'value': 151}, u'doc_count': 183, u'key': u'Sean Owen'},\n", " {u'commits': {u'value': 153}, u'doc_count': 181, u'key': u'Josh Rosen'},\n", " {u'commits': {u'value': 163}, u'doc_count': 167, u'key': u'hyukjinkwon'},\n", " {u'commits': {u'value': 199},\n", " u'doc_count': 165,\n", " u'key': u'Yanbo Liang'}],\n", " u'doc_count_error_upper_bound': 27,\n", " u'sum_other_doc_count': 3802},\n", " u'doc_count': 6057,\n", " u'key': u'https://github.com/apache/spark.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 689},\n", " u'doc_count': 743,\n", " u'key': u'tedyu'},\n", " {u'commits': {u'value': 432}, u'doc_count': 
414, u'key': u'stack'},\n", " {u'commits': {u'value': 225},\n", " u'doc_count': 254,\n", " u'key': u'Matteo Bertozzi'},\n", " {u'commits': {u'value': 192},\n", " u'doc_count': 249,\n", " u'key': u'Enis Soztutar'},\n", " {u'commits': {u'value': 106},\n", " u'doc_count': 152,\n", " u'key': u'Andrew Kyle Purtell'},\n", " {u'commits': {u'value': 144}, u'doc_count': 145, u'key': u'zhangduo'},\n", " {u'commits': {u'value': 111}, u'doc_count': 138, u'key': u'chenheng'},\n", " {u'commits': {u'value': 109},\n", " u'doc_count': 128,\n", " u'key': u'Elliott Clark'},\n", " {u'commits': {u'value': 167}, u'doc_count': 123, u'key': u'Sean Busbey'},\n", " {u'commits': {u'value': 141},\n", " u'doc_count': 97,\n", " u'key': u'anoopsamjohn'}],\n", " u'doc_count_error_upper_bound': 5,\n", " u'sum_other_doc_count': 1275},\n", " u'doc_count': 3718,\n", " u'key': u'https://github.com/apache/hbase.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 450},\n", " u'doc_count': 327,\n", " u'key': u'Sergey Shelukhin'},\n", " {u'commits': {u'value': 220},\n", " u'doc_count': 200,\n", " u'key': u'Prasanth Jayachandran'},\n", " {u'commits': {u'value': 132},\n", " u'doc_count': 159,\n", " u'key': u'Jesus Camacho Rodriguez'},\n", " {u'commits': {u'value': 140},\n", " u'doc_count': 133,\n", " u'key': u'Siddharth Seth'},\n", " {u'commits': {u'value': 153},\n", " u'doc_count': 118,\n", " u'key': u'Eugene Koifman'},\n", " {u'commits': {u'value': 116},\n", " u'doc_count': 116,\n", " u'key': u'Pengcheng Xiong'},\n", " {u'commits': {u'value': 87}, u'doc_count': 89, u'key': u'Matt McCline'},\n", " {u'commits': {u'value': 57}, u'doc_count': 87, u'key': u'Wei'},\n", " {u'commits': {u'value': 59}, u'doc_count': 78, u'key': u'aihuaxu'},\n", " {u'commits': {u'value': 79}, u'doc_count': 76, u'key': u'ctang'}],\n", " u'doc_count_error_upper_bound': 5,\n", " u'sum_other_doc_count': 956},\n", " u'doc_count': 2339,\n", " u'key': u'https://github.com/apache/hive.git'},\n", " {u'by_author': {u'buckets': 
[{u'commits': {u'value': 101},\n", " u'doc_count': 143,\n", " u'key': u'Hitesh Shah'},\n", " {u'commits': {u'value': 75},\n", " u'doc_count': 90,\n", " u'key': u'Jonathan Eagles'},\n", " {u'commits': {u'value': 127},\n", " u'doc_count': 89,\n", " u'key': u'Sreenath Somarajapuram'},\n", " {u'commits': {u'value': 123},\n", " u'doc_count': 75,\n", " u'key': u'Siddharth Seth'},\n", " {u'commits': {u'value': 61},\n", " u'doc_count': 54,\n", " u'key': u'Rajesh Balamohan'},\n", " {u'commits': {u'value': 47}, u'doc_count': 48, u'key': u'Jason Lowe'},\n", " {u'commits': {u'value': 13}, u'doc_count': 11, u'key': u'Bikas Saha'},\n", " {u'commits': {u'value': 8}, u'doc_count': 11, u'key': u'Ming Ma'},\n", " {u'commits': {u'value': 8}, u'doc_count': 7, u'key': u'Jeff Zhang'},\n", " {u'commits': {u'value': 3},\n", " u'doc_count': 5,\n", " u'key': u'Rohini Palaniswamy'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 0},\n", " u'doc_count': 533,\n", " u'key': u'https://github.com/apache/tez.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 73},\n", " u'doc_count': 92,\n", " u'key': u'Rohini Palaniswamy'},\n", " {u'commits': {u'value': 81}, u'doc_count': 84, u'key': u'Jianyong Dai'},\n", " {u'commits': {u'value': 49}, u'doc_count': 58, u'key': u'Xuefu Zhang'},\n", " {u'commits': {u'value': 36}, u'doc_count': 29, u'key': u'Koji Noguchi'},\n", " {u'commits': {u'value': 8},\n", " u'doc_count': 7,\n", " u'key': u'Praveen Rachabattuni'},\n", " {u'commits': {u'value': 2}, u'doc_count': 2, u'key': u'Cheolsoo Park'},\n", " {u'commits': {u'value': 2}, u'doc_count': 2, u'key': u'Jeff Zhang'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 0},\n", " u'doc_count': 274,\n", " u'key': u'https://github.com/apache/pig.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 124},\n", " u'doc_count': 162,\n", " u'key': u'smarthi'},\n", " {u'commits': {u'value': 86},\n", " u'doc_count': 76,\n", " u'key': u'Andrew 
Palumbo'},\n", " {u'commits': {u'value': 13},\n", " u'doc_count': 17,\n", " u'key': u'Andrew Musselman'},\n", " {u'commits': {u'value': 6}, u'doc_count': 6, u'key': u'pferrel'},\n", " {u'commits': {u'value': 3},\n", " u'doc_count': 3,\n", " u'key': u'Dmitriy Lyubimov'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'Albert Chu'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'Karl Richter'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'Marku'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'yougoer'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 0},\n", " u'doc_count': 268,\n", " u'key': u'https://github.com/apache/mahout.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 47},\n", " u'doc_count': 72,\n", " u'key': u'Ryan Blue'},\n", " {u'commits': {u'value': 29}, u'doc_count': 32, u'key': u'Niels Basjes'},\n", " {u'commits': {u'value': 26}, u'doc_count': 27, u'key': u'Thomas White'},\n", " {u'commits': {u'value': 23},\n", " u'doc_count': 20,\n", " u'key': u'Martin Kleppmann'},\n", " {u'commits': {u'value': 18}, u'doc_count': 17, u'key': u'Doug Cutting'},\n", " {u'commits': {u'value': 8},\n", " u'doc_count': 6,\n", " u'key': u'Gabor Szadovszky'},\n", " {u'commits': {u'value': 8},\n", " u'doc_count': 6,\n", " u'key': u'Thiruvalluvan M. 
G'},\n", " {u'commits': {u'value': 5}, u'doc_count': 4, u'key': u'Sachin Goyal'},\n", " {u'commits': {u'value': 5}, u'doc_count': 4, u'key': u'Yibing Shi'},\n", " {u'commits': {u'value': 3}, u'doc_count': 3, u'key': u'Sean Busbey'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 19},\n", " u'doc_count': 210,\n", " u'key': u'https://github.com/apache/avro.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 114},\n", " u'doc_count': 104,\n", " u'key': u'Patrick Hunt'},\n", " {u'commits': {u'value': 24}, u'doc_count': 30, u'key': u'Chris Nauroth'},\n", " {u'commits': {u'value': 27},\n", " u'doc_count': 22,\n", " u'key': u'Rakesh Radhakrishnan'},\n", " {u'commits': {u'value': 13},\n", " u'doc_count': 17,\n", " u'key': u'Flavio Paiva Junqueira'},\n", " {u'commits': {u'value': 9},\n", " u'doc_count': 10,\n", " u'key': u'Ra\\xfal Guti\\xe9rrez Segal\\xe9s'},\n", " {u'commits': {u'value': 6},\n", " u'doc_count': 8,\n", " u'key': u'Camille Fournier'},\n", " {u'commits': {u'value': 6}, u'doc_count': 5, u'key': u'Joan'},\n", " {u'commits': {u'value': 2},\n", " u'doc_count': 2,\n", " u'key': u'Alexander Shraer'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'Michael Han'},\n", " {u'commits': {u'value': 1},\n", " u'doc_count': 1,\n", " u'key': u'Raul Gutierrez S'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 0},\n", " u'doc_count': 200,\n", " u'key': u'https://github.com/apache/zookeeper.git'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 33}}" ] } ], "prompt_number": 139 }, { "cell_type": "code", "collapsed": false, "input": [ "# COUNTING UNIQUE COMMITS FILTERED BY DATE AND GROUPED BY REPOSITORY AND AUTHOR\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', author_date={'gt': datetime(2015, 12, 12)}).filter('range', files={'gt':0})\n", "s.aggs.bucket('by_repo', 'terms', field='repo_name').bucket('by_author', 'terms', 
field='author_name').metric('commits', 'cardinality', field='hash')\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 140, "text": [ "{u'by_repo': {u'buckets': [{u'by_author': {u'buckets': [{u'commits': {u'value': 673},\n", " u'doc_count': 671,\n", " u'key': u'Andrew Onishuk'},\n", " {u'commits': {u'value': 553},\n", " u'doc_count': 400,\n", " u'key': u'Alex Antonenko'},\n", " {u'commits': {u'value': 333},\n", " u'doc_count': 359,\n", " u'key': u'Sumit Mohanty'},\n", " {u'commits': {u'value': 350},\n", " u'doc_count': 330,\n", " u'key': u'Oliver Szabo'},\n", " {u'commits': {u'value': 459},\n", " u'doc_count': 317,\n", " u'key': u'Jonathan Hurley'},\n", " {u'commits': {u'value': 178}, u'doc_count': 294, u'key': u'Dmytro Sen'},\n", " {u'commits': {u'value': 247},\n", " u'doc_count': 268,\n", " u'key': u'Aravindan Vijayan'},\n", " {u'commits': {u'value': 271},\n", " u'doc_count': 261,\n", " u'key': u'Lisnichenko Dmitro'},\n", " {u'commits': {u'value': 349},\n", " u'doc_count': 249,\n", " u'key': u'Dipayan Bhowmick'},\n", " {u'commits': {u'value': 172},\n", " u'doc_count': 233,\n", " u'key': u'Jayush Luniya'}],\n", " u'doc_count_error_upper_bound': 14,\n", " u'sum_other_doc_count': 4210},\n", " u'doc_count': 7592,\n", " u'key': u'https://github.com/apache/ambari.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 237},\n", " u'doc_count': 542,\n", " u'key': u'Akira Ajisaka'},\n", " {u'commits': {u'value': 225}, u'doc_count': 362, u'key': u'Kihwal Lee'},\n", " {u'commits': {u'value': 253}, u'doc_count': 339, u'key': u'Jason Lowe'},\n", " {u'commits': {u'value': 296},\n", " u'doc_count': 296,\n", " u'key': u'Chris Nauroth'},\n", " {u'commits': {u'value': 379}, u'doc_count': 278, u'key': u'Andrew Wang'},\n", " {u'commits': {u'value': 192},\n", " u'doc_count': 218,\n", " u'key': u'Steve Loughran'},\n", " {u'commits': {u'value': 
193}, u'doc_count': 194, u'key': u'Wangda Tan'},\n", " {u'commits': {u'value': 248},\n", " u'doc_count': 192,\n", " u'key': u'Varun Saxena'},\n", " {u'commits': {u'value': 188}, u'doc_count': 185, u'key': u'arp'},\n", " {u'commits': {u'value': 204}, u'doc_count': 185, u'key': u'junping_du'}],\n", " u'doc_count_error_upper_bound': 22,\n", " u'sum_other_doc_count': 3752},\n", " u'doc_count': 6543,\n", " u'key': u'https://github.com/apache/hadoop.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 424},\n", " u'doc_count': 353,\n", " u'key': u'Reynold Xin'},\n", " {u'commits': {u'value': 306},\n", " u'doc_count': 272,\n", " u'key': u'Dongjoon Hyun'},\n", " {u'commits': {u'value': 187},\n", " u'doc_count': 238,\n", " u'key': u'Shixiong Zhu'},\n", " {u'commits': {u'value': 259}, u'doc_count': 236, u'key': u'Wenchen Fan'},\n", " {u'commits': {u'value': 277}, u'doc_count': 227, u'key': u'Davies Liu'},\n", " {u'commits': {u'value': 253}, u'doc_count': 221, u'key': u'gatorsmile'},\n", " {u'commits': {u'value': 153}, u'doc_count': 181, u'key': u'Josh Rosen'},\n", " {u'commits': {u'value': 151}, u'doc_count': 177, u'key': u'Sean Owen'},\n", " {u'commits': {u'value': 199}, u'doc_count': 165, u'key': u'Yanbo Liang'},\n", " {u'commits': {u'value': 163},\n", " u'doc_count': 164,\n", " u'key': u'hyukjinkwon'}],\n", " u'doc_count_error_upper_bound': 27,\n", " u'sum_other_doc_count': 3796},\n", " u'doc_count': 6030,\n", " u'key': u'https://github.com/apache/spark.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 689},\n", " u'doc_count': 743,\n", " u'key': u'tedyu'},\n", " {u'commits': {u'value': 432}, u'doc_count': 414, u'key': u'stack'},\n", " {u'commits': {u'value': 225},\n", " u'doc_count': 254,\n", " u'key': u'Matteo Bertozzi'},\n", " {u'commits': {u'value': 192},\n", " u'doc_count': 249,\n", " u'key': u'Enis Soztutar'},\n", " {u'commits': {u'value': 106},\n", " u'doc_count': 152,\n", " u'key': u'Andrew Kyle Purtell'},\n", " {u'commits': {u'value': 
144}, u'doc_count': 145, u'key': u'zhangduo'},\n", " {u'commits': {u'value': 111}, u'doc_count': 138, u'key': u'chenheng'},\n", " {u'commits': {u'value': 109},\n", " u'doc_count': 128,\n", " u'key': u'Elliott Clark'},\n", " {u'commits': {u'value': 159}, u'doc_count': 120, u'key': u'Sean Busbey'},\n", " {u'commits': {u'value': 82},\n", " u'doc_count': 96,\n", " u'key': u'Mikhail Antonov'}],\n", " u'doc_count_error_upper_bound': 5,\n", " u'sum_other_doc_count': 1274},\n", " u'doc_count': 3713,\n", " u'key': u'https://github.com/apache/hbase.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 450},\n", " u'doc_count': 324,\n", " u'key': u'Sergey Shelukhin'},\n", " {u'commits': {u'value': 220},\n", " u'doc_count': 200,\n", " u'key': u'Prasanth Jayachandran'},\n", " {u'commits': {u'value': 132},\n", " u'doc_count': 159,\n", " u'key': u'Jesus Camacho Rodriguez'},\n", " {u'commits': {u'value': 140},\n", " u'doc_count': 133,\n", " u'key': u'Siddharth Seth'},\n", " {u'commits': {u'value': 153},\n", " u'doc_count': 118,\n", " u'key': u'Eugene Koifman'},\n", " {u'commits': {u'value': 116},\n", " u'doc_count': 116,\n", " u'key': u'Pengcheng Xiong'},\n", " {u'commits': {u'value': 87}, u'doc_count': 89, u'key': u'Matt McCline'},\n", " {u'commits': {u'value': 57}, u'doc_count': 87, u'key': u'Wei'},\n", " {u'commits': {u'value': 59}, u'doc_count': 78, u'key': u'aihuaxu'},\n", " {u'commits': {u'value': 79}, u'doc_count': 76, u'key': u'ctang'}],\n", " u'doc_count_error_upper_bound': 5,\n", " u'sum_other_doc_count': 951},\n", " u'doc_count': 2331,\n", " u'key': u'https://github.com/apache/hive.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 101},\n", " u'doc_count': 143,\n", " u'key': u'Hitesh Shah'},\n", " {u'commits': {u'value': 75},\n", " u'doc_count': 90,\n", " u'key': u'Jonathan Eagles'},\n", " {u'commits': {u'value': 127},\n", " u'doc_count': 89,\n", " u'key': u'Sreenath Somarajapuram'},\n", " {u'commits': {u'value': 123},\n", " u'doc_count': 
75,\n", " u'key': u'Siddharth Seth'},\n", " {u'commits': {u'value': 61},\n", " u'doc_count': 54,\n", " u'key': u'Rajesh Balamohan'},\n", " {u'commits': {u'value': 47}, u'doc_count': 48, u'key': u'Jason Lowe'},\n", " {u'commits': {u'value': 13}, u'doc_count': 11, u'key': u'Bikas Saha'},\n", " {u'commits': {u'value': 8}, u'doc_count': 11, u'key': u'Ming Ma'},\n", " {u'commits': {u'value': 8}, u'doc_count': 7, u'key': u'Jeff Zhang'},\n", " {u'commits': {u'value': 3},\n", " u'doc_count': 5,\n", " u'key': u'Rohini Palaniswamy'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 0},\n", " u'doc_count': 533,\n", " u'key': u'https://github.com/apache/tez.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 73},\n", " u'doc_count': 92,\n", " u'key': u'Rohini Palaniswamy'},\n", " {u'commits': {u'value': 81}, u'doc_count': 81, u'key': u'Jianyong Dai'},\n", " {u'commits': {u'value': 49}, u'doc_count': 58, u'key': u'Xuefu Zhang'},\n", " {u'commits': {u'value': 36}, u'doc_count': 29, u'key': u'Koji Noguchi'},\n", " {u'commits': {u'value': 8},\n", " u'doc_count': 7,\n", " u'key': u'Praveen Rachabattuni'},\n", " {u'commits': {u'value': 2}, u'doc_count': 2, u'key': u'Cheolsoo Park'},\n", " {u'commits': {u'value': 2}, u'doc_count': 2, u'key': u'Jeff Zhang'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 0},\n", " u'doc_count': 271,\n", " u'key': u'https://github.com/apache/pig.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 124},\n", " u'doc_count': 161,\n", " u'key': u'smarthi'},\n", " {u'commits': {u'value': 86},\n", " u'doc_count': 73,\n", " u'key': u'Andrew Palumbo'},\n", " {u'commits': {u'value': 12},\n", " u'doc_count': 12,\n", " u'key': u'Andrew Musselman'},\n", " {u'commits': {u'value': 6}, u'doc_count': 5, u'key': u'pferrel'},\n", " {u'commits': {u'value': 3},\n", " u'doc_count': 3,\n", " u'key': u'Dmitriy Lyubimov'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'Albert Chu'},\n", 
" {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'Karl Richter'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'Marku'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'yougoer'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 0},\n", " u'doc_count': 258,\n", " u'key': u'https://github.com/apache/mahout.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 46},\n", " u'doc_count': 70,\n", " u'key': u'Ryan Blue'},\n", " {u'commits': {u'value': 29}, u'doc_count': 32, u'key': u'Niels Basjes'},\n", " {u'commits': {u'value': 24}, u'doc_count': 22, u'key': u'Thomas White'},\n", " {u'commits': {u'value': 23},\n", " u'doc_count': 20,\n", " u'key': u'Martin Kleppmann'},\n", " {u'commits': {u'value': 18}, u'doc_count': 17, u'key': u'Doug Cutting'},\n", " {u'commits': {u'value': 8},\n", " u'doc_count': 6,\n", " u'key': u'Gabor Szadovszky'},\n", " {u'commits': {u'value': 8},\n", " u'doc_count': 6,\n", " u'key': u'Thiruvalluvan M. 
G'},\n", " {u'commits': {u'value': 5}, u'doc_count': 4, u'key': u'Sachin Goyal'},\n", " {u'commits': {u'value': 5}, u'doc_count': 4, u'key': u'Yibing Shi'},\n", " {u'commits': {u'value': 3}, u'doc_count': 3, u'key': u'Sean Busbey'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 19},\n", " u'doc_count': 203,\n", " u'key': u'https://github.com/apache/avro.git'},\n", " {u'by_author': {u'buckets': [{u'commits': {u'value': 114},\n", " u'doc_count': 104,\n", " u'key': u'Patrick Hunt'},\n", " {u'commits': {u'value': 20}, u'doc_count': 27, u'key': u'Chris Nauroth'},\n", " {u'commits': {u'value': 13},\n", " u'doc_count': 17,\n", " u'key': u'Flavio Paiva Junqueira'},\n", " {u'commits': {u'value': 21},\n", " u'doc_count': 17,\n", " u'key': u'Rakesh Radhakrishnan'},\n", " {u'commits': {u'value': 6},\n", " u'doc_count': 8,\n", " u'key': u'Camille Fournier'},\n", " {u'commits': {u'value': 6},\n", " u'doc_count': 8,\n", " u'key': u'Ra\\xfal Guti\\xe9rrez Segal\\xe9s'},\n", " {u'commits': {u'value': 6}, u'doc_count': 5, u'key': u'Joan'},\n", " {u'commits': {u'value': 2},\n", " u'doc_count': 2,\n", " u'key': u'Alexander Shraer'},\n", " {u'commits': {u'value': 1}, u'doc_count': 1, u'key': u'Michael Han'},\n", " {u'commits': {u'value': 1},\n", " u'doc_count': 1,\n", " u'key': u'Raul Gutierrez S'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 0},\n", " u'doc_count': 190,\n", " u'key': u'https://github.com/apache/zookeeper.git'}],\n", " u'doc_count_error_upper_bound': 0,\n", " u'sum_other_doc_count': 33}}" ] } ], "prompt_number": 140 }, { "cell_type": "code", "collapsed": false, "input": [ "# PRINT QUERY\n", "s = Search(using=client, index=\"git-apachecon\").filter('range', author_date={'gt': datetime(2015, 12, 12)})\n", "s.aggs.bucket('by_repo', 'terms', field='author_name').metric('commits', 'cardinality', field='hash')\n", "s.to_dict()" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", 
"prompt_number": 112, "text": [ "Terms(aggs={'commits': Cardinality(field='hash')}, field='author_name')" ] } ], "prompt_number": 112 }, { "cell_type": "code", "collapsed": false, "input": [ "# USE QUERY\n", "# Search.from_dict() is a classmethod: it returns a NEW Search that is not bound to\n", "# any client or index, so its result must be kept and bound before executing.\n", "query_body = {\n", "    'aggs': {\n", "        'by_repo': {\n", "            'terms': {'field': 'author_name'},\n", "            'aggs': {'commits': {'cardinality': {'field': 'hash'}}},\n", "        },\n", "    },\n", "    'query': {'bool': {'filter': [{'range': {'author_date': {'gt': '2015-12-12'}}}]}},\n", "}\n", "s = Search.from_dict(query_body).using(client).index(\"git-apachecon\")\n", "result = s.execute()\n", "result.to_dict()[\"aggregations\"]" ], "language": "python", "metadata": {}, "outputs": [ { "metadata": {}, "output_type": "pyout", "prompt_number": 123, "text": [ "{u'by_repo': {u'buckets': [{u'commits': {u'value': 809},\n", " u'doc_count': 760,\n", " u'key': u'tedyu'},\n", " {u'commits': {u'value': 685}, u'doc_count': 671, u'key': u'Andrew Onishuk'},\n", " {u'commits': {u'value': 543}, u'doc_count': 542, u'key': u'Akira Ajisaka'},\n", " {u'commits': {u'value': 428}, u'doc_count': 414, u'key': u'stack'},\n", " {u'commits': {u'value': 413}, u'doc_count': 400, u'key': u'Alex Antonenko'},\n", " {u'commits': {u'value': 394}, u'doc_count': 387, u'key': u'Jason Lowe'},\n", " {u'commits': {u'value': 347}, u'doc_count': 365, u'key': u'Reynold Xin'},\n", " {u'commits': {u'value': 388}, u'doc_count': 362, u'key': u'Kihwal Lee'},\n", " {u'commits': {u'value': 357}, u'doc_count': 360, u'key': u'Sumit Mohanty'},\n", " {u'commits': {u'value': 333},\n", " u'doc_count': 332,\n", " u'key': u'Jonathan Hurley'}],\n", " u'doc_count_error_upper_bound': 165,\n", " u'sum_other_doc_count': 23242}}" ] } ], "prompt_number": 123 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }