{ "cells": [ { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "#r \"nuget:Parquet.Net\"" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "#r \"nuget:System.IO\" " ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "using System.Collections.Generic;\n", "using System.IO;\n", "using System.Linq;\n", "using Parquet; \n", "using Parquet.Data;" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "// create data columns with schema metadata and the data you need\n", "var idColumn = new DataColumn(\n", " new DataField<int>(\"id\"),\n", " new int[] { 1, 2 });\n", "\n", "var cityColumn = new DataColumn(\n", " new DataField<string>(\"city\"),\n", " new string[] { \"London\", \"Derby\" });\n", "\n", "// create file schema\n", "var schema = new Schema(idColumn.Field, cityColumn.Field);" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "Stream ms = new System.IO.MemoryStream();\n", "\n", "using (var parquetWriter = new ParquetWriter(schema, ms))\n", "{\n", " // create a new row group in the file\n", " using (ParquetRowGroupWriter groupWriter = parquetWriter.CreateRowGroup())\n", " {\n", " groupWriter.WriteColumn(idColumn);\n", " groupWriter.WriteColumn(cityColumn);\n", " }\n", "}\n" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "ms.Position = 0; " ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1\n", "2\n" ] } ], "source": [ "// open parquet file reader\n", "using (var parquetReader = new ParquetReader(ms))\n", "{\n", " // get file schema (available straight after opening parquet reader)\n", " // however, get only data fields as only they contain data values\n", " DataField[] dataFields = parquetReader.Schema.GetDataFields();\n", 
"\n", " // enumerate through row groups in this file\n", " for(int i = 0; i < parquetReader.RowGroupCount; i++)\n", " {\n", " // create row group reader\n", " using (ParquetRowGroupReader groupReader = parquetReader.OpenRowGroupReader(i))\n", " {\n", " // read all columns inside each row group (you have an option to read only\n", " // required columns if you need to).\n", " DataColumn[] columns = dataFields.Select(groupReader.ReadColumn).ToArray();\n", "\n", " // get first column, for instance\n", " DataColumn firstColumn = columns[0];\n", "\n", " // .Data member contains a typed array of column data you can cast to the type of the column\n", " Array data = firstColumn.Data;\n", " int[] ids = (int[])data;\n", " foreach (int id in ids)\n", " { \n", " Console.WriteLine(id);\n", " }\n", " }\n", " }\n", "}\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": ".NET (C#)", "language": "C#", "name": ".net-csharp" }, "language_info": { "file_extension": ".cs", "mimetype": "text/x-csharp", "name": "C#", "pygments_lexer": "csharp", "version": "8.0" } }, "nbformat": 4, "nbformat_minor": 2 }