{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook was prepared by [Donne Martin](http://donnemartin.com). Source and license info is on [GitHub](https://github.com/donnemartin/data-science-ipython-notebooks)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HDFS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run an HDFS command:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run a file system command on the file systems (FsShell):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "List the user's home directory:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -ls"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "List the HDFS root directory:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -ls /"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copy a local file to the user's directory on HDFS:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -put file.txt file.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Display the contents of the specified HDFS file:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -cat file.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print the last 10 lines of the file to the terminal:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -cat file.txt | tail -n 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "View a directory and all of its files:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -cat dir/* | less"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copy an HDFS file to local:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -get file.txt file.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a directory on HDFS:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -mkdir dir"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Recursively delete the specified directory and all of its contents:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -rm -r dir"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Specify HDFS file in Spark (paths are relative to the user's home HDFS directory):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data = sc.textFile (\"hdfs://hdfs-host:port/path/file.txt\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}