{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook was prepared by [Donne Martin](http://donnemartin.com). Source and license info is on [GitHub](https://github.com/donnemartin/data-science-ipython-notebooks)."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# HDFS"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run an HDFS command:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run a file system shell (FsShell) command on the file systems supported in Hadoop:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "List the user's home directory:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -ls"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "List the HDFS root directory:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -ls /"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copy a local file to the user's home directory on HDFS:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -put file.txt file.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Display the contents of the specified HDFS file:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -cat file.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Print the last 10 lines of the file to the terminal:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -cat file.txt | tail -n 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "View the concatenated contents of all files in a directory:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -cat dir/* | less"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Copy an HDFS file to local:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -get file.txt file.txt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Create a directory on HDFS:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -mkdir dir"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Recursively delete the specified directory and all of its contents:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "!hdfs dfs -rm -r dir"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Specify an HDFS file in Spark with a fully qualified URI, replacing `hdfs-host`, `port` and the path with your cluster's values. This assumes `sc` is an existing SparkContext, such as the one the PySpark shell creates. When HDFS is the default file system, a relative path without a scheme is instead resolved against the user's home HDFS directory:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "data = sc.textFile(\"hdfs://hdfs-host:port/path/file.txt\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}