#!/usr/bin/env python # encoding: utf-8 ''' genRSS -- generate a RSS 2 feed from media files in a directory. @author: Amine SEHILI @copyright: 2014-2017 Amine Sehili @license: MIT @contact: amine.sehili <AT> gmail.com @deffield updated: April 8th 2017 ''' # pylint:enable=bad-whitespace import sys import os import glob import fnmatch import time import urllib import mimetypes import argparse from optparse import OptionParser __all__ = [] __version__ = 0.1 __date__ = '2014-11-01' __updated__ = '2017-04-08' DEBUG = 0 TESTRUN = 0 PROFILE = 0 def getFiles(dirname, extensions=None, recursive=False): ''' Return the list of files (relative paths, starting from dirname) in a given directory. Unless a list of the desired file extensions is given, all files in dirname are returned. If recursive = True, also look for files in sub directories of direname. Parameters ---------- dirname : string path to a directory under the file system. extensions : list of string Extensions of the accepted files. Default = None (i.e. return all files). recursive : bool If True, recursively look for files in sub directories. Default = False. Returns ------- selectedFiles : list A list of file paths. Examples -------- >>> import os >>> m = "test{0}media{0}".format(os.sep) >>> expected = "['{0}1.mp3', '{0}1.mp4', '{0}1.ogg', '{0}2.MP3']".format(m) >>> str(getFiles("{0}".format(m))) == expected True >>> expected = "['{0}1.mp3', '{0}1.mp4', '{0}1.ogg', '{0}2.MP3', '{0}subdir_1{1}2.MP4', " >>> expected += "'{0}subdir_1{1}3.mp3', '{0}subdir_1{1}4.mp3', '{0}subdir_2{1}4.mp4', " >>> expected += "'{0}subdir_2{1}5.mp3', '{0}subdir_2{1}6.mp3']" >>> str(getFiles("{0}".format(m), recursive=True)) == expected.format(m, os.sep) True >>> expected = "['{0}1.mp3', '{0}2.MP3']".format(m) >>> str(getFiles("{0}".format(m), extensions=["mp3"])) == expected True >>> expected = "['{0}1.mp3', '{0}1.ogg', '{0}2.MP3', '{0}subdir_1{1}3.mp3', " >>> expected += "'{0}subdir_1{1}4.mp3', '{0}subdir_2{1}5.mp3', '{0}subdir_2{1}6.mp3']" >>> str(getFiles("{0}".format(m), extensions=["mp3", "ogg"], recursive=True)) == expected.format(m, os.sep) True >>> expected = "['{0}1.mp4', '{0}subdir_1{1}2.MP4', '{0}subdir_2{1}4.mp4']".format(m, os.sep) >>> str(getFiles("{0}".format(m), extensions=["mp4"], recursive=True)) == expected True ''' if dirname[-1] != os.sep: dirname += os.sep selectedFiles = [] allFiles = [] if recursive: for root, dirs, filenames in os.walk(dirname): for name in filenames: allFiles.append(os.path.join(root, name)) else: allFiles = [f for f in glob.glob(dirname + "*") if os.path.isfile(f)] if extensions is not None: for ext in set([e.lower() for e in extensions]): selectedFiles += [n for n in allFiles if fnmatch.fnmatch(n.lower(), "*{0}".format(ext))] else: selectedFiles = allFiles return sorted(set(selectedFiles)) def buildItem(link, title, guid=None, description="", pubDate=None, indent=" ", extraTags=None): ''' Generate a RSS 2 item and return it as a string. Parameters ---------- link : string URL of the item. title : string Title of the item. guid : string Unique identifier of the item. If no guid is given, link is used as the identifier. Default = None. description : string Description of the item. Default = "" pubDate : string Date of publication of the item. Should follow the RFC-822 format, otherwise the feed will not pass a validator. This method doses (yet) not check the compatibility of pubDate. Here are a few examples of correct RFC-822 dates: - "Wed, 02 Oct 2002 08:00:00 EST" - "Mon, 22 Dec 2014 18:30:00 +0000" You can use the following code to gererate a RFC-822 valid time: time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.localtime(time.time())) Default = None (no pubDate tag will be added to the generated item) indent : string A string of white spaces used to indent the elements of the item. 3 * len(indent) white spaces will be left before <guid>, <link>, <title> and <description> and 2 * len(indent) before item. extraTags : a list of dictionaries Each dictionary contains the following keys - "na1me": name of the tag (mandatory) - "value": value of the tag (optional) - "params": string or list of string, parameters of the tag (optional) Example: ------- Either of the following two dictionaries: {"name" : enclosure, "value" : None, "params" : 'url="file.mp3" type="audio/mpeg" length="1234"'} {"name" : enclosure, "value" : None, "params" : ['url="file.mp3"', 'type="audio/mpeg"', 'length="1234"']} will give this tag: <enclosure url="file.mp3" type="audio/mpeg" length="1234"/> whereas this dictionary: {"name" : "aTag", "value" : "aValue", "params" : None} would give this tag: <aTag>aValue</aTag> Returns ------- A string representing a RSS 2 item. Examples -------- >>> item = buildItem("my/web/site/media/item1", title = "Title of item 1", guid = "item1", ... description="This is item 1", pubDate="Mon, 22 Dec 2014 18:30:00 +0000", ... indent = " ") >>> print(item) <item> <guid>item1</guid> <link>my/web/site/media/item1</link> <title>Title of item 1</title> <description>This is item 1</description> <pubDate>Mon, 22 Dec 2014 18:30:00 +0000</pubDate> </item> >>> item = buildItem("my/web/site/media/item2", title = "Title of item 2", indent = " ", ... extraTags=[{"name" : "itunes:duration" , "value" : "06:08"}]) >>> print(item) <item> <guid>my/web/site/media/item2</guid> <link>my/web/site/media/item2</link> <title>Title of item 2</title> <description></description> <itunes:duration>06:08</itunes:duration> </item> >>> item = buildItem("my/web/site/media/item2", title = "Title of item 2", indent = " ", ... extraTags=[{"name" : "enclosure" , ... "params" : 'url="http://example.com/media/file.mp3"' ... ' type="audio/mpeg" length="1234"'}]) >>> print(item) <item> <guid>my/web/site/media/item2</guid> <link>my/web/site/media/item2</link> <title>Title of item 2</title> <description></description> <enclosure url="http://example.com/media/file.mp3" type="audio/mpeg" length="1234"/> </item> >>> item = buildItem("my/web/site/media/item2", title = "Title of item 2", indent = " ", ... extraTags= [{"name" : "enclosure", "value" : None, ... "params" : ['url="file.mp3"', 'type="audio/mpeg"', ... 'length="1234"']}]) >>> print(item) <item> <guid>my/web/site/media/item2</guid> <link>my/web/site/media/item2</link> <title>Title of item 2</title> <description></description> <enclosure url="file.mp3" type="audio/mpeg" length="1234"/> </item> ''' if guid is None: guid = link guid = "{0}<guid>{1}</guid>\n".format(indent * 3, guid) link = "{0}<link>{1}</link>\n".format(indent * 3, link) title = "{0}<title>{1}</title>\n".format(indent * 3, title) descrption = "{0}<description>{1}</description>\n".format(indent * 3, description) if pubDate is not None: pubDate = "{0}<pubDate>{1}</pubDate>\n".format(indent * 3, pubDate) else: pubDate = "" extra = "" if extraTags is not None: for tag in extraTags: if tag is None: continue name = tag["name"] value = tag.get("value", None) params = tag.get("params", '') if params is None: params = '' if isinstance(params, (list)): params = " ".join(params) if len(params) > 0: params = " " + params extra += "{0}<{1}{2}".format(indent * 3, name, params) extra += "{0}\n".format("/>" if value is None else ">{0}</{1}>".format(value, name)) return "{0}<item>\n{1}{2}{3}{4}{5}{6}{0}</item>".format(indent * 2, guid, link, title, descrption, pubDate, extra) def fileToItem(host, fname, pubDate): ''' Inspect a file name to determine what kind of RSS item to build, and return the built item. Parameters ---------- host : string The hostname and directory to use for the link. fname : string File name to inspect. pubDate : string Publication date in RFC 822 format. Returns ------- A string representing an RSS item, as with buildItem. Examples -------- >>> print fileToItem('example.com/', 'test/media/1.mp3', 'Mon, 16 Jan 2017 23:55:07 +0000') <item> <guid>example.com/test/media/1.mp3</guid> <link>example.com/test/media/1.mp3</link> <title>1.mp3</title> <description>1.mp3</description> <pubDate>Mon, 16 Jan 2017 23:55:07 +0000</pubDate> <enclosure url="example.com/test/media/1.mp3" type="audio/mpeg" length="0"/> </item> >>> print fileToItem('example.com/', 'test/invalid/checksum.md5', 'Mon, 16 Jan 2017 23:55:07 +0000') <item> <guid>example.com/test/invalid/checksum.md5</guid> <link>example.com/test/invalid/checksum.md5</link> <title>checksum.md5</title> <description>checksum.md5</description> <pubDate>Mon, 16 Jan 2017 23:55:07 +0000</pubDate> </item> >>> print fileToItem('example.com/', 'test/invalid/windows.exe', 'Mon, 16 Jan 2017 23:55:07 +0000') <item> <guid>example.com/test/invalid/windows.exe</guid> <link>example.com/test/invalid/windows.exe</link> <title>windows.exe</title> <description>windows.exe</description> <pubDate>Mon, 16 Jan 2017 23:55:07 +0000</pubDate> </item> ''' fileURL = urllib.quote(host + fname.replace("\\", "/"), ":/") fileMimeType = mimetypes.guess_type(fname)[0] if fileMimeType is not None and ("audio" in fileMimeType or "video" in fileMimeType): tagParams = "url=\"{0}\" type=\"{1}\" length=\"{2}\"".format(fileURL, fileMimeType, os.path.getsize(fname)) enclosure = {"name" : "enclosure", "value" : None, "params": tagParams} else: enclosure = None return buildItem(link=fileURL, title=os.path.basename(fname), guid=fileURL, description=os.path.basename(fname), pubDate=pubDate, extraTags=[enclosure]) def main(argv=None): program_name = os.path.basename(sys.argv[0]) program_version = "v0.1" program_build_date = "%s" % __updated__ program_version_string = '%%prog %s (%s)' % (program_version, program_build_date) program_usage = "genRSS -d directory [OPTIONS]" program_longdesc = "Generates an RSS feed from files in a directory" program_license = "Copyright 2014-2017 Amine SEHILI. Licensed under the MIT License" if argv is None: argv = sys.argv[1:] try: parser = argparse.ArgumentParser(usage=program_usage, description=program_longdesc, formatter_class=argparse.RawTextHelpFormatter) parser.add_argument("-d", "--dirname", dest="dirname", default=[], action='append', help="Directory to look for media files in.\n" "This directory name will be appended to the host name\n" "to create absolute paths to your media files.", metavar="DIRECTORY") parser.add_argument("-r", "--recursive", dest="recursive", action="store_true", default=False, help="Look for media files recursively in sub directories\n" "[Default:False]", ) parser.add_argument("-e", "--extensions", dest="extensions", type=str, default=None, metavar="STRING", help="A comma separated list of extensions (e.g. mp3,mp4,avi,ogg)\n[Default: all files]", ) parser.add_argument("-o", "--out", dest="outfile", help="Output RSS file [default: stdout]", metavar="FILE") parser.add_argument("-H", "--host", dest="host", default="http://localhost:8080", metavar="URL", help="Host name (or IP address), possibly with a protocol\n" "(default: http) a port number and the path to the base\n" "directory where your media directory is located.\n" "Examples of host names:\n" " - http://localhost:8080 [default]\n" " - mywebsite.com/media/JapaneseLessons\n" " - mywebsite\n" " - 192.168.1.12:8080\n" " - http://192.168.1.12/media/JapaneseLessons\n", ) parser.add_argument("-i", "--image", dest="image", default=None, metavar="URL", help="Absolute or relative URL for feed's image [default: None]", ) parser.add_argument("-t", "--title", dest="title", default=None, metavar="STRING", help="Title of the podcast [Defaule:None]", ) parser.add_argument("-p", "--description", dest="description", default=None, metavar="STRING", help="Description of the podcast [Defaule:None]", ) parser.add_argument("-C", "--sort-creation", dest="sort_creation", help="Sort files by date of creation instead of name (default)", action="store_true", default=False) parser.add_argument("-v", "--verbose", dest="verbose", action="store_true", help="set verbose [default: False]") # process options opts = parser.parse_args(argv) if len(opts.dirname) == 0 or opts.host is None or opts.host == '': raise Exception("\n".join(["Usage: python %s -d directory -H hostname [-o output -r]" % (program_name), "For more information run %s --help\n" % (program_name)])) for somedir in opts.dirname: if not os.path.isdir(somedir) or not os.path.exists(somedir): raise Exception("Cannot find directory %s\n--dirname must be a path to an existing directory" % (dirname, )) host = opts.host if host[-1] != '/': host += '/' if not host.lower().startswith("http://") and not host.lower().startswith("https://"): host = "http://" + host link = host if opts.outfile is not None: if link[-1] == '/': link += opts.outfile else: link += '/' + opts.outfile title = "" if opts.title is not None: title = opts.title description = "" if opts.description is not None: description = opts.description if opts.extensions is not None: opts.extensions = [e for e in opts.extensions.split(",") if e != ""] ######################### # get the list of the desired files fileNames = [] for dirname in opts.dirname: #sys.stderr.write('Debug, checking for files in \'%s\'' % (dirname, ) if dirname[-1] != os.sep: dirname += os.sep fileNames.extend(getFiles(dirname.encode("utf-8"), extensions=opts.extensions, recursive=opts.recursive)) # process the list of files if len(fileNames) == 0: sys.stderr.write("No media files on directory '%s'\n" % (opts.dirname)) sys.exit(0) if opts.sort_creation: # sort files by date of creation if required # get files date of creation in seconds pubDates = [os.path.getctime(f) for f in fileNames] # most feed readers will use pubDate to sort items even if they are not sorted in the output file # for readability, we also sort fileNames according to pubDates in the feed. sortedFiles = sorted(zip(fileNames, pubDates), key=lambda f: - f[1]) else: # in order to have feed items sorted by name, we give them artificial pubDates # fileNames are already sorted (natural order), so we assume that the first item is published now # and the n-th item, (now - (n)) minutes and f seconds ago. # f is a random number of seconds between 0 and 10 (float) now = time.time() import random pubDates = [now - (60 * 60 * 24 * d + (random.random() * 10)) for d in xrange(len(fileNames))] sortedFiles = zip(fileNames, pubDates) # write dates in RFC-822 format sortedFiles = ((f[0], time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(f[1]))) for f in sortedFiles) # build items items = [fileToItem(host, fname, pubDate) for fname, pubDate in sortedFiles] if opts.outfile is not None: outfp = open(opts.outfile, "w") else: outfp = sys.stdout outfp.write('<?xml version="1.0" encoding="UTF-8"?>\n') outfp.write('<rss version="2.0">\n') outfp.write(' <channel>\n') outfp.write(' <title>{0}</title>\n'.format(title)) outfp.write(' <description>{0}</description>\n'.format(description)) outfp.write(' <link>{0}</link>\n'.format(link)) if opts.image is not None: if opts.image.lower().startswith("http://") or opts.image.lower().startswith("https://"): imgurl = opts.image else: imgurl = urllib.quote(host + opts.image, ":/") outfp.write(" <image>\n") outfp.write(" <url>{0}</url>\n".format(imgurl)) outfp.write(" <title>{0}</title>\n".format(title)) outfp.write(" <link>{0}</link>\n".format(link)) outfp.write(" </image>\n") for item in items: outfp.write(item + "\n") outfp.write('') outfp.write(' </channel>\n') outfp.write('</rss>\n') if outfp != sys.stdout: outfp.close() except Exception as mainException: sys.stderr.write(str(mainException) + "\n") return 2 if __name__ == "__main__": if DEBUG: sys.argv.append("-h") if TESTRUN or "--run-tests" in sys.argv: import doctest doctest.testmod() sys.exit(0) if PROFILE: import cProfile import pstats profile_filename = 'genRSS_profile.txt' cProfile.run('main()', profile_filename) statsfile = open("profile_stats.txt", "wb") p = pstats.Stats(profile_filename, stream=statsfile) stats = p.strip_dirs().sort_stats('cumulative') stats.print_stats() statsfile.close() sys.exit(0) sys.exit(main())