' % (self.name, self.level, self.line_number) def pop_sections(sections, level): while sections: s = sections.pop() if s and s.level < level: sections.append(s) return atx_heading_re = re.compile(r'^(#+)\s+(.*?)(?:\s+#+)?\s*$') settext_heading_re = re.compile(r'^[-=]+$') settext_subject_re = re.compile(r'^[^\s]+.*$') def find_sections(filename, lines): sections = [] in_code_block = False beyond_front_matter = False in_front_matter = False previous_sections = [] for i, line in enumerate(lines): # Some markdown-based tools allow for "front matter" at the beginning # of the file. The data is demarcated by a leading and trailing triple # hyphen (---) on its own line. The tools I've looked at, like Jekyll, # expect this to start on the first line. So here's an attempt to skip # over the front matter and only tag the remaining part of the file. if not beyond_front_matter: if line == "": continue elif line == '---': in_front_matter = not in_front_matter continue if not in_front_matter and line: beyond_front_matter = True # Skip GitHub Markdown style code blocks. if line.startswith("```"): in_code_block = not in_code_block continue if in_code_block: continue m = atx_heading_re.match(line) if m: level = len(m.group(1)) name = m.group(2) pop_sections(previous_sections, level) if previous_sections: parent = previous_sections[-1] else: parent = None line_number = i + 1 s = Section(level, name, line, line_number, filename, parent) previous_sections.append(s) sections.append(s) else: m = settext_heading_re.match(line) if i and m: if not settext_subject_re.match(lines[i - 1]): continue name = lines[i-1].strip() if line[0] == '=': level = 1 else: level = 2 pop_sections(previous_sections, level) if previous_sections: parent = previous_sections[-1] else: parent = None line_number = i s = Section(level, name, lines[i-1], line_number, filename, parent) previous_sections.append(s) sections.append(s) return sections def sections_to_tags(sections, sro): tags = [] for section in sections: tags.append(Tag.section(section, sro)) return tags def gen_tags_header(output, sort): if sort == "yes": sorted_line = b'!_TAG_FILE_SORTED\t1\t//\n' elif sort == "foldcase": sorted_line = b'!_TAG_FILE_SORTED\t2\t//\n' else: sorted_line = b'!_TAG_FILE_SORTED\t0\t//\n' output.write(b'!_TAG_FILE_ENCODING\tutf-8\t//\n') output.write(b'!_TAG_FILE_FORMAT\t2\t//\n') output.write(sorted_line) def gen_tags_content(output, sort, tags): if sort == "yes": tags = sorted(tags) elif sort == "foldcase": tags = sorted(tags, key=lambda x: str(x).lower()) for t in tags: output.write(t.render().encode('utf-8')) output.write('\n'.encode('utf-8')) def main(): from optparse import OptionParser parser = OptionParser(usage="usage: %prog [options] file(s)", version=__version__) parser.add_option( "-f", "--file", metavar="FILE", dest="tagfile", default="tags", help='Write tags into FILE (default: "tags"). Use "-" to write ' 'tags to stdout.') parser.add_option( "", "--encoding", metavar="ENCODING", dest="encoding", default=None, help='Skips auto detection and uses the specified encoding for the ' 'input files. Encoding name should be one that Python would ' 'recognize.') parser.add_option( "", "--sort", metavar="[yes|foldcase|no]", dest="sort", choices=["yes", "no", "foldcase"], default="yes", help='Produce sorted output. Acceptable values are "yes", ' '"no", and "foldcase". Default is "yes".') parser.add_option( "", "--sro", metavar="SEPARATOR", dest="sro", default="|", action="store", help=u'Use the specified string to scope nested headings. The ' 'default is pipe symbol ("|"), but that can be an issue if your ' 'headings contain the pipe symbol. It might be more useful to ' 'use a such as the UTF-8 chevron ("\u00bb").') options, args = parser.parse_args() if sys.version_info[0] == 2: encoding = sys.stdin.encoding or locale.getpreferredencoding() or 'utf-8' options.sro = options.sro.decode(encoding) if options.tagfile == '-': if sys.version_info[0] == 2: output = sys.stdout else: output = sys.stdout.buffer else: output = open(options.tagfile, 'wb') gen_tags_header(output, options.sort) all_sections = [] try: for filename in args: if sys.version_info[0] == 2: filename = filename.decode(sys.getfilesystemencoding()) try: with open_autoenc(filename, encoding=options.encoding) as f: buf = f.read() except IOError as e: if e.errno == errno.EPIPE: raise print_warning(e) continue lines = buf.splitlines() del buf sections = find_sections(filename, lines) all_sections.extend(sections) finally: # We do this to match ctags behavior... even when a file is missing, # it'll write out the tags it has. gen_tags_content(output, options.sort, sections_to_tags(all_sections, options.sro)) output.flush() output.close() def print_warning(e): print("WARNING: %s" % str(e), file=sys.stderr) def print_error(e): print("ERROR: %s" % str(e), file=sys.stderr) def cli_main(): try: main() except IOError as e: import errno if e.errno == errno.EPIPE: # Exit saying we got SIGPIPE. sys.exit(141) raise except ScriptError as e: print("ERROR: %s" % str(e), file=sys.stderr) sys.exit(1) if __name__ == '__main__': cli_main()