X-Git-Url: https://jxself.org/git/?p=ibg.git;a=blobdiff_plain;f=tools%2Fautoindex.py;fp=tools%2Fautoindex.py;h=b9d89823df6dc2a42ddd18c8d819c6418166f461;hp=0000000000000000000000000000000000000000;hb=4381288eeb792adb3672ef747013c02815fa760a;hpb=4261ff48a6357e2af0957f5e1b88bdc022243d16 diff --git a/tools/autoindex.py b/tools/autoindex.py new file mode 100644 index 0000000..b9d8982 --- /dev/null +++ b/tools/autoindex.py @@ -0,0 +1,216 @@ +""" +Add Sphinx index entries to RST source. + +TODO: scan directory tree, look for *.rst +TODO: add option to remove entries +""" + +import os +import re +import sys + +try: + from ConfigParser import ConfigParser +except ImportError: + from configparser import ConfigParser + + +# Configuration defaults. +defaults = {'comment': ".. Generated by autoindex", + 'mintext': '5000', + 'noindex': ''} + +# Read config settings. +config = ConfigParser(defaults, allow_no_value=True) +config.optionxform = str + +thisdir = os.path.dirname(__file__) +conffile = os.path.join(thisdir, "autoindex.cfg") +config.read(conffile) + +# Extract keywords and role mappings. +def getmap(section): + mapping = {} + + if config.has_section(section): + for name in config.options(section): + if name not in defaults: + mapping[name] = config.get(section, name) + + return mapping + +keywords = getmap('keywords') +rolemap = getmap('rolemap') + +# Autoindex comment. +comment = config.get('DEFAULT', 'comment') + +# Minimum amount of text twixt identical entries. +mintext = config.getint('DEFAULT', 'mintext') + +# Don't add index entries after paragraphs matching this. +noindex = config.get('DEFAULT', 'noindex').strip().split("\n") + +if noindex: + noindex_patterns = "(%s)" % "|".join(noindex) +else: + noindex_patterns = None + +# Paragraph separator. +separator = "\n\n" + + +def main(args): + # Parse command args. + if len(args) == 2: + infile = args[1] + outfile = None + elif len(args) == 3: + infile, outfile = args[1:] + else: + sys.exit("Usage: %s INFILE [OUTFILE]" % args[0]) + + ##dump_paragraphs(infile) + + # Do indexing. + autoindex_file(infile, outfile) + + +def autoindex_file(infile, outfile=None): + "Add index entries to a file." + + # Get original text. + with open(infile) as fp: + text = fp.read() + + # Index it. + itext = autoindex_text(text) + + # Write output (but don't modify original if nothing changed). + if outfile or itext != text: + if outfile == '-': + sys.stdout.write(itext) + else: + with open(outfile or infile, "wb") as fp: + fp.write(itext) + + +def autoindex_text(text): + "Add index entries to the given text." + return separator.join(indexed_paragraphs(text)) + + +def indexed_paragraphs(text): + "Yield indexed paragraphs from the specified text." + + # Current text position. + textpos = 0 + + # Text position of last entries for each index word (to avoid too many + # close together for the same entry). + lastpos = {} + + def addindex(index, name, desc=None): + if name not in lastpos or lastpos[name] + mintext < textpos: + index.append((name, desc, textpos)) + lastpos[name] = textpos + + # Whether to add index entries. + noindex = False + + for info in paragraph_info(text): + # Update text count. + para = info['text'] + textpos += len(para) + + # Initialise index (list of [name, desc, textpos]). + index = [] + + # Find index entries for roles. + for match in re.finditer(r':(.+?):`(.+?)`', para): + role, name = match.groups() + if role in rolemap: + addindex(index, name, rolemap[role]) + + # Find index entries for keywords. + paraline = para.replace("\n", " ") + for word, desc in keywords.items(): + if re.search(r'\b' + word + r'\b', paraline): + addindex(index, word, desc) + + # Yield index paragraph if required. + if index and not noindex: + indent = info['indent'] + lines = [indent + comment] + lines.append(indent + ".. index::") + + for name, desc, pos in sorted(index): + msg = "autoindex: " + name + + if desc: + text = " pair: %s; %s" % (name, desc) + msg += " (" + desc + ")" + else: + text = " single: %s" % name + + lines.append(indent + text) + sys.stderr.write("%s [%s]\n" % (msg, pos)) + + yield "\n".join(lines) + + noindex = info['noindex'] + + # Yield paragraph. + yield para + + +def unindexed_paragraphs(text): + "Yield paragraphs stripped of autoindex comments." + + for para in text.split(separator): + if comment not in para: + yield para + + +def paragraph_info(text): + "Yield paragraph information from text." + + noindex = False + noindex_level = None + + for para in unindexed_paragraphs(text): + indent = re.match(r' *', para).group() + level = len(indent) + + # Detect first entry in a list. Should be at same indent level as + # its text. + match = re.match(r'\* ', para.lstrip()) + if match: + level += len(match.group()) + + if noindex_patterns: + if not noindex and re.search(noindex_patterns, para, re.M): + noindex_level = level + noindex = True + elif noindex_level is not None and level <= noindex_level: + noindex_level = None + noindex = False + + yield {'text': para, + 'noindex': noindex, + 'indent': indent, + 'level': level} + + +def dump_paragraphs(infile): + print noindex_patterns + + with open(infile) as fp: + text = fp.read() + + for info in paragraph_info(text): + print info['level'], info['noindex'], info['text'].replace("\n", " ") + + +if __name__ == "__main__": + main(sys.argv)