| 1 | #!/usr/bin/env python2
|
| 2 | """ref_check.py: Check integrity of doc/ref, and print metrics."""
|
| 3 | from __future__ import print_function
|
| 4 |
|
| 5 | import collections
|
| 6 | #from pprint import pprint
|
| 7 | import sys
|
| 8 |
|
| 9 | from doctools.util import log
|
| 10 |
|
| 11 |
|
def PrintTree(node, f, indent=0):
    """Dump a DocNode tree (see make_help.py) to the file-like object f.

    One line is written per node: `indent` spaces, the node's name, a space,
    and then the node's attributes rendered as '(k=v, ...)' when it has any.
    Children are written recursively, one indent level deeper.
    """
    attr_text = ''
    if node.attrs:
        # Each attribute is a (key, value) pair
        rendered = ['%s=%s' % kv for kv in node.attrs]
        attr_text = '(%s)' % ', '.join(rendered)

    # The separating space is emitted even when there are no attributes.
    line = '%s%s %s' % (' ' * indent, node.name, attr_text)
    print(line, file=f)

    child_indent = indent + 1
    for child in node.children:
        PrintTree(child, f, child_indent)
|
| 23 |
|
| 24 |
|
def Check(all_toc_nodes, chap_tree):
    """Cross-check the TOC metadata against the chapter tree; log a report.

    All results are reported through log(); nothing is returned.

    Args:
      all_toc_nodes: Structure of doc/ref/toc-*.md.  An iterable of dicts
        with keys 'toc' and 'boxes'.  Each box is a dict with 'to_chap' and
        'lines'; each line is a dict with 'impl' and 'topics', where
        'topics' is a sequence of (topic, topic_impl) pairs.
      chap_tree: Structure of chap-*.html.  A root node whose .children are
        chapters, whose .children are sections, whose .children are topics.
        Chapters and topics have a .name; topics also have .attrs (a
        sequence of (key, value) pairs) and .text.

    Raises:
      AssertionError: if the 'j8-escape' topic is not found on both sides.
    """
    all_topics = []  # every (topic, topic_impl) pair seen in the TOCs

    link_from = {}  # (filename, topic) -> implemented
    link_to = set()  # (chapter name, topic_id) pairs found in the chapters

    toc_topic_check = collections.defaultdict(list)  # topic -> list of TOCs

    #
    # Walk the TOC metadata
    #

    topics_not_impl = 0
    sections_not_impl = 0

    log('TOC:')
    log('')
    for toc_node in all_toc_nodes:
        toc = toc_node['toc']
        log(' %s', toc)
        for box_node in toc_node['boxes']:
            to_chap = box_node['to_chap']
            log(' %s', to_chap)
            # Same for every topic in this box, so build it once.
            chap_filename = 'chap-%s.html' % to_chap

            for line_info in box_node['lines']:
                section_impl = line_info['impl']
                if not section_impl:
                    sections_not_impl += 1

                topics = line_info['topics']
                for topic, topic_impl in topics:
                    # A topic in an unimplemented section is unimplemented too.
                    is_implemented = topic_impl and section_impl
                    link_from[chap_filename, topic] = is_implemented

                    if is_implemented:
                        toc_topic_check[topic].append(toc)
                    else:
                        topics_not_impl += 1

                all_topics.extend(topics)

    log('')

    # NOTE(review): all_topics holds (topic, topic_impl) pairs, so the
    # 'Unique topics' figure counts distinct pairs, not distinct names.
    log('TOC stats:')
    log(' All Topics: %d', len(all_topics))
    log(' Unique topics: %d', len(set(all_topics)))
    log(' Topics marked implemented: %d', len(toc_topic_check))
    log(' Topics not implemented: %d', topics_not_impl)
    log(' Sections not implemented (X): %d', sections_not_impl)
    log('')

    # Debugging aid: change to 1 to dump the chapter tree.
    if 0:
        PrintTree(chap_tree, sys.stdout)

    #
    # Walk the Chapter Tree
    #

    num_sections = 0
    num_topics = 0
    num_topics_written = 0

    chap_topics = collections.defaultdict(list)  # topic_id -> list of chapters
    short_topics = []  # (topic_id, text) for topics with only a few words

    min_words = 5  # arbitrary

    for chap in chap_tree.children:
        for section in chap.children:
            num_sections += 1

            for topic in section.children:
                num_topics += 1

                # Prefer an explicit 'id' attribute; fall back to the node
                # name when there is none (or more than one).
                values = [v for k, v in topic.attrs if k == 'id']
                if len(values) == 1:
                    topic_id = values[0]
                else:
                    topic_id = topic.name

                chap_topics[topic_id].append(chap.name)
                link_to.add((chap.name, topic_id))

                # split by whitespace
                num_words = len(topic.text.split())
                if num_words > min_words:
                    num_topics_written += 1
                elif num_words > 1:
                    short_topics.append((topic_id, topic.text))

    num_chapters = len(chap_tree.children)

    log('Chapter stats:')
    log(' num chapters = %d', num_chapters)
    log(' num_sections = %d', num_sections)
    log(' num_topics = %d', num_topics)

    chap_topic_set = set(chap_topics)
    log(' num unique topics = %d', len(chap_topic_set))
    log(' topics with first draft (more than %d words) = %d', min_words,
        num_topics_written)
    log('')

    log('%d in link_to set: %s', len(link_to), sorted(link_to)[:10])
    log('')
    log('%d in link_from set: %s', len(link_from), sorted(link_from)[:10])
    log('')

    index_topic_set = set(toc_topic_check)

    # Sanity check: this topic must appear in both the TOC and the chapters.
    assert 'j8-escape' in index_topic_set
    assert 'j8-escape' in chap_topic_set

    # Report on topic namespace integrity, e.g. 'help append' should go to one
    # thing.  Sorted so the report is deterministic, like the TOC report below.
    log('Topics in multiple chapters:')
    for topic_id in sorted(chap_topics):
        chaps = chap_topics[topic_id]
        if len(chaps) > 1:
            log(' %s: %s', topic_id, ' '.join(chaps))
    log('')

    log('Duplicate topics in TOC:')
    log('')
    for topic in sorted(toc_topic_check):
        toc_list = toc_topic_check[topic]
        if len(toc_list) > 1:
            log('%20s: %s', topic, ' '.join(toc_list))
    log('')

    #
    # Report on link integrity
    #

    # TOC topics with X can be missing, so only implemented ones are checked.
    # (Plain iteration plus lookup works on both Python 2 and 3, unlike
    # iteritems().)
    impl_link_from = set(key for key in link_from if link_from[key])
    broken = impl_link_from - link_to
    log('%d Broken Links:', len(broken))
    for pair in sorted(broken):
        log(' %s', pair)
    log('')

    # Chapter topics that no TOC entry (implemented or not) points at.
    orphaned = link_to - set(link_from)
    log('%d Orphaned Topics:', len(orphaned))
    for pair in sorted(orphaned):
        log(' %s', pair)
    log('')

    log('Short topics:')
    for topic, text in short_topics:
        log('%15s %r', topic, text)
    log('')
|