doctools/split

OILS / doctools / split_doc.py View on Github | oils.pub

160 lines, 110 significant

1	#!/usr/bin/env python2
2	"""split_doc.py."""
3	from __future__ import print_function
4
5	import json
6	import optparse
7	import re
8	import sys
9	from typing import List, Dict, IO
10
# Both patterns are compiled with re.VERBOSE, so the literal spaces inside them
# are ignored: DATE_RE matches '2016/11/01', not '2016 / 11 / 01'.
DATE_RE = re.compile(r'(\d\d\d\d) / (\d\d) / (\d\d)', re.VERBOSE)

# 'name: value'.  The '[ ]*' character class is how a literal space is spelled
# in verbose mode; it swallows the padding between the colon and the value.
META_RE = re.compile(r'(\S+): [ ]* (.*)', re.VERBOSE)


def _SetDate(meta, prefix, value):
    # type: (Dict[str, object], str, str) -> None
    """Parse a YYYY/MM/DD value into <prefix>year/month/day integer fields.

    Raises:
      RuntimeError: if value doesn't start with a YYYY/MM/DD date.
    """
    m = DATE_RE.match(value)
    if not m:
        raise RuntimeError('Invalid date %r' % value)
    year, month, day = m.groups()
    meta[prefix + 'year'] = int(year)
    meta[prefix + 'month'] = int(month)
    meta[prefix + 'day'] = int(day)


def _ReadFirstNonEmpty(f):
    # type: (IO[str]) -> str
    """Return the next line of f that isn't blank, or '' if EOF comes first."""
    while True:
        line = f.readline()
        # readline() returns '' only at EOF.  Without this check a document
        # that ends in blank lines (or has no body at all) would loop forever.
        if not line or line.strip():
            return line


def SplitDocument(default_vals, entry_f, meta_f, content_f, strict=False):
    # type: (Dict[str, str], IO[str], IO[str], IO[str], bool) -> None
    """Split a document into metadata JSON and content Markdown.

    Used for blog posts and index.md / cross-ref.md.

    The document may start with a front matter block delimited by '---' lines,
    containing 'name: value' pairs.  'date' and 'updated_date' values are
    expanded into integer year / month / day fields; every other pair is stored
    as a string.  If the first non-empty line of the body is underlined with
    '=' characters, it is recorded as the 'title'.

    Args:
      default_vals: metadata to use for names the document doesn't define.
      entry_f: input document, opened for reading.
      meta_f: output file that receives the metadata as JSON.
      content_f: output file that receives the body.  Blank lines before the
        first non-empty body line are dropped.  If the metadata has a
        'comments_url', a [comments-url] link definition is appended.
      strict: if True, the document must begin with a '---' line.

    Raises:
      RuntimeError: if strict is set and there is no front matter, if the front
        matter is never closed, or if a metadata line or date is malformed.
    """
    first_line = entry_f.readline()
    has_front_matter = first_line.strip() == '---'
    if strict and not has_front_matter:
        raise RuntimeError("Document should start with --- (got %r)" %
                           first_line)

    meta = {}

    if has_front_matter:
        while True:
            raw_line = entry_f.readline()
            if not raw_line:
                # EOF before the closing delimiter.
                raise RuntimeError(
                    'Unterminated metadata block (missing closing ---)')
            line = raw_line.strip()
            if line == '---':
                break
            m = META_RE.match(line)
            if not m:
                raise RuntimeError('Invalid metadata line %r' % line)
            name, value = m.groups()

            if name == 'date':
                _SetDate(meta, '', value)
            elif name == 'updated_date':
                _SetDate(meta, 'updated_', value)
            else:
                meta[name] = value

        first_nonempty = _ReadFirstNonEmpty(entry_f)

    elif first_line.strip():
        first_nonempty = first_line

    else:
        # The first line was blank (or the file is empty): skip ahead to the
        # first line with content.
        first_nonempty = _ReadFirstNonEmpty(entry_f)

    # Invariant: we've read the first non-empty line here (or hit EOF, in which
    # case it's '').  Now we need to see if it's the title.
    line_two = entry_f.readline()
    if re.match('=+', line_two):
        meta['title'] = first_nonempty.strip()

    # Fill in defaults after parsing all values, so that the document's own
    # metadata always wins.  items() rather than iteritems() keeps this working
    # on both Python 2 and Python 3.
    for name, value in default_vals.items():
        if name not in meta:
            meta[name] = value

    json.dump(meta, meta_f, indent=2)

    # The two lines consumed while looking for the title are part of the body,
    # so write them back before the rest of the file.
    content_f.write(first_nonempty)
    content_f.write(line_two)
    content_f.write(entry_f.read())

    comments_url = meta.get('comments_url', '')
    if comments_url:
        content_f.write("""
[comments-url]: %s

""" % comments_url)
106
107
def Options():
    """Build the optparse parser for the split_doc.py command line.

    Flags:
      -v NAME=VALUE   May be repeated; the raw strings are collected in
                      opts.default_vals (empty list if the flag is absent).
      -s, --strict    Sets opts.strict to True (False by default).

    Returns:
      An optparse.OptionParser; the caller invokes parse_args() on it.
    """
    parser = optparse.OptionParser(
        'split_doc.py [options] input_file out_prefix')

    # (flag spellings, add_option keyword arguments)
    option_specs = [
        # Like awk -v
        (('-v',), {
            'dest': 'default_vals',
            'action': 'append',
            'default': [],
            'help':
            "If the doc's own metadata doesn't define 'name', set it to this value",
        }),
        (('-s', '--strict'), {
            'dest': 'strict',
            'action': 'store_true',
            'default': False,
            'help': "Require metadata",
        }),
    ]
    for flags, settings in option_specs:
        parser.add_option(*flags, **settings)

    return parser
126
127
def main(argv):
    # type: (List[str]) -> None
    """Command line entry point: split input_file into two output files.

    Writes <out_prefix>_meta.json and <out_prefix>_content.md.

    Args:
      argv: the full argument vector, including the program name at index 0.

    Raises:
      RuntimeError: if input_file / out_prefix are missing, if a -v value is
        not of the form name=value, or if SplitDocument rejects the document.
    """
    o = Options()
    opts, argv = o.parse_args(argv)

    # argv[0] is still the program name after parse_args, so the two
    # positional arguments live at indices 1 and 2.
    if len(argv) < 3:
        raise RuntimeError(
            'Expected input_file and out_prefix arguments, got %r' % argv[1:])

    entry_path = argv[1]  # e.g. blog/2016/11/01.md
    out_prefix = argv[2]  # e.g _site/blog/2016/11/01

    meta_path = out_prefix + '_meta.json'
    content_path = out_prefix + '_content.md'

    default_vals = {}
    for pair in opts.default_vals:
        name, sep, value = pair.partition('=')
        if not sep:
            # Report this as RuntimeError so it reaches the script's FATAL
            # handler rather than surfacing as an unpacking ValueError.
            raise RuntimeError(
                'Invalid -v value %r (expected name=value)' % pair)
        default_vals[name] = value

    with \
        open(entry_path) as entry_f, \
        open(meta_path, 'w') as meta_f, \
        open(content_path, 'w') as content_f:
        SplitDocument(default_vals,
                      entry_f,
                      meta_f,
                      content_f,
                      strict=opts.strict)
153
154
if __name__ == '__main__':
    # RuntimeError is caught here and reported as a one-line message on stderr
    # with exit status 1, instead of a traceback.
    try:
        main(sys.argv)
    except RuntimeError as err:
        print('FATAL: %s' % err, file=sys.stderr)
        sys.exit(1)