OILS / doctools / oils_doc.py View on Github | oils.pub

707 lines, 387 significant
1#!/usr/bin/env python2
2"""oils_doc.py: HTML processing for Oil documentation.
3
4Plugins:
5 ExpandLinks expands $xref, etc.
6 PygmentsPlugin -- for ```python, ```sh, ```c, etc.
7 HelpTopicsPlugin -- for help-index.html
8
9 ShPromptPlugin -- understands $ echo hi, but doesn't run anything
  ShellSession -- TODO (currently a stub): run shell snippets and cache the output
11"""
12from __future__ import print_function
13
14from _devbuild.gen.htm8_asdl import h8_id
15
16import cgi
17try:
18 from cStringIO import StringIO
19except ImportError:
20 # for python3
21 from io import StringIO # type: ignore
22import re
23import sys
24
25from typing import Iterator, Any, List, Optional, IO
26
27from data_lang import htm8
28from doctools.util import log
29from doctools import html_old
30
31try:
32 import pygments
33except ImportError:
34 pygments = None
35
36
37class _Abbrev(object):
38
39 def __init__(self, fmt):
40 # type: (str) -> None
41 self.fmt = fmt
42
43 def __call__(self, value):
44 # type: (str) -> str
45 return self.fmt % {'value': value}
46
47
# Maps an abbreviation name (the part after '$' in an href such as
# "$xref:bash") to the _Abbrev that builds the expanded URL.  ExpandLinks()
# looks names up here and raises RuntimeError for unknown ones.
_ABBREVIATIONS = {
    'xref':
    _Abbrev('/cross-ref.html?tag=%(value)s#%(value)s'),

    # alias for osh-help, for backward compatibility
    # to link to the same version

    # OBSOLETE
    # TODO: Remove all of these broken links!
    'help':
    _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'osh-help':
    _Abbrev('osh-help.html?topic=%(value)s#%(value)s'),
    'oil-help':
    _Abbrev('oil-help.html?topic=%(value)s#%(value)s'),
    'osh-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/osh-help.html?topic=%(value)s#%(value)s'
    ),
    'oil-help-latest':
    _Abbrev(
        '//oilshell.org/release/latest/doc/oil-help.html?topic=%(value)s#%(value)s'
    ),


    # New style: one for every chapter?
    # Problem: can't use relative links here, because some are from doc/ref, and
    # some are from doc
    'chap-type-method':
    _Abbrev('chap-type-method.html?topic=%(value)s#%(value)s'),
    'chap-plugin':
    _Abbrev('chap-plugin.html?topic=%(value)s#%(value)s'),
    'chap-builtin-cmd':
    _Abbrev('chap-builtin-cmd.html?topic=%(value)s#%(value)s'),

    # old
    'oil-doc':
    _Abbrev('//www.oilshell.org/release/latest/doc/%(value)s'),
    # new
    'oils-doc':
    _Abbrev('//oils.pub/release/latest/doc/%(value)s'),

    # old AND new
    'blog-tag':
    _Abbrev('/blog/tags.html?tag=%(value)s#%(value)s'),

    # For links from oils.pub -> oilshell.org
    'oilshell-blog-tag':
    _Abbrev('https://www.oilshell.org/blog/tags.html?tag=%(value)s#%(value)s'),

    # old
    'oil-commit':
    _Abbrev('https://github.com/oilshell/oil/commit/%(value)s'),
    # new
    'oils-commit':
    _Abbrev('https://github.com/oils-for-unix/oils/commit/%(value)s'),

    # old
    'oil-src':
    _Abbrev('https://github.com/oilshell/oil/blob/master/%(value)s'),
    # new
    'oils-src':
    _Abbrev('https://github.com/oils-for-unix/oils/blob/master/%(value)s'),

    # old
    'blog-code-src':
    _Abbrev('https://github.com/oilshell/blog-code/blob/master/%(value)s'),
    'issue':
    _Abbrev('https://github.com/oilshell/oil/issues/%(value)s'),
    'wiki':
    _Abbrev('https://github.com/oilshell/oil/wiki/%(value)s'),

    # new
    'oils-blog-code-src':
    _Abbrev('https://github.com/oils-for-unix/blog-code/blob/master/%(value)s'),
    'oils-issue':
    _Abbrev('https://github.com/oils-for-unix/oils/issues/%(value)s'),
    'oils-wiki':
    _Abbrev('https://github.com/oils-for-unix/oils/wiki/%(value)s'),
}

# Matches a shortcut href, e.g. $xref:foo or just $xref
#   group 1: abbreviation name (lowercase letters and '-')
#   group 2: optional argument after ':' (non-whitespace); None when absent
_SHORTCUT_RE = re.compile(r'\$ ([a-z\-]+) (?: : (\S+))?', re.VERBOSE)
131
132
def ExpandLinks(s):
    # type: (str) -> str
    """Expand abbreviated hrefs like $xref:bash into real URLs.

    For every <a> start tag whose href value matches _SHORTCUT_RE, the value
    is replaced by the URL built from the matching _ABBREVIATIONS entry (passed
    through cgi.escape).  Everything else in s is copied through unchanged.

    Two forms are accepted:

      <a href="$xref:bash">...</a>   the argument follows the colon
      <a href="$xref">bash</a>       the argument is the raw HTML between
                                     <a> and </a>

    Args:
      s: an HTML string.

    Returns:
      The HTML string with shortcut hrefs expanded.

    Raises:
      RuntimeError: if the abbreviation name is not in _ABBREVIATIONS.
    """
    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = html_old.TagLexer(s)

    # Start offset of the token about to be read; each token ends at end_pos.
    pos = 0

    it = html_old.ValidTokens(s)
    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.GetTagName() == 'a':
                open_tag_right = end_pos

                href_start, href_end = tag_lexer.GetSpanForAttrValue('href')
                if href_start == -1:
                    # e.g. <a name="...">.  Advance pos before skipping the
                    # rest of the loop body, so the next token isn't lexed
                    # from a stale start offset.
                    pos = end_pos
                    continue

                href_raw = s[href_start:href_end]

                new = None
                m = _SHORTCUT_RE.match(href_raw)
                if m:
                    abbrev_name, arg = m.groups()
                    if not arg:
                        # No explicit argument: use the link text instead.
                        close_tag_left, close_tag_right = \
                            html_old.ReadUntilEndTag(it, tag_lexer, 'a')
                        arg = s[open_tag_right:close_tag_left]

                        # The iterator has now consumed everything through
                        # </a>, so that is where the next token starts.
                        end_pos = close_tag_right

                    # Hack so we can write [Wiki Page]($wiki) and have the
                    # link look like /Wiki-Page/
                    if abbrev_name == 'wiki':
                        arg = arg.replace(' ', '-')

                    func = _ABBREVIATIONS.get(abbrev_name)
                    if not func:
                        raise RuntimeError('Invalid abbreviation %r' %
                                           abbrev_name)
                    new = func(arg)

                if new is not None:
                    # Replace only the href value; the rest of the tag is
                    # copied verbatim by later PrintUntil/PrintTheRest calls.
                    out.PrintUntil(href_start)
                    f.write(cgi.escape(new))
                    out.SkipTo(href_end)

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
192
193
194class _Plugin(object):
195 """
196 A plugin for HighlightCode(), which modifies <pre><code> ... </code></pre>
197 """
198
199 def __init__(self, s, start_pos, end_pos):
200 # type: (str, int, int) -> None
201 self.s = s
202 self.start_pos = start_pos
203 self.end_pos = end_pos
204
205 def PrintHighlighted(self, out):
206 # type: (htm8.Output) -> None
207 raise NotImplementedError()
208
209
# One line of text: everything up to a newline, plus the newline itself if
# there is one (optional newline at end).
_LINE_RE = re.compile(r'(.*) \n?', re.VERBOSE)

# A line that starts with a shell prompt.  Groups:
#   1: the prompt, ending in '$'
#   2: the command text
#   3: an HTML-escaped <TAB> marker, if present
#   4: a trailing comment introduced by two spaces and '#', if present
_PROMPT_LINE_RE = re.compile(
    r'''
(\S* \$)[ ] # flush-left non-whitespace, then dollar and space is a prompt
(.*?) # arbitrary text
(?: # don't highlight tab completion
 (&lt;TAB&gt;) # it's HTML escaped!!!
 .*?
)?
(?:
 [ ][ ]([#] .*) # optionally: two spaces then a comment
)?
$
''', re.VERBOSE)

# A non-prompt line that ends in a comment.  Group 1 is the comment.
_EOL_COMMENT_RE = re.compile(
    r'''
.*? # arbitrary text
[ ][ ]([#] .*) # two spaces then a comment
$
''', re.VERBOSE)

# A line whose first character is '#', i.e. the whole line is a comment.
_COMMENT_LINE_RE = re.compile(r'#.*')
235
236
def Lines(s, start_pos, end_pos):
    # type: (str, int, int) -> Iterator[int]
    """Generate the end position of each line between start_pos and end_pos.

    A line ends just after its newline, or at end_pos when the last line has
    no newline.

    Raises:
      RuntimeError: if _LINE_RE fails to match (not expected to happen).
    """
    cursor = start_pos
    while cursor < end_pos:
        line_match = _LINE_RE.match(s, cursor, end_pos)
        if line_match is None:
            raise RuntimeError("Should have matched a line")

        # The next line begins where this one ends.
        cursor = line_match.end(0)
        yield cursor
250
251
class ShPromptPlugin(_Plugin):
    """Wrap shell prompts, commands and comments in <span> tags."""

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        """Write the span to out, one line at a time, adding <span> tags.

        Each line is tried against three patterns, in this order:
          1. a whole-line comment
          2. a prompt followed by a command (with optional tab-completion
             marker and trailing comment)
          3. any text followed by a trailing comment
        Lines that match none of them are copied unchanged.
        """
        text = self.s

        def Wrap(open_tag, left, right):
            # type: (str, int, int) -> None
            """Emit s[left:right] surrounded by open_tag and </span>."""
            out.PrintUntil(left)
            out.Print(open_tag)
            out.PrintUntil(right)
            out.Print('</span>')

        line_start = self.start_pos
        for line_end in Lines(text, self.start_pos, self.end_pos):

            whole_comment = _COMMENT_LINE_RE.match(text, line_start, line_end)
            if whole_comment:
                Wrap('<span class="sh-comment">', whole_comment.start(0),
                     whole_comment.end(0))
            else:
                prompt = _PROMPT_LINE_RE.match(text, line_start, line_end)
                if prompt:
                    Wrap('<span class="sh-prompt">', prompt.start(1),
                         prompt.end(1))
                    Wrap('<span class="sh-command">', prompt.start(2),
                         prompt.end(2))

                    if prompt.group(3):
                        Wrap('<span class="sh-tab-complete">',
                             prompt.start(3), prompt.end(3))

                    if prompt.group(4):
                        Wrap('<span class="sh-comment">', prompt.start(4),
                             prompt.end(4))
                else:
                    trailing = _EOL_COMMENT_RE.match(text, line_start,
                                                     line_end)
                    if trailing:
                        Wrap('<span class="sh-comment">', trailing.start(1),
                             trailing.end(1))

            # Copy whatever is left of the line, including its newline.
            out.PrintUntil(line_end)

            line_start = line_end
303
304
class HelpTopicsPlugin(_Plugin):
    """Highlight blocks of doc/ref/toc-*.md."""

    def __init__(self, s, start_pos, end_pos, chapter, linkify_stop_col):
        # type: (str, int, int, str, int) -> None
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.linkify_stop_col = linkify_stop_col
        self.chapter = chapter

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        """Render each line with help_gen.TopicHtmlRenderer and write it.

        NOTE: despite the type comment, this returns the debug_out list that
        was handed to the renderer; HighlightCode() uses that return value.
        """
        from doctools import help_gen

        debug_out = []
        renderer = help_gen.TopicHtmlRenderer(self.chapter, debug_out,
                                              self.linkify_stop_col)

        line_start = self.start_pos
        for line_end in Lines(self.s, self.start_pos, self.end_pos):
            # The line is passed as it appears in the HTML, i.e. still
            # escaped, so the renderer only needs to add tags around it.
            rendered = renderer.Render(self.s[line_start:line_end])

            # None means "leave this line alone"; it is copied later by a
            # subsequent PrintUntil / PrintTheRest.
            if rendered is not None:
                out.PrintUntil(line_start)
                out.Print(rendered)
                out.SkipTo(line_end)

            line_start = line_end

        return debug_out
337
338
class PygmentsPlugin(_Plugin):
    """Syntax-highlight a code span with Pygments."""

    def __init__(self, s, start_pos, end_pos, lang):
        # type: (str, int, int, str) -> None
        """
        Args:
          s, start_pos, end_pos: the HTML string and the span to highlight
          lang: lexer name passed to Pygments, e.g. 'python' or 'sh'
        """
        _Plugin.__init__(self, s, start_pos, end_pos)
        self.lang = lang

    def PrintHighlighted(self, out):
        # type: (htm8.Output) -> None
        """Write the Pygments HTML for the span to out.

        Raises:
          ImportError: if Pygments is not installed.
        """
        # 'import pygments' does not load the lexers / formatters
        # submodules, so pygments.lexers may not exist as an attribute.
        # Import them explicitly instead of relying on some other module
        # having done so.
        from pygments import formatters
        from pygments import lexers

        # unescape before passing to pygments, which will escape
        code = html_old.ToText(self.s, self.start_pos, self.end_pos)

        lexer = lexers.get_lexer_by_name(self.lang)
        formatter = formatters.HtmlFormatter()

        highlighted = pygments.highlight(code, lexer, formatter)
        out.Print(highlighted)
356
357
def SimpleHighlightCode(s):
    # type: (str) -> str
    """Simple highlighting for test/shell-vs-shell.sh.

    The content of every <pre> ... </pre> element is run through
    ShPromptPlugin; everything else, including the <pre> and </pre> tags
    themselves, is copied through unchanged.

    Args:
      s: an HTML string.

    Returns:
      The HTML string with <span> tags added inside <pre> elements.
    """

    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = html_old.TagLexer(s)

    # Start offset of the token about to be read; each token ends at end_pos.
    pos = 0

    it = html_old.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.GetTagName() == 'pre':
                pre_end_pos = end_pos

                # Left and right edges of the closing </pre> tag.
                slash_pre_left, slash_pre_right = \
                    html_old.ReadUntilEndTag(it, tag_lexer, 'pre')

                # Copy everything up to and including <pre> unchanged.
                out.PrintUntil(pre_end_pos)

                # Using ShPromptPlugin because it does the comment highlighting we want!
                # Highlight only the text BETWEEN the tags, so that a prompt
                # or comment span can never swallow <pre> or </pre>.
                plugin = ShPromptPlugin(s, pre_end_pos, slash_pre_left)
                plugin.PrintHighlighted(out)

                out.SkipTo(slash_pre_left)

                # The iterator has consumed everything through </pre>, so
                # that is where the next token starts.
                end_pos = slash_pre_right

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
400
401
# Parses the class attribute of a <code> tag that requests chapter links,
# e.g. language-chapter-links-cmd-lang or language-chapter-links-cmd-lang_35
#   group 1: chapter name
#   group 2: optional linkify_stop_col digits; None when absent
CSS_CLASS_RE = re.compile(
    r'''
 language-chapter-links-
 ([a-z0-9-]+) # chapter name
 (?:_(\d+))? # optional linkify_stop_col
 ''', re.VERBOSE)
408
409
def HighlightCode(s, default_highlighter, debug_out=None):
    # type: (str, Optional[Any], Optional[List]) -> str
    """Highlight the <pre><code> blocks in an HTML string.

    Algorithm:
      1. Find each <pre> start tag that is immediately followed by a <code>
         start tag.
      2. Pick a plugin based on the class attribute of <code>:
           no class                  -> default_highlighter, if not None
           language-none             -> left alone
           language-sh-prompt,
           language-oil-sh,
           language-oils-sh          -> ShPromptPlugin
           language-ysh              -> left alone for now
           language-chapter-links-*  -> HelpTopicsPlugin
           any other language-*      -> PygmentsPlugin
      3. Let the plugin print the highlighted code; everything outside the
         blocks is copied through unchanged.

    Args:
      s: an HTML string.
      default_highlighter: None, or one of 'sh-prompt', 'oils-sh', 'oil-sh'.
      debug_out: optional list; one dict per chapter-links block is appended.

    Returns:
      The highlighted HTML string.

    Raises:
      RuntimeError: if default_highlighter is some other non-None value and a
        <code> tag without a class is encountered.
    """
    if debug_out is None:
        debug_out = []

    f = StringIO()
    out = htm8.Output(s, f)

    tag_lexer = html_old.TagLexer(s)

    # Start offset of the token about to be read; each token ends at end_pos.
    pos = 0

    it = html_old.ValidTokens(s)

    while True:
        try:
            tok_id, end_pos = next(it)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:

            tag_lexer.Reset(pos, end_pos)
            if tag_lexer.GetTagName() == 'pre':
                pre_start_pos = pos
                pos = end_pos

                # Look at the token right after <pre>; only <pre><code> is
                # handled.
                try:
                    tok_id, end_pos = next(it)
                except StopIteration:
                    break

                tag_lexer.Reset(pos, end_pos)
                if (tok_id == h8_id.StartTag and
                        tag_lexer.GetTagName() == 'code'):

                    css_class = tag_lexer.GetAttrRaw('class')
                    code_start_pos = end_pos

                    if css_class is None:
                        # Plain <code>, with no language given
                        slash_code_left, slash_code_right = \
                            html_old.ReadUntilEndTag(it, tag_lexer, 'code')

                        if default_highlighter is not None:
                            # TODO: Refactor this to remove duplication with
                            # language-{sh-prompt,oil-sh} below

                            # oil-sh for compatibility
                            if default_highlighter in ('sh-prompt', 'oils-sh',
                                                       'oil-sh'):
                                out.PrintUntil(code_start_pos)

                                # Using ShPromptPlugin because it does the comment highlighting
                                # we want!
                                plugin = ShPromptPlugin(
                                    s, code_start_pos, slash_code_left)
                                plugin.PrintHighlighted(out)

                                out.SkipTo(slash_code_left)
                            else:
                                raise RuntimeError(
                                    'Unknown default highlighter %r' %
                                    default_highlighter)

                    elif css_class.startswith('language'):
                        slash_code_left, slash_code_right = \
                            html_old.ReadUntilEndTag(it, tag_lexer, 'code')

                        if css_class == 'language-none':
                            # Allow ```none
                            pass

                        elif css_class in ('language-sh-prompt',
                                           'language-oil-sh',
                                           'language-oils-sh'):
                            # Here we're KEEPING the original <pre><code>
                            # Print everything up to and including <pre><code language="...">
                            out.PrintUntil(code_start_pos)

                            plugin = ShPromptPlugin(s, code_start_pos,
                                                    slash_code_left)
                            plugin.PrintHighlighted(out)

                            out.SkipTo(slash_code_left)

                        elif css_class == 'language-ysh':
                            # TODO: Write an Oil syntax highlighter.
                            pass

                        elif css_class.startswith('language-chapter-links-'):
                            m = CSS_CLASS_RE.match(css_class)
                            assert m is not None, css_class

                            #log('%s GROUPS %s', css_class, m.groups())
                            chapter, num_str = m.groups()
                            # -1 is passed when the class has no _N suffix
                            if num_str is not None:
                                linkify_stop_col = int(num_str)
                            else:
                                linkify_stop_col = -1

                            out.PrintUntil(code_start_pos)

                            plugin = HelpTopicsPlugin(s, code_start_pos,
                                                      slash_code_left, chapter,
                                                      linkify_stop_col)

                            block_debug_info = plugin.PrintHighlighted(out)

                            # e.g. these are links to cmd-lang within a block in toc-ysh
                            chap_block = {
                                'to_chap': chapter,
                                'lines': block_debug_info
                            }
                            debug_out.append(chap_block)

                            out.SkipTo(slash_code_left)

                        else:  # language-*: Use Pygments
                            if pygments is None:
                                log("Warning: Couldn't import pygments, so skipping syntax highlighting"
                                    )
                                # NOTE(review): this skips the 'pos = end_pos'
                                # at the bottom of the loop, so pos is not
                                # advanced for this iteration -- confirm that
                                # is harmless.
                                continue

                            # We REMOVE the original <pre><code> because
                            # Pygments gives you a <pre> already

                            # We just read closing </code>, and the next one
                            # should be </pre>.
                            try:
                                tok_id, end_pos = next(it)
                            except StopIteration:
                                break
                            tag_lexer.Reset(slash_code_right, end_pos)
                            assert tok_id == h8_id.EndTag, tok_id
                            assert (tag_lexer.GetTagName() == 'pre'
                                    ), tag_lexer.GetTagName()
                            slash_pre_right = end_pos

                            # Stop copying just BEFORE <pre>, so the original
                            # tags are dropped
                            out.PrintUntil(pre_start_pos)

                            lang = css_class[len('language-'):]
                            plugin = PygmentsPlugin(s, code_start_pos,
                                                    slash_code_left, lang)
                            plugin.PrintHighlighted(out)

                            # Resume copying just AFTER </pre>
                            out.SkipTo(slash_pre_right)
                            f.write('<!-- done pygments -->\n')

        pos = end_pos

    out.PrintTheRest()

    return f.getvalue()
570
571
def ExtractCode(s, f):
    # type: (str, IO[str]) -> None
    """Write the code blocks of an HTML document to a plain text file.

    This lets us at least validate the syntax of the examples.

    Similar to the algorithm in HighlightCode():

    1. Find <pre><code> ... </code></pre> where <code> has no class attribute
    2. Decode &amp; -> &, etc. and write the text to f, preceded by a
       '# block N' line

    Args:
      s: an HTML string.
      f: file-like object that receives the extracted code.
    """
    writer = htm8.Output(s, f)
    lexer = html_old.TagLexer(s)
    tokens = html_old.ValidTokens(s)

    num_blocks = 0
    pos = 0

    while True:
        try:
            tok_id, end_pos = next(tokens)
        except StopIteration:
            break

        if tok_id == h8_id.StartTag:
            lexer.Reset(pos, end_pos)
            if lexer.GetTagName() == 'pre':
                pos = end_pos

                # The token right after <pre> must be <code>
                try:
                    tok_id, end_pos = next(tokens)
                except StopIteration:
                    break

                lexer.Reset(pos, end_pos)
                is_code_tag = (tok_id == h8_id.StartTag and
                               lexer.GetTagName() == 'code')

                # Skip code blocks that look like ```foo
                # Usually we use 'oil-sh' as the default_highlighter, and
                # all those code blocks should be extracted. TODO: maybe
                # this should be oil-language?
                if is_code_tag and lexer.GetAttrRaw('class') is None:
                    code_left = end_pos

                    writer.SkipTo(code_left)
                    writer.Print('# block %d' % num_blocks)
                    writer.Print('\n')

                    code_right, _ = html_old.ReadUntilEndTag(
                        tokens, lexer, 'code')

                    decoded = html_old.ToText(s, code_left, code_right)
                    writer.SkipTo(code_right)

                    writer.Print(decoded)
                    writer.Print('\n')

                    num_blocks += 1

        pos = end_pos

    # Only the extracted blocks are written; the remainder of the document
    # is not copied, i.e. there is no PrintTheRest() call here.
638
639
class ShellSession(object):
    """
    Not implemented yet: run the commands in a shell transcript and fill in
    their output.

    TODO: Pass this to HighlightCode as a plugin

    The idea is that this input

    $ x=one
    $ echo $x
    $ echo two

    becomes

    $ x=one
    $ echo $x
    one
    $ echo two
    two

    with the output cached in files like

    blog/2019/12/_shell_session/
      $hash1-stdout.txt
      $hash2-stdout.txt

    Each command is hashed with md5 to name its cache file.  If the file
    already exists, the command isn't run again; delete the file to redo it.

    TODO: write a loop that reads one line at a time, writes it, then reads
    output from bash.
    Use the Lines iterator to get lines.
    For extra credit, you can solve the PS2 problem?  That's easily done with
    Oil's parser.
    """

    def __init__(self, shell_exe, cache_dir):
        # type: (str, str) -> None
        """
        Args:
          shell_exe: sh, bash, osh, or oil.  Use the one in the $PATH by default.
          cache_dir: ~/git/oilshell/oilshell.org/blog/2019/12/session/
        """
        self.cache_dir = cache_dir
        self.shell_exe = shell_exe

    def PrintHighlighted(self, s, start_pos, end_pos, out):
        # type: (str, int, int, htm8.Output) -> None
        """Placeholder: currently does nothing.

        Args:
          s: an HTML string.
        """
        return None
689
690
def main(argv):
    # type: (List[str]) -> None
    """Command line entry point.

    argv[1] is the action.  The only action is 'highlight', which reads HTML
    from stdin and prints the result of SimpleHighlightCode() to stdout.

    Raises:
      RuntimeError: if the action is anything else.
    """
    action = argv[1]

    if action != 'highlight':
        raise RuntimeError('Invalid action %r' % action)

    # for test/shell-vs-shell.sh
    print(SimpleHighlightCode(sys.stdin.read()))


if __name__ == '__main__':
    main(sys.argv)