| 1 | # HTM8 - An Easy Subset of HTML5
|
| 2 |
|
| 3 | module htm8
|
| 4 | {
|
| 5 |
|
| 6 | h8_id = # IDs for HTM8 tokens
|
| 7 | Decl
|
| 8 |
|
| 9 | # CommentBegin, ProcessingBegin, CDataBegin are "pseudo-tokens", not visible (NOTE(review): presumably not visible to callers of the lexer - confirm)
|
| 10 | | Comment | CommentBegin
|
| 11 | | Processing | ProcessingBegin
|
| 12 | | CData | CDataBegin
|
| 13 |
|
| 14 | | StartTag | StartEndTag | EndTag
|
| 15 |
|
| 16 | | DecChar | HexChar | CharEntity
|
| 17 |
|
| 18 | | RawData | HtmlCData
|
| 19 |
|
| 20 | | BadAmpersand | BadGreaterThan | BadLessThan
|
| 21 |
|
| 22 | | Invalid
|
| 23 | | EndOfStream
|
| 24 |
|
| 25 | # DoubleQuote and SingleQuote are returned by QUOTED_VALUE_LEX, in addition to DecChar, HexChar, etc.
|
| 26 | | DoubleQuote
|
| 27 | | SingleQuote
|
| 28 | generate [no_namespace_suffix] # cosmetic: call it h8_id, not h8_id_e
|
| 29 |
|
| 30 | # "enum" returned by lexer: outcome of looking for the next attribute name
|
| 31 | attr_name =
|
| 32 | Ok # Found an attribute
|
| 33 | | Done # No more attributes
|
| 34 | | Invalid # Malformed attribute, e.g. <a !>
|
| 35 | generate [no_namespace_suffix]
|
| 36 |
|
| 37 | # "enum" returned by lexer: which form of attribute value was found
|
| 38 | h8_val_id =
|
| 39 | UnquotedVal # a=foo
|
| 40 | | DoubleQuote # a="foo"
|
| 41 | | SingleQuote # a='foo'
|
| 42 | | NoMatch # anything else
|
| 43 | generate [no_namespace_suffix]
|
| 44 |
|
| 45 | # Returned by attr value parser
|
| 46 | attr_value = # NOTE(review): unlike the other sum types in this module, there is no `generate [no_namespace_suffix]` here - confirm that is intentional
|
| 47 | Missing # <a missing>
|
| 48 | | Empty # <a empty= >
|
| 49 | | Unquoted # <a unquoted=1 >
|
| 50 | | DoubleQuoted # <a quoted="1" >
|
| 51 | | SingleQuoted # <a quoted='1' >
|
| 52 | # There is no end-of-stream variant here: in that case the value is just Missing, and the next attr_name will fail
|
| 53 |
|
| 54 | #
|
| 55 | # OLD
|
| 56 | #
|
| 57 |
|
| 58 | h8_tag_id = # NOTE(review): this type sits under the "OLD" heading above - confirm whether it is still used
|
| 59 | TagName
|
| 60 | | AttrName
|
| 61 | | UnquotedValue | QuotedValue | MissingValue
|
| 62 | generate [no_namespace_suffix]
|
| 63 |
|
| 64 | # The API sketched below (commented out) is maybe more natural, but has more allocations
|
| 65 | #
|
| 66 | # tag_lexer.Read()
|
| 67 |
|
| 68 | # # Unquoted, Quoted, Empty, Missing
|
| 69 | # (int tag_name_start, int tag_name_end, attr_value)
|
| 70 |
|
| 71 | # attr_value =
|
| 72 | # Missing # <a missing> - tag_name_end adds =""
|
| 73 |
|
| 74 | # | Empty (int equals_pos) # <a empty=>
|
| 75 |
|
| 76 | # # <a unquoted=foo>
|
| 77 | # # the first one has end_pos 0, and can be h8_id.ZeroPosition?
|
| 78 | # | Unquoted (List[Tuple[h8_id, end_pos]] tokens)
|
| 79 |
|
| 80 | # # <a quoted="foo">
|
| 81 | # | Quoted (List[Tuple[h8_id, end_pos]] tokens)
|
| 82 |
|
| 83 | # # Rather than raise an exception, we should have this for bad data
|
| 84 | # | Invalid(int pos)
|
| 85 | }
|
| 86 |
|