OILS / core / alloc.py View on Github | oils.pub

221 lines, 100 significant
1"""
2alloc.py - Arena manages SourceLine and Token instances (could rename)
3"""
4
5from _devbuild.gen.syntax_asdl import source_t, Token, SourceLine, loc
6from asdl import runtime
7from core import error
8from mycpp.mylib import log
9
10from typing import List, Dict, Any
11
12_ = log
13
14
class ctx_SourceCode(object):
    """Scope guard that keeps a source_t on the arena's source stack.

    The source is pushed as soon as the object is constructed (not in
    __enter__) and popped again when the 'with' block exits.
    """

    def __init__(self, arena, src):
        # type: (Arena, source_t) -> None
        self.arena = arena
        # Push eagerly: __enter__ below intentionally does nothing.
        self.arena.PushSource(src)

    def __enter__(self):
        # type: () -> None
        pass

    def __exit__(self, type, value, traceback):
        # type: (Any, Any, Any) -> None
        # Undo the push done in __init__, whether or not an exception is in
        # flight.  Nothing is returned, so exceptions propagate.
        self.arena.PopSource()
29
30
class Arena(object):
    """Manages source_t, SourceLine, Token.

    - source_instances is a stack; AddLine() tags each new line with its top.
    - lines_list accumulates SourceLine objects until DiscardLines().
    - When save_tokens is set, every Token made by NewToken() is also
      recorded so it can be mapped to and from an integer span ID.
    """

    def __init__(self, save_tokens=False):
        # type: (bool) -> None

        self.save_tokens = save_tokens

        # indexed by span_id
        self.tokens = []  # type: List[Token]
        # Next span ID to hand out; moves in step with len(self.tokens).
        self.num_tokens = 0

        # Only used in tools.  Reverse mapping of self.tokens.
        self.span_id_lookup = {}  # type: Dict[Token, int]

        # All lines that haven't been discarded.  For LST formatting.
        self.lines_list = []  # type: List[SourceLine]

        # reuse these instances in many line_span instances
        self.source_instances = []  # type: List[source_t]

    def SaveTokens(self):
        # type: () -> None
        """
        Used by --tool X.  Do we need LosslessArena?

        Turns on token recording for all subsequent NewToken() calls.
        """
        self.save_tokens = True

    def PushSource(self, src):
        # type: (source_t) -> None
        """Make src the current source for lines added from now on."""
        self.source_instances.append(src)

    def PopSource(self):
        # type: () -> None
        """Drop the current source, restoring the one pushed before it."""
        self.source_instances.pop()

    def AddLine(self, line, line_num):
        # type: (str, int) -> SourceLine
        """Save a physical line and return the SourceLine created for it.

        The line number is 1-based.

        The line is attributed to the source on top of the source stack, so
        PushSource() must have been called first.
        """
        src_line = SourceLine(line_num, line, self.source_instances[-1])
        self.lines_list.append(src_line)
        return src_line

    def DiscardLines(self):
        # type: () -> None
        """Remove references to lines we've accumulated.

        - This makes the linear search in SnipCodeString() shorter.
        - It removes the ARENA's references to all lines.  The TOKENS still
          reference some lines.
        """
        # Clear in place so that anyone holding the same list sees it empty.
        del self.lines_list[:]

    def SnipCodeString(self, left, right, inclusive=True):
        # type: (Token, Token, bool) -> str
        """Return the code string between the left and right tokens.

        Used for ALIAS expansion, which happens in the PARSER.

        The argument to aliases can span multiple lines, like this:

        $ myalias '1 2 3'

        Args:
          left: Token where the snippet starts.
          right: Token where the snippet ends.  Its line must be the same as
            left's line, or come after it in self.lines_list.
          inclusive: If True (the default), the text of both tokens is part
            of the result.  If False, the result starts right after `left`
            and stops right before `right`, and is prefixed with one space
            for every column up to the end of `left`.

        The lines of both tokens must still be in self.lines_list (i.e. not
        discarded); otherwise an assertion fails.
        """
        if inclusive:
            ileft = left.col
            iright = right.col + right.length
        else:
            ileft = left.col + left.length
            iright = right.col

        pieces = []  # type: List[str]
        if not inclusive:
            # Padding as wide as everything that was cut off on the left.
            pieces.append(' ' * ileft)

        # Fast path: both tokens on one line, so a single slice is enough.
        if left.line == right.line:
            for li in self.lines_list:
                if li == left.line:
                    pieces.append(li.content[ileft:iright])
                    return ''.join(pieces)
            # Line not found: fall through so the assertions below fire.

        saving = False
        found_left = False
        found_right = False
        for li in self.lines_list:
            if li == left.line:
                found_left = True
                saving = True

                # Save everything after the left token
                pieces.append(li.content[ileft:])
                continue

            if li == right.line:
                found_right = True

                # Save everything up to the right token, then stop
                pieces.append(li.content[:iright])

                saving = False
                break

            if saving:
                # A whole line strictly between the two tokens
                pieces.append(li.content)

        assert found_left, "Couldn't find left token"
        assert found_right, "Couldn't find right token"
        return ''.join(pieces)

    def NewToken(self, id_, col, length, src_line):
        # type: (int, int, int, SourceLine) -> Token
        """Create a Token, recording it under a new span ID if save_tokens.

        Raises:
          error.Parse: if length is 65536 or more.
        """
        # NOTE(review): the limit looks like a 16-bit length field -- confirm
        # against the Token schema.
        if length >= 65536:
            raise error.Parse(
                '',  # ignored message
                loc.TokenTooLong(src_line, id_, length, col))

        tok = Token(id_, length, col, src_line, None)
        if self.save_tokens:
            span_id = self.num_tokens
            self.num_tokens += 1

            self.tokens.append(tok)
            self.span_id_lookup[tok] = span_id
        return tok

    def UnreadOne(self):
        # type: () -> None
        """Reuse the last span ID.

        Forgets the most recently saved token, so the next NewToken() gets
        the same span ID.  Does nothing unless save_tokens is set.
        """
        if self.save_tokens:
            tok = self.tokens.pop()
            self.num_tokens -= 1

            # Forget the reverse mapping as well.  Otherwise GetSpanId() on
            # the discarded token would keep returning an ID that the next
            # NewToken() assigns to a different token, and the dict would
            # keep the discarded token alive.
            # NOTE(review): if the C++ translation doesn't accept
            # 'del d[k]', switch to the project's dict-erase helper.
            if tok in self.span_id_lookup:
                del self.span_id_lookup[tok]

    def GetToken(self, span_id):
        # type: (int) -> Token
        """Return the saved Token with the given span ID."""
        assert span_id != runtime.NO_SPID, span_id
        assert span_id < len(self.tokens), \
            'Span ID out of range: %d is greater than %d' % (span_id, len(self.tokens))
        return self.tokens[span_id]

    def GetSpanId(self, tok):
        # type: (Token) -> int
        """Given a Token, return its sequence number (span ID).

        The token must have been created by NewToken() while save_tokens was
        set, and not have been dropped by UnreadOne().
        """
        assert tok in self.span_id_lookup
        return self.span_id_lookup[tok]

    def LastSpanId(self):
        # type: () -> int
        """Return one past the last span ID."""
        return len(self.tokens)
190
191
class LosslessArena(Arena):
    """Placeholder for an Arena that upholds the lossless invariant.

    TODO: not implemented yet; currently identical to Arena.

    Intended for:
      --tool fmt
      --tool ysh-ify

    It would retain every SourceLine and Token.

    Open question: should re-parsing be disallowed somehow?  Is that
    equivalent to ctx_SourceCode()?
    """
205
206
class DynamicArena(Arena):
    """Placeholder for the Arena used by the batch and interactive shell.

    Currently identical to Arena.

    TODO:
    - Test that SourceLine and Token are GC'd

    It should nevertheless support:
    - SnipCodeString() for aliases
    - SnipCodeBlock() for Hay

    Open question: are either of those necessary in the LosslessArena?  We
    might have different utilities there.
    """