| 1 | """
|
| 2 | pass_state.py
|
| 3 | """
|
| 4 | from __future__ import print_function
|
| 5 |
|
| 6 | import os
|
| 7 | import re
|
| 8 | import subprocess
|
| 9 | from collections import defaultdict
|
| 10 |
|
| 11 | from mypy.types import Type
|
| 12 | from mypy.nodes import Expression
|
| 13 |
|
| 14 | from mycpp.util import SymbolToString, log, SplitPyName, SymbolPath
|
| 15 |
|
| 16 | from typing import Optional, List, Dict, Tuple, Set, Any
|
| 17 |
|
| 18 | _ = log
|
| 19 |
|
| 20 |
|
| 21 | class member_t(object):
|
| 22 | pass
|
| 23 |
|
| 24 |
|
| 25 | class ModuleMember(member_t):
|
| 26 | """
|
| 27 | A member of a Python module.
|
| 28 |
|
| 29 | e.g. core.state.Mem => core::state::Mem
|
| 30 | """
|
| 31 |
|
| 32 | def __init__(self, module_path: SymbolPath, member: str) -> None:
|
| 33 | self.module_path = module_path
|
| 34 | self.member = member
|
| 35 |
|
| 36 |
|
| 37 | class StaticClassMember(member_t):
|
| 38 | """
|
| 39 | A static member of an object. Usually a a method like an alternative constructor.
|
| 40 |
|
| 41 | e.g. runtime_asdl.Cell.CreateNull() => runtime_asdl::Cell::CreateNull()
|
| 42 | """
|
| 43 |
|
| 44 | def __init__(self, type_name: str, member: str) -> None:
|
| 45 | self.type_name = type_name
|
| 46 | self.member = member
|
| 47 |
|
| 48 |
|
| 49 | class HeapObjectMember(member_t):
|
| 50 | """
|
| 51 | A member of a heap-allocated object.
|
| 52 |
|
| 53 | e.g foo.empty() => foo->empty()
|
| 54 | """
|
| 55 |
|
| 56 | def __init__(self, object_expr: Expression, object_type: Type,
|
| 57 | member: str) -> None:
|
| 58 | self.object_expr = object_expr
|
| 59 | self.object_type = object_type
|
| 60 | self.member = member
|
| 61 |
|
| 62 |
|
| 63 | class StackObjectMember(member_t):
|
| 64 | """
|
| 65 | A member of a stack-allocated object.
|
| 66 |
|
| 67 | e.g foo.empty() => foo.empty()
|
| 68 | """
|
| 69 |
|
| 70 | def __init__(self, object_expr: Expression, object_type: Type,
|
| 71 | member: str) -> None:
|
| 72 | self.object_expr = object_expr
|
| 73 | self.object_type = object_type
|
| 74 | self.member = member
|
| 75 |
|
| 76 |
|
| 77 | class Virtual(object):
|
| 78 | """Calculate which C++ methods need the virtual keyword.
|
| 79 |
|
| 80 | See unit test for example usage.
|
| 81 | """
|
| 82 |
|
| 83 | def __init__(self) -> None:
|
| 84 | self.methods: Dict[SymbolPath, List[str]] = defaultdict(list)
|
| 85 | self.subclasses: Dict[SymbolPath, List[SymbolPath]] = defaultdict(list)
|
| 86 | self.virtuals: Dict[Tuple[SymbolPath, str], Optional[Tuple[SymbolPath,
|
| 87 | str]]] = {}
|
| 88 | self.has_vtable: Dict[SymbolPath, bool] = {}
|
| 89 | self.can_reorder_fields: Dict[SymbolPath, bool] = {}
|
| 90 |
|
| 91 | # _Executor -> vm::_Executor
|
| 92 | self.base_class_unique: Dict[str, SymbolPath] = {}
|
| 93 |
|
| 94 | # These are called on the Forward Declare pass
|
| 95 | def OnMethod(self, class_name: SymbolPath, method_name: str) -> None:
|
| 96 | #log('VIRTUAL OnMethod %s %s', class_name, method_name)
|
| 97 |
|
| 98 | # __init__ and so forth don't count
|
| 99 | if method_name.startswith('__') and method_name.endswith('__'):
|
| 100 | return
|
| 101 |
|
| 102 | self.methods[class_name].append(method_name)
|
| 103 |
|
| 104 | def OnSubclass(self, base_class: SymbolPath, subclass: SymbolPath) -> None:
|
| 105 | #log('VIRTUAL OnSubclass base %s -> %s', base_class, subclass)
|
| 106 | if len(base_class) > 1:
|
| 107 | # Hack for
|
| 108 | #
|
| 109 | # class _Executor: pass
|
| 110 | # versus
|
| 111 | # class MyExecutor(vm._Executor): pass
|
| 112 | base_key = base_class[-1]
|
| 113 |
|
| 114 | # Fail if we have two base classes in different namespaces with the same
|
| 115 | # name.
|
| 116 | if base_key in self.base_class_unique:
|
| 117 | # Make sure we don't have collisions
|
| 118 | assert (self.base_class_unique[base_key] == base_class or
|
| 119 | base_class
|
| 120 | in self.subclasses[self.base_class_unique[base_key]]
|
| 121 | ), base_class
|
| 122 | else:
|
| 123 | self.base_class_unique[base_key] = base_class
|
| 124 |
|
| 125 | self.subclasses[base_class].append(subclass)
|
| 126 |
|
| 127 | def Calculate(self) -> None:
|
| 128 | """Call this after the forward declare pass."""
|
| 129 | for base_class, subclasses in self.subclasses.items():
|
| 130 | self.can_reorder_fields[base_class] = False
|
| 131 |
|
| 132 | for subclass in subclasses:
|
| 133 | self.can_reorder_fields[subclass] = False
|
| 134 |
|
| 135 | b_methods = self.methods[base_class]
|
| 136 | s_methods = self.methods[subclass]
|
| 137 | overlapping = set(b_methods) & set(s_methods)
|
| 138 | for method in overlapping:
|
| 139 | self.virtuals[(base_class, method)] = None
|
| 140 | self.virtuals[(subclass, method)] = (base_class, method)
|
| 141 | if overlapping:
|
| 142 | self.has_vtable[base_class] = True
|
| 143 | self.has_vtable[subclass] = True
|
| 144 |
|
| 145 | # These is called on the Decl pass
|
| 146 | def IsVirtual(self, class_name: SymbolPath, method_name: str) -> bool:
|
| 147 | return (class_name, method_name) in self.virtuals
|
| 148 |
|
| 149 | def HasVTable(self, class_name: SymbolPath) -> bool:
|
| 150 | return class_name in self.has_vtable
|
| 151 |
|
| 152 | def CanReorderFields(self, class_name: SymbolPath) -> bool:
|
| 153 | if class_name in self.can_reorder_fields:
|
| 154 | return self.can_reorder_fields[class_name]
|
| 155 | else:
|
| 156 | return True # by default they can be reordered
|
| 157 |
|
| 158 |
|
| 159 | def SymbolPathToReference(func: str, p: SymbolPath) -> str:
|
| 160 | if len(p) > 1:
|
| 161 | return '$ObjectMember({}, {})'.format(
|
| 162 | SymbolToString(p[:-1], delim='.'), p[-1])
|
| 163 |
|
| 164 | return '$LocalVariable({}, {})'.format(func, p[0])
|
| 165 |
|
| 166 |
|
| 167 | class Fact(object):
|
| 168 | """
|
| 169 | An abstract fact. These can be used to build up datalog programs.
|
| 170 | """
|
| 171 |
|
| 172 | def __init__(self) -> None:
|
| 173 | pass
|
| 174 |
|
| 175 | @staticmethod
|
| 176 | def name() -> str:
|
| 177 | raise NotImplementedError()
|
| 178 |
|
| 179 | def Generate(self, func: str, statement: int) -> str:
|
| 180 | raise NotImplementedError()
|
| 181 |
|
| 182 |
|
| 183 | class FunctionCall(Fact):
|
| 184 |
|
| 185 | def __init__(self, callee: str) -> None:
|
| 186 | self.callee = callee
|
| 187 |
|
| 188 | @staticmethod
|
| 189 | def name() -> str:
|
| 190 | return 'call'
|
| 191 |
|
| 192 | def Generate(self, func: str, statement: int) -> str:
|
| 193 | return '{}\t{}\t{}\n'.format(func, statement, self.callee)
|
| 194 |
|
| 195 |
|
| 196 | class Definition(Fact):
|
| 197 | """
|
| 198 | The definition of a variable. This corresponds to an allocation.
|
| 199 | """
|
| 200 |
|
| 201 | def __init__(self, ref: SymbolPath, obj: str) -> None:
|
| 202 | self.ref = ref
|
| 203 | self.obj = obj
|
| 204 |
|
| 205 | @staticmethod
|
| 206 | def name() -> str:
|
| 207 | return 'assign'
|
| 208 |
|
| 209 | def Generate(self, func: str, statement: int) -> str:
|
| 210 | return '{}\t{}\t{}\t{}\n'.format(func, statement,
|
| 211 | SymbolPathToReference(func, self.ref),
|
| 212 | self.obj)
|
| 213 |
|
| 214 |
|
| 215 | class Assignment(Fact):
|
| 216 | """
|
| 217 | The assignment of one variable or object member to another.
|
| 218 | """
|
| 219 |
|
| 220 | def __init__(self, lhs: SymbolPath, rhs: SymbolPath) -> None:
|
| 221 | self.lhs = lhs
|
| 222 | self.rhs = rhs
|
| 223 |
|
| 224 | @staticmethod
|
| 225 | def name() -> str:
|
| 226 | return 'assign'
|
| 227 |
|
| 228 | def Generate(self, func: str, statement: int) -> str:
|
| 229 | return '{}\t{}\t{}\t$Ref({})\n'.format(
|
| 230 | func, statement, SymbolPathToReference(func, self.lhs),
|
| 231 | SymbolPathToReference(func, self.rhs))
|
| 232 |
|
| 233 |
|
| 234 | class Use(Fact):
|
| 235 | """
|
| 236 | The use of a reference.
|
| 237 |
|
| 238 | In the last assignment below, we would emit Use(foo) and Use(x). We would,
|
| 239 | however, not emit Use(foo.a) since it is an lvalue and would instead be
|
| 240 | covered by the Assign fact. Similarly, the first two assignments do not
|
| 241 | generate Use facts.
|
| 242 |
|
| 243 | foo = Foo()
|
| 244 | x = Bar()
|
| 245 | foo.a = x
|
| 246 |
|
| 247 | Any time a reference appears in an expression (or expression-statement) it
|
| 248 | will be considered used.
|
| 249 |
|
| 250 | some_function(a) => Use(a)
|
| 251 | a + b => Use(a), Use(b)
|
| 252 | print(thing.dict[key]) => Use(thing), Use(thing.dict), Use(key)
|
| 253 | obj.func() => Use(obj)
|
| 254 | """
|
| 255 |
|
| 256 | def __init__(self, ref: SymbolPath) -> None:
|
| 257 | self.ref = ref
|
| 258 |
|
| 259 | @staticmethod
|
| 260 | def name() -> str:
|
| 261 | return 'use'
|
| 262 |
|
| 263 | def Generate(self, func: str, statement: int) -> str:
|
| 264 | return '{}\t{}\t{}\n'.format(func, statement,
|
| 265 | SymbolPathToReference(func, self.ref))
|
| 266 |
|
| 267 |
|
| 268 | class Bind(Fact):
|
| 269 | """
|
| 270 | Binding a reference to a positional function parameter.
|
| 271 | """
|
| 272 |
|
| 273 | def __init__(self, ref: SymbolPath, callee: SymbolPath,
|
| 274 | arg_pos: int) -> None:
|
| 275 | self.ref = ref
|
| 276 | self.callee = callee
|
| 277 | self.arg_pos = arg_pos
|
| 278 |
|
| 279 | @staticmethod
|
| 280 | def name() -> str:
|
| 281 | return 'bind'
|
| 282 |
|
| 283 | def Generate(self, func: str, statement: int) -> str:
|
| 284 | return '{}\t{}\t{}\t{}\t{}\n'.format(
|
| 285 | func, statement, SymbolPathToReference(func, self.ref),
|
| 286 | SymbolToString(self.callee, delim='.'), self.arg_pos)
|
| 287 |
|
| 288 |
|
| 289 | class ControlFlowGraph(object):
|
| 290 | """
|
| 291 | A simple control-flow graph.
|
| 292 |
|
| 293 | Every statement in the program is represented as a node in a graph with
|
| 294 | unique a numeric ID. Control flow is represented as directed edges through
|
| 295 | the graph. Loops can introduce back-edges. Every node in the graph will
|
| 296 | satisfy at least one of the following conditions:
|
| 297 |
|
| 298 | - Its indegree is at least one.
|
| 299 |
|
| 300 | - Its outdegree is at least one.
|
| 301 |
|
| 302 | For simple linear graphs all you need is the AddStatement method. For more
|
| 303 | complex flows there is a set of context managers below to help simplify
|
| 304 | construction.
|
| 305 |
|
| 306 | - For branches-like statements (e.g. if- and try- statements) use
|
| 307 | CfgBranchContext. It will take care of the details associated with
|
| 308 | stitching the different branches to statements in the next statement.
|
| 309 |
|
| 310 | - For loops, use CfgLoopContext. It will take care of adding back-edges
|
| 311 | and connecting break statements to any statements that proceed the
|
| 312 | loop.
|
| 313 |
|
| 314 | - CfgBlockContext can be used for simple cases where you just want to
|
| 315 | track the beginning and end of a sequence of statements.
|
| 316 |
|
| 317 | Statements can carry annotations called facts, which are used as inputs to
|
| 318 | datalog programs to perform dataflow diffrent kinds of dataflow analyses.
|
| 319 | To annotate a statement, use the AddFact method with any object that
|
| 320 | implements the Fact interface.
|
| 321 |
|
| 322 | See the unit tests in pass_state_test.py and the mycpp phase in
|
| 323 | control_flow_pass.py for detailed examples of usage.
|
| 324 | """
|
| 325 |
|
| 326 | def __init__(self) -> None:
|
| 327 | self.statement_counter: int = 0
|
| 328 | self.edges: Set[Tuple[int, int]] = set({})
|
| 329 | self.block_stack: List[int] = []
|
| 330 | self.predecessors: Set[int] = set({})
|
| 331 | self.deadends: Set[int] = set({})
|
| 332 |
|
| 333 | # order doesn't actually matter here, but sets require elements to be
|
| 334 | # hashable
|
| 335 | self.facts: Dict[int, List[Fact]] = defaultdict(list)
|
| 336 |
|
| 337 | def AddEdge(self, pred: int, succ: int) -> None:
|
| 338 | """
|
| 339 | Add a directed edge from pred to succ. If pred is a deadend, its
|
| 340 | non-deadends will be used instead.
|
| 341 | """
|
| 342 | if pred in self.deadends:
|
| 343 | for w in [u for (u, v) in self.edges if v == pred]:
|
| 344 | self.AddEdge(w, succ)
|
| 345 | else:
|
| 346 | self.edges.add((pred, succ))
|
| 347 |
|
| 348 | def AddDeadend(self, statement: int) -> None:
|
| 349 | """
|
| 350 | Mark a statement as a dead-end (e.g. return or continue).
|
| 351 | """
|
| 352 | self.deadends.add(statement)
|
| 353 |
|
| 354 | def AddStatement(self) -> int:
|
| 355 | """
|
| 356 | Add a new statement and return its ID.
|
| 357 | """
|
| 358 | if len(self.predecessors) == 0:
|
| 359 | if len(self.block_stack):
|
| 360 | self.predecessors.add(self.block_stack[-1])
|
| 361 | else:
|
| 362 | self.predecessors.add(self.statement_counter)
|
| 363 |
|
| 364 | self.statement_counter += 1
|
| 365 | for pred in self.predecessors:
|
| 366 | self.AddEdge(pred, self.statement_counter)
|
| 367 |
|
| 368 | self.predecessors = set({})
|
| 369 |
|
| 370 | if len(self.block_stack):
|
| 371 | self.block_stack[-1] = self.statement_counter
|
| 372 |
|
| 373 | return self.statement_counter
|
| 374 |
|
| 375 | def AddFact(self, statement: int, fact: Fact) -> None:
|
| 376 | """
|
| 377 | Annotate a statement with a fact.
|
| 378 | """
|
| 379 | self.facts[statement].append(fact)
|
| 380 |
|
| 381 | def _PushBlock(self, begin: Optional[int] = None) -> int:
|
| 382 | """
|
| 383 | Start a block at the given statement ID. If a beginning statement isn't
|
| 384 | provided one will be created and its ID will be returend.
|
| 385 |
|
| 386 | Direct use of this function is discouraged. Consider using one of the
|
| 387 | block context managers below instead.
|
| 388 | """
|
| 389 | if begin is None:
|
| 390 | begin = self.AddStatement()
|
| 391 | else:
|
| 392 | self.predecessors.add(begin)
|
| 393 |
|
| 394 | self.block_stack.append(begin)
|
| 395 | return begin
|
| 396 |
|
| 397 | def _PopBlock(self) -> int:
|
| 398 | """
|
| 399 | Pop a block from the top of the stack and return the ID of the block's
|
| 400 | last statement.
|
| 401 |
|
| 402 | Direct use of this function is discouraged. Consider using one of the
|
| 403 | block context managers below instead.
|
| 404 | """
|
| 405 | assert len(self.block_stack)
|
| 406 | last = self.block_stack.pop()
|
| 407 | if len(self.block_stack) and last not in self.deadends:
|
| 408 | self.block_stack[-1] = last
|
| 409 |
|
| 410 | return last
|
| 411 |
|
| 412 |
|
| 413 | class CfgBlockContext(object):
|
| 414 | """
|
| 415 | Context manager to make dealing with things like with-statements easier.
|
| 416 | """
|
| 417 |
|
| 418 | def __init__(self,
|
| 419 | cfg: ControlFlowGraph,
|
| 420 | begin: Optional[int] = None) -> None:
|
| 421 | self.cfg = cfg
|
| 422 | if cfg is None:
|
| 423 | return
|
| 424 |
|
| 425 | self.entry = self.cfg._PushBlock(begin)
|
| 426 | self.exit = self.entry
|
| 427 |
|
| 428 | def __enter__(self) -> Optional['CfgBlockContext']:
|
| 429 | return self if self.cfg else None
|
| 430 |
|
| 431 | def __exit__(self, *args: Any) -> None:
|
| 432 | if not self.cfg:
|
| 433 | return
|
| 434 |
|
| 435 | self.exit = self.cfg._PopBlock()
|
| 436 |
|
| 437 |
|
| 438 | class CfgBranchContext(object):
|
| 439 | """
|
| 440 | Context manager to make dealing with if-else blocks easier.
|
| 441 | """
|
| 442 |
|
| 443 | def __init__(self, cfg: ControlFlowGraph, branch_point: int) -> None:
|
| 444 | self.cfg = cfg
|
| 445 | self.entry = branch_point
|
| 446 | self.exit = self.entry
|
| 447 | if cfg is None:
|
| 448 | return
|
| 449 |
|
| 450 | self.arms: List[CfgBranchContext] = []
|
| 451 | self.pushed = False
|
| 452 |
|
| 453 | def AddBranch(self, entry: Optional[int] = None) -> 'CfgBranchContext':
|
| 454 | if not self.cfg:
|
| 455 | return CfgBranchContext(None, None)
|
| 456 |
|
| 457 | self.arms.append(CfgBranchContext(self.cfg, entry or self.entry))
|
| 458 | self.cfg._PushBlock(self.arms[-1].entry)
|
| 459 | self.arms[-1].pushed = True
|
| 460 | return self.arms[-1]
|
| 461 |
|
| 462 | def __enter__(self) -> 'CfgBranchContext':
|
| 463 | return self
|
| 464 |
|
| 465 | def __exit__(self, *args: Any) -> None:
|
| 466 | if not self.cfg:
|
| 467 | return
|
| 468 |
|
| 469 | if self.pushed:
|
| 470 | self.exit = self.cfg._PopBlock()
|
| 471 |
|
| 472 | for arm in self.arms:
|
| 473 | if arm.exit not in self.cfg.deadends:
|
| 474 | self.cfg.predecessors.add(arm.exit)
|
| 475 |
|
| 476 |
|
| 477 | class CfgLoopContext(object):
|
| 478 | """
|
| 479 | Context manager to make dealing with loops easier.
|
| 480 | """
|
| 481 |
|
| 482 | def __init__(self,
|
| 483 | cfg: ControlFlowGraph,
|
| 484 | entry: Optional[int] = None) -> None:
|
| 485 | self.cfg = cfg
|
| 486 | self.breaks: Set[int] = set()
|
| 487 | if cfg is None:
|
| 488 | return
|
| 489 |
|
| 490 | self.entry = self.cfg._PushBlock(entry)
|
| 491 | self.exit = self.entry
|
| 492 |
|
| 493 | def AddBreak(self, statement: int) -> None:
|
| 494 | assert self.cfg
|
| 495 | self.breaks.add(statement)
|
| 496 | self.cfg.AddDeadend(statement)
|
| 497 |
|
| 498 | def AddContinue(self, statement: int) -> None:
|
| 499 | self.cfg.AddEdge(statement, self.entry)
|
| 500 | self.cfg.AddDeadend(statement)
|
| 501 |
|
| 502 | def __enter__(self) -> Optional['CfgLoopContext']:
|
| 503 | return self if self.cfg else None
|
| 504 |
|
| 505 | def __exit__(self, *args: Any) -> None:
|
| 506 | if not self.cfg:
|
| 507 | return
|
| 508 |
|
| 509 | self.exit = self.cfg._PopBlock()
|
| 510 | self.cfg.AddEdge(self.exit, self.entry)
|
| 511 | for pred in self.cfg.predecessors:
|
| 512 | self.cfg.AddEdge(pred, self.entry)
|
| 513 |
|
| 514 | # If we had any breaks, arm the predecessor set with the current
|
| 515 | # statement and the break statements.
|
| 516 | if len(self.breaks):
|
| 517 | if len(self.cfg.block_stack):
|
| 518 | self.cfg.predecessors.add(self.cfg.block_stack[-1])
|
| 519 | else:
|
| 520 | self.cfg.predecessors.add(self.cfg.statement_counter)
|
| 521 |
|
| 522 | for b in self.breaks:
|
| 523 | self.cfg.deadends.remove(b)
|
| 524 | self.cfg.predecessors.add(b)
|
| 525 |
|
| 526 |
|
| 527 | class StackRoots(object):
|
| 528 | """
|
| 529 | Output of the souffle stack roots solver.
|
| 530 | """
|
| 531 |
|
| 532 | def __init__(self, tuples: Set[Tuple[SymbolPath, SymbolPath]]) -> None:
|
| 533 | self.root_tuples = tuples
|
| 534 |
|
| 535 | def needs_root(self, func: SymbolPath, reference: SymbolPath) -> bool:
|
| 536 | """
|
| 537 | Returns true if the given reference should have a stack root.
|
| 538 | """
|
| 539 | return (func, reference) in self.root_tuples
|
| 540 |
|
| 541 |
|
| 542 | def DumpControlFlowGraphs(cfgs: Dict[SymbolPath, ControlFlowGraph],
|
| 543 | facts_dir: str = '_tmp/mycpp-facts') -> None:
|
| 544 | """
|
| 545 | Dump the given control flow graphs and associated facts into the given
|
| 546 | directory as text files that can be consumed by datalog.
|
| 547 | """
|
| 548 | edge_facts = '{}/cf_edge.facts'.format(facts_dir)
|
| 549 |
|
| 550 | os.makedirs(facts_dir, exist_ok=True)
|
| 551 | # Open files for all facts that we might emit even if we don't end up having
|
| 552 | # anything to write to them. Souffle will complain if it can't find the file
|
| 553 | # for anything marked as an input.
|
| 554 | fact_files = {
|
| 555 | fact_type.name():
|
| 556 | open('{}/{}.facts'.format(facts_dir, fact_type.name()), 'w')
|
| 557 | for fact_type in Fact.__subclasses__()
|
| 558 | }
|
| 559 | with open(edge_facts, 'w') as cfg_f:
|
| 560 | for func, cfg in sorted(cfgs.items()):
|
| 561 | joined = SymbolToString(func, delim='.')
|
| 562 | for (u, v) in sorted(cfg.edges):
|
| 563 | cfg_f.write('{}\t{}\t{}\n'.format(joined, u, v))
|
| 564 |
|
| 565 | for statement, facts in sorted(cfg.facts.items()):
|
| 566 | for fact in facts: # already sorted temporally
|
| 567 | fact_f = fact_files[fact.name()]
|
| 568 | fact_f.write(fact.Generate(joined, statement))
|
| 569 |
|
| 570 | for f in fact_files.values():
|
| 571 | f.close()
|
| 572 |
|
| 573 |
|
| 574 | def ComputeMinimalStackRoots(cfgs: Dict[SymbolPath, ControlFlowGraph],
|
| 575 | souffle_dir: str = '_tmp') -> StackRoots:
|
| 576 | """
|
| 577 | Run the the souffle stack roots solver and translate its output in a format
|
| 578 | that can be queried by cppgen_pass.
|
| 579 | """
|
| 580 | facts_dir = '{}/facts'.format(souffle_dir)
|
| 581 | os.makedirs(facts_dir)
|
| 582 | output_dir = '{}/outputs'.format(souffle_dir)
|
| 583 | os.makedirs(output_dir)
|
| 584 | DumpControlFlowGraphs(cfgs, facts_dir=facts_dir)
|
| 585 |
|
| 586 | subprocess.check_call([
|
| 587 | '_bin/datalog/dataflow',
|
| 588 | '-F',
|
| 589 | facts_dir,
|
| 590 | '-D',
|
| 591 | output_dir,
|
| 592 | ])
|
| 593 |
|
| 594 | tuples: Set[Tuple[SymbolPath, SymbolPath]] = set({})
|
| 595 | with open('{}/stack_root_vars.tsv'.format(output_dir), 'r') as roots_f:
|
| 596 | pat = re.compile(r'\$(.*)\((.*), (.*)\)')
|
| 597 | for line in roots_f:
|
| 598 | function, ref = line.split('\t')
|
| 599 | m = pat.match(ref)
|
| 600 | assert m.group(1) in ('LocalVariable', 'ObjectMember')
|
| 601 | if m.group(1) == 'LocalVariable':
|
| 602 | _, ref_func, var_name = m.groups()
|
| 603 | assert ref_func == function
|
| 604 | tuples.add((SplitPyName(function), (var_name, )))
|
| 605 |
|
| 606 | if m.group(1) == 'ObjectMember':
|
| 607 | _, base_obj, member_name = m.groups()
|
| 608 | tuples.add((SplitPyName(function),
|
| 609 | SplitPyName(base_obj) + (member_name, )))
|
| 610 |
|
| 611 | return StackRoots(tuples)
|