/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this file, * You can obtain one at http://mozilla.org/MPL/2.0/. */ /* -*- indent-tabs-mode: nil; js-indent-level: 4 -*- */ // Utility code for traversing the JSON data structures produced by sixgill. "use strict"; var TRACING = false; // When edge.Kind == "Pointer", these are the meanings of the edge.Reference field. var PTR_POINTER = 0; var PTR_REFERENCE = 1; var PTR_RVALUE_REF = 2; // A class wrapping information about a single function, plus access to global // information. var FunctionFlowGraph = class FunctionFlowGraph { constructor({ name, bodies, typeInfo }) { // Full name ("$") of the function. assert(name); this.name = name; // The loop bodies within this function. There will always be 1 main // body at the end of the list, and additional bodies for all the loops. // (It's not a simple 1-1 relationship; there could be nested loops and // artificial loops created to produce a reducible control flow graph.) // https://firefox-source-docs.mozilla.org/js/HazardAnalysis/CFG.html assert(bodies); this.bodies = bodies; // typeInfo is global data, not tied to this function, but we only want // to be passing around one thing everywhere. assert(typeInfo); this.typeInfo = typeInfo; } mainBody() { // Loops come first, then the main body. return this.bodies.at(-1); } forEachBody(f) { this.bodies.forEach(f); } // Look up a variable declaration with a Variable object from the CFG. Note // that the body does not matter, because for some strange reason all the // variable declarations are duplicated across all bodies. If the same name // is used in different bodies (or within the same body in different // scopes), the variable name will be disambiguated by appending ":". lookupDecl(variable) { return this.mainBody().DefineVariable?.find( decl => sameVariable(decl.Variable, variable) ); } forEachDecl(f) { this.mainBody().DefineVariable?.forEach(f); } getAttrsForTypeName(typeName) { let attrs = 0; if (typeName in this.typeInfo.GCSuppressors) { attrs = attrs | ATTR_GC_SUPPRESSED; } return attrs; } getBodyByBlockId(blockId) { const body = this.bodies.find( body => sameBlockId(body.BlockId, blockId) ); assert(body); return body; } }; // Find all points (positions within the code) within the body (an outer // function or a loop within it), recursing into loops if needed. Return them as // a list of tuples, where `bits` is just the same // value passed in here (in the future, perhaps it could be updated to reflect // RAII or other scopes). function findAllPoints(ffg, blockId, bits) { const body = ffg.getBodyByBlockId(blockId); if (!("PEdge" in body)) return; const points = []; for (var edge of body.PEdge) { points.push([body, edge.Index[0], bits]); if (edge.Kind == "Loop") points.push(...findAllPoints(ffg, edge.BlockId, bits)); } return points; } // Visitor of a graph of vertexes and sixgill-generated edges, // where the edges represent the actual computation happening. // // Uses the syntax `var Visitor = class { ... }` rather than `class Visitor` // to allow reloading this file with the JS debugger. var Visitor = class { constructor(ffg) { this.visited_bodies = new Map(); ffg.forEachBody(body => { this.visited_bodies.set(body, new Map()); }); } // Prepend `edge` to the info stored at the successor node, returning // the updated info value. This should be overridden by pretty much any // subclass, as a traversal's semantics are largely determined by this method. extend_path(edge, body, ppoint, successor_value) { return true; } // Default implementation does a basic "only visit nodes once" search. // (Whether this is BFS/DFS/other is determined by the caller.) // Override if you need to revisit nodes. Valid actions are "continue", // "prune", and "done". "continue" means continue with the search. "prune" // means do not continue to predecessors of this node, only continue with // the remaining entries in the work queue. "done" means the // whole search is complete even if unvisited nodes remain. next_action(prev, current) { return prev ? "prune" : "continue"; } // Update the info at a node. If this is the first time the node has been // seen, `prev` will be undefined. `current` will be the info computed by // `extend_path`. The node will be updated with the return value. merge_info(prev, current) { return true; } // Default visit() implementation. Subclasses will usually leave this alone // and use the other methods as extension points. // // Take a body, a point within that body, and the info computed by // extend_path() for that point when traversing an edge. Return whether the // search should continue ("continue"), the search should be pruned and // other paths followed ("prune"), or that the whole search is complete and // it is time to return a value ("done", and the value returned by // merge_info() will be returned by the overall search). // // Persistently record the value computed so far at each point, and call // (overridable) next_action() and merge_info() methods with the previous // and freshly-computed value for each point. // // Often, extend_path() will decide how/whether to continue the search and // will return the search action to take, and next_action() will blindly // return it if the point has not yet been visited. (And if it has, it will // prune this branch of the search so that no point is visited multiple // times.) visit(body, ppoint, info) { const visited_value_table = this.visited_bodies.get(body); const existing_value_if_visited = visited_value_table.get(ppoint); const action = this.next_action(existing_value_if_visited, info); const merged = this.merge_info(existing_value_if_visited, info); visited_value_table.set(ppoint, merged); return [action, merged]; } }; // For a given function containing a set of bodies, each containing a set of // ppoints, perform a mostly breadth-first traversal through the complete graph // of all nodes throughout all the bodies of the function. // // When traversing, every node is associated with a value that // is assigned or updated whenever it is visited. The overall traversal // terminates when a given condition is reached, and an arbitrary custom value // is returned. If the search completes without the termination condition // being reached, it will return the value associated with the entrypoint // node, which is initialized to `entrypoint_fallback_value` (and thus serves as // the fallback return value if all search paths are pruned before reaching // the entrypoint.) // // The traversal is only *mostly* breadth-first because the visitor decides // whether to stop searching when it sees a node. If a node is visited for a // second time, the visitor can choose to continue (and thus revisit the node) // in order to find "better" paths that may include a node more than once. // The search is done in the "upwards" direction -- as in, it starts at the // exit point and searches through predecessors. // // Override visitor.visit() to return an action and a value. The action // determines whether the overall search should terminate ('done'), or // continues looking through the predecessors of the current node ('continue'), // or whether it should just continue processing the work queue without // looking at predecessors ('prune'). // // This allows this function to be used in different ways. If the visitor // associates a value with each node that chains onto its forward-flow successors // (predecessors in the "upwards" search order), then a complete path through // the graph will be returned. // // Alternatively, BFS_upwards() can be used to test whether a condition holds // (eg "the exit point is reachable only after calling SomethingImportant()"), // in which case no path is needed and the visitor can compute a simple boolean // every time it encounters a point. Note that `entrypoint_fallback_value` will // still be returned if the search terminates without ever reaching the // entrypoint, which is useful for dominator analyses. // // See the Visitor base class's implementation of visit(), above, for the // most commonly used visit logic. function BFS_upwards(start_body, start_ppoint, ffg, visitor, initial_successor_value = {}, entrypoint_fallback_value=null) { let entrypoint_value = entrypoint_fallback_value; const work = [[start_body, start_ppoint, null, initial_successor_value]]; if (TRACING) { printErr(`BFS start at ${blockIdentifier(start_body)}:${start_ppoint}`); } while (work.length > 0) { const [body, ppoint, edgeToAdd, successor_value] = work.shift(); if (TRACING) { const s = edgeToAdd ? " : " + str(edgeToAdd) : ""; printErr(`prepending edge from ${ppoint} to state '${successor_value}'${s}`); } let value = visitor.extend_path(edgeToAdd, body, ppoint, successor_value); const [action, merged_value] = visitor.visit(body, ppoint, value); if (action === "done") { return merged_value; } if (action === "prune") { // Do not push anything else to the work queue, but continue processing // other branches. continue; } assert(action == "continue"); value = merged_value; const predecessors = getPredecessors(body); for (const edge of (predecessors[ppoint] || [])) { if (edge.Kind == "Loop") { // Propagate the search into the exit point of the loop body. const loopBody = ffg.getBodyByBlockId(edge.BlockId); const loopEnd = loopBody.Index[1]; work.push([loopBody, loopEnd, null, value]); // Don't continue to predecessors here without going through // the loop. (The points in this body that enter the loop will // be traversed when we reach the entry point of the loop.) } work.push([body, edge.Index[0], edge, value]); } // Check for hitting the entry point of a loop body. if (ppoint == body.Index[0] && body.BlockId.Kind == "Loop") { // Propagate to outer body parents that enter the loop body. for (const parent of (body.BlockPPoint || [])) { const parentBody = ffg.getBodyByBlockId(parent.BlockId); work.push([parentBody, parent.Index, null, value]); } // This point is also preceded by the *end* of this loop, for the // previous iteration. work.push([body, body.Index[1], null, value]); } // Check for reaching the entrypoint of the function. if (body === start_body && ppoint == body.Index[0]) { entrypoint_value = value; } } // The search space was exhausted without finding a 'done' state. That // might be because all search paths were pruned before reaching the entry // point of the function, in which case entrypoint_value will still be its initial // value. (If entrypoint_value has been set, then we may still not have visited the // entire graph, if some paths were pruned but at least one made it to the entrypoint.) return entrypoint_value; } // Given the CFG for the constructor call of some RAII, return whether the // given edge is the matching destructor call. function isMatchingDestructor(edge, constructed) { if (edge.Kind != "Call") return false; var callee = edge.Exp[0]; if (callee.Kind != "Var") return false; var variable = callee.Variable; assert(variable.Kind == "Func"); if (variable.Name[1].charAt(0) != '~') return false; // Note that in some situations, a regular function can begin with '~', so // we don't necessarily have a destructor in hand. This is probably a // sixgill artifact, but in js::wasm::ModuleGenerator::~ModuleGenerator, a // templatized static inline EraseIf is invoked, and it gets named ~EraseIf // for some reason. if (!("PEdgeCallInstance" in edge)) return false; if (constructed.Kind != "Var") { // The constructor has to be just the variable v we're interested in, // not *v or v.field. return false; } var destructExp = edge.PEdgeCallInstance.Exp; if (destructExp.Kind != "Var") return false; return sameVariable(constructed.Variable, destructExp.Variable); } // Return all calls within the RAII scope of any constructor matched by // matchConstructorEdge(). (Note that this would be insufficient if you needed // to treat each instance separately, such as when different regions of a // function body were guarded by these constructors and you needed to do // something different with each.) function allRAIIGuardedCallPoints(ffg, body) { if (!("PEdge" in body)) return []; var points = []; for (const edge of body.PEdge) { const result = matchConstructorEdge(ffg, edge); if (result && result.attrs != 0) { points.push(...pointsInRAIIScope(ffg, body, edge, result.attrs, result.constructed)); } } return points; } // Test whether the given edge is the constructor corresponding to the given // destructor edge. function isMatchingConstructor(destructor, edge) { if (edge.Kind != "Call") return false; var callee = edge.Exp[0]; if (callee.Kind != "Var") return false; var variable = callee.Variable; if (variable.Kind != "Func") return false; var name = readable(variable.Name[0]); var destructorName = readable(destructor.Exp[0].Variable.Name[0]); var match = destructorName.match(/^(.*?::)~(\w+)\(/); if (!match) { printErr("Unhandled destructor syntax: " + destructorName); return false; } var constructorSubstring = match[1] + match[2]; if (name.indexOf(constructorSubstring) == -1) return false; var destructExp = destructor.PEdgeCallInstance.Exp; if (destructExp.Kind != "Var") return false; var constructExp = edge.PEdgeCallInstance.Exp; if (constructExp.Kind != "Var") return false; return sameVariable(constructExp.Variable, destructExp.Variable); } function findMatchingConstructor(destructorEdge, body, warnIfNotFound=true) { var worklist = [destructorEdge]; var predecessors = getPredecessors(body); while(worklist.length > 0) { var edge = worklist.pop(); if (isMatchingConstructor(destructorEdge, edge)) return edge; if (edge.Index[0] in predecessors) { for (var e of predecessors[edge.Index[0]]) worklist.push(e); } } if (warnIfNotFound) printErr("Could not find matching constructor!"); return undefined; } // Return an array of all points within the RAII scope, each point being a tuple // [, , ]. // // ffg - information about the function being processed. // body - the body containing the starting point. // constructorEdge - the edge representing the constructor invocation. Used // only to initialize the search to the point just after the ctor call. // bits - the attributes to include with each point. Currently, this is the // same for every returned point tuple. // constructed - the variable that was constructed. For a regular constructor // call, this could be trivially inferred from `constructorEdge`, but for // inlined constexpr constructors, the caller needs to figure it out. function pointsInRAIIScope(ffg, body, constructorEdge, bits, constructed) { var seen = {}; var worklist = [constructorEdge.Index[1]]; var points = []; while (worklist.length) { var point = worklist.pop(); if (point in seen) continue; seen[point] = true; points.push([body, point, bits]); var successors = getSuccessors(body); if (!(point in successors)) continue; for (var nedge of successors[point]) { if (isMatchingDestructor(nedge, constructed)) continue; if (nedge.Kind == "Loop") points.push(...findAllPoints(ffg, nedge.BlockId, bits)); worklist.push(nedge.Index[1]); } } return points; } function isImmobileValue(exp) { if (exp.Kind == "Int" && exp.String == "0") { return true; } return false; } // Returns whether decl is a body.DefineVariable[] entry for a non-temporary reference. function isReferenceDecl(decl) { return decl.Type.Kind == "Pointer" && decl.Type.Reference != PTR_POINTER && decl.Variable.Kind != "Temp"; } function expressionIsVariableAddress(exp, variable) { while (exp.Kind == "Fld") exp = exp.Exp[0]; return exp.Kind == "Var" && sameVariable(exp.Variable, variable); } function edgeTakesVariableAddress(edge, variable, body) { if (ignoreEdgeUse(edge, variable, body)) return false; if (ignoreEdgeAddressTaken(edge)) return false; switch (edge.Kind) { case "Assign": return expressionIsVariableAddress(edge.Exp[1], variable); case "Call": if ("PEdgeCallArguments" in edge) { for (var exp of edge.PEdgeCallArguments.Exp) { if (expressionIsVariableAddress(exp, variable)) return true; } } return false; default: return false; } } // Look at an invocation of a virtual method or function pointer contained in a // field, and return the static type of the invocant (or the containing struct, // for a function pointer field.) function getFieldCallInstanceCSU(edge, field) { if ("FieldInstanceFunction" in field) { // We have a 'this'. const instanceExp = edge.PEdgeCallInstance.Exp; if (instanceExp.Kind == 'Drf') { // somevar->foo() return edge.Type.TypeFunctionCSU.Type.Name; } else if (instanceExp.Kind == 'Fld') { // somevar.foo() return instanceExp.Field.FieldCSU.Type.Name; } else if (instanceExp.Kind == 'Index') { // A strange construct. // C++ code: static_cast(this)->trace(trc); // CFG: Call(21,30, this*[-1]{JS::CustomAutoRooter}.trace*(trc*)) return instanceExp.Type.Name; } else if (instanceExp.Kind == 'Var') { // C++: reinterpret_cast(gRawGMT)->~SimpleTimeZone(); // CFG: // # icu_64::SimpleTimeZone::icu_64::SimpleTimeZone.__comp_dtor // [6,7] Call gRawGMT.icu_64::SimpleTimeZone.__comp_dtor () return field.FieldCSU.Type.Name; } else { printErr("------------------ edge -------------------"); printErr(JSON.stringify(edge, null, 4)); printErr("------------------ field -------------------"); printErr(JSON.stringify(field, null, 4)); assert(false, `unrecognized FieldInstanceFunction Kind ${instanceExp.Kind}`); } } else { // somefar.foo() where somevar is a field of some CSU. return field.FieldCSU.Type.Name; } } function expressionUsesVariable(exp, variable) { if (exp.Kind == "Var" && sameVariable(exp.Variable, variable)) return true; if (!("Exp" in exp)) return false; for (var childExp of exp.Exp) { if (expressionUsesVariable(childExp, variable)) return true; } return false; } function expressionUsesVariableContents(exp, variable) { if (!("Exp" in exp)) return false; for (var childExp of exp.Exp) { if (childExp.Kind == 'Drf') { if (expressionUsesVariable(childExp, variable)) return true; } else if (expressionUsesVariableContents(childExp, variable)) { return true; } } return false; } // Detect simple |return nullptr;| statements. function isReturningImmobileValue(edge, variable) { if (variable.Kind == "Return") { if (edge.Exp[0].Kind == "Var" && sameVariable(edge.Exp[0].Variable, variable)) { if (isImmobileValue(edge.Exp[1])) return true; } } return false; } // If the edge uses the given variable's value, return the earliest point at // which the use is definite. Usually, that means the source of the edge // (anything that reaches that source point will end up using the variable, but // there may be other ways to reach the destination of the edge.) // // Return values are implicitly used at the very last point in the function. // This makes a difference: if an RAII class GCs in its destructor, we need to // start looking at the final point in the function, not one point back from // that, since that would skip over the GCing call. // // Certain references may be annotated to be live to the end of the function // as well (eg AutoCheckCannotGC&& parameters). // // Note that this returns a nonzero value only if the variable's incoming value is used. // So this would return 0 for 'obj': // // obj = someFunction(); // // but these would return a positive value: // // obj = someFunction(obj); // obj->foo = someFunction(); // function edgeUsesVariable(ffg, edge, decl, body, liveToEnd=false) { const variable = decl.Variable; if (ignoreEdgeUse(edge, variable, body)) return 0; if (variable.Kind == "Return") { liveToEnd = true; } if (liveToEnd && body.Index[1] == edge.Index[1] && body.BlockId.Kind == "Function") { // The last point in the function body is treated as using the return // value. This is the only time the destination point is returned // rather than the source point. return edge.Index[1]; } var src = edge.Index[0]; switch (edge.Kind) { case "Assign": { // Detect `Return := nullptr`. if (isReturningImmobileValue(edge, variable)) return 0; const [lhs, rhs] = edge.Exp; // Detect `lhs := ...variable...` if (expressionUsesVariable(rhs, variable)) return src; // Detect `...variable... := rhs` but not `variable := rhs`. The latter // overwrites the previous value of `variable` without using it. if (expressionUsesVariable(lhs, variable) && !exprCoversVariable(ffg, lhs, decl)) { return src; } return 0; } case "Assume": return expressionUsesVariableContents(edge.Exp[0], variable) ? src : 0; case "Call": { const callee = edge.Exp[0]; if (expressionUsesVariable(callee, variable)) return src; if ("PEdgeCallInstance" in edge) { if (expressionUsesVariable(edge.PEdgeCallInstance.Exp, variable)) { if (edgeStartsValueLiveRange(ffg, edge, decl)) { // If the variable is being constructed, then the incoming // value is not used here; it didn't exist before // construction. (The analysis doesn't get told where // variables are defined, so must infer it from // construction. If the variable does not have a // constructor, its live range may be larger than it really // ought to be if it is defined within a loop body, but // that is conservative.) } else { return src; } } } if ("PEdgeCallArguments" in edge) { for (var exp of edge.PEdgeCallArguments.Exp) { if (expressionUsesVariable(exp, variable)) return src; } } if (edge.Exp.length == 1) return 0; // Assigning call result to a variable. const lhs = edge.Exp[1]; if (expressionUsesVariable(lhs, variable) && !exprCoversVariable(ffg, lhs, decl)) return src; return 0; } case "Loop": return 0; case "Assembly": return 0; default: assert(false); } } // If `decl` is the body.DefineVariable[] declaration of a reference type, then // return the expression without the outer dereference. Otherwise, return the // original expression. function maybeDereference(exp, decl) { if (exp.Kind == "Drf" && exp.Exp[0].Kind == "Var") { if (isReferenceDecl(decl)) { return exp.Exp[0]; } } return exp; } // Test to see if `exp` is simply the given variable. function expressionIsVariable(exp, variable) { return exp.Kind == "Var" && sameVariable(exp.Variable, variable); } function referencedCSUName(type) { if (type.Kind == "Pointer") { return referencedCSUName(type.Type); } else if (type.Kind == "CSU") { return type.Name; } } function containsGCPointer(ffg, type) { if (type.Kind == "CSU") { if (!(type.Name in ffg.typeInfo.AllGCPointers)) { return false; } } else if (type.Kind == "Pointer") { const pointeeType = type.Type; if (pointeeType.Kind != "CSU" || !(pointeeType.Name in ffg.typeInfo.AllGCTypes)) { return false; } } else { // Not a GC pointer. return false; } return true; } // Test to see if `exp` is simply the given variable or is a field of the given // variable that comprises the whole of the interesting parts of that variable's // type. The latter is to handle anonymous lambda closures that capture a single // GC pointer variable. The idea is that if you assign to the sole field within // a struct, then you're overwriting the whole struct and so callers will use // this to determine that the old value of the struct is now dead. // // struct A { // JSObject* obj; // int i; // } a; // struct B { // A a; // void* vp; // } b; // struct C { // A a1; // A a2; // } c; // a.i = 7; // Does not cover whole variable // a.obj = nullptr; // Covers whole variable // b.vp = nullptr; // Does not cover whole variable // b.a.obj = nullptr; // Covers whole variable // b.a = get_a_struct(); // Covers whole variable // c.a1.obj = nullptr; // Does not cover whole variable (c.a2 was not overwritten) // function exprCoversVariable(ffg, exp, decl) { if (exp.Kind == "Var") { return sameVariable(exp.Variable, decl.Variable); } else if (exp.Kind == "Fld") { // Treat x.f1.f2 = val as "setting" the variable x and overwriting its // previous value if x and x.f1 have types with a single field // containing GC pointers (because the assignment overwrites the only // part of the variable we care about), *and* if x.f1.f2 has at least // one GC pointer (if it has zero, then we're overwriting a different // part of x.f1 that doesn't touch the GC pointer-containing part. If it // has more than 1, that's fine; we may be overwriting multiple GC // pointers at once). If any intervening type has multiple GC pointer // fields, then it doesn't count (because the rest of that type isn't // being overwritten). // Only CSUs (classes/structs/unions) are eligible for this partial // assignment treatment. if (decl.Type.Kind != "CSU") { return false; } // Check that the innermost field assignment (eg x.f1.f2) is a GC // pointer type, since otherwise we're overwriting the wrong field. if (!containsGCPointer(ffg, exp.Field.Type)) { return false; } // Loop through each nested field access, other than the innermost, to // check that there aren't any sibling fields containing GC pointers we // are not overwriting. let trail = exp; let e = exp.Exp[0]; // Advance past the innermost field assignment (eg x.f1.f2 -> x.f1) while (e.Kind == "Fld") { const csu = referencedCSUName(e.Field.Type); if (!csu || !(csu in ffg.typeInfo.SingleGCField)) { return false; } trail = e; e = e.Exp[0]; } // We also need to know that the original variable (`x` in the example) // is a SingleGCField type, but the expression CFG does not contain its // type. Fortunately, we know we're accessing a field, and field // accesses contain the type of the Class/Struct/Union they are field // accesses of, so the innermost expr with Kind="Fld" has the type. if (trail && !(trail.Field.FieldCSU.Type.Name in ffg.typeInfo.SingleGCField)) { return false; } // ...and now make sure `x` is the variable we care about in the first // place. Note that `e` is the most deeply nested expression in the CFG, // but it represents the outermost part of the `x.f1.f2` field access // expression. if (e.Kind != "Var" || !sameVariable(e.Variable, decl.Variable)) { return false; } return true; } return false; } // Similar to the above, except treat uses of a reference as if they were uses // of the dereferenced contents. This requires knowing the type of the // variable, and so takes its declaration rather than the variable itself. function expressionIsDeclaredVariable(exp, decl) { exp = maybeDereference(exp, decl); return expressionIsVariable(exp, decl.Variable); } function expressionIsMethodOnVariableDecl(exp, decl) { // This might be calling a method on a base class, in which case exp will // be an unnamed field of the variable instead of the variable itself. while (exp.Kind == "Fld" && exp.Field.Name[0].startsWith("field:")) exp = exp.Exp[0]; return expressionIsDeclaredVariable(exp, decl); } // Return whether the edge starts the live range of a variable's value, by setting // it to some new value. Examples of starting obj's live range: // // obj = foo; // obj = foo(); // obj = foo(obj); // uses previous value but then sets to new value // SomeClass obj(true, 1); // constructor // function edgeStartsValueLiveRange(ffg, edge, decl) { // Direct assignments start live range of lhs: var = value (but not // including = nullptr). if (edge.Kind == "Assign") { const [lhs, rhs] = edge.Exp; return (exprCoversVariable(ffg, lhs, decl) && !isReturningImmobileValue(edge, decl.Variable)); } if (edge.Kind != "Call") return false; // Assignments of call results start live range: var = foo() if (1 in edge.Exp) { var lhs = edge.Exp[1]; if (exprCoversVariable(ffg, lhs, decl)) return true; } // Constructor calls start live range of instance: SomeClass var(...) if ("PEdgeCallInstance" in edge) { var instance = edge.PEdgeCallInstance.Exp; // Kludge around incorrect dereference on some constructor calls. if (instance.Kind == "Drf") instance = instance.Exp[0]; if (!exprCoversVariable(ffg, instance, decl)) return false; var callee = edge.Exp[0]; if (callee.Kind != "Var") return false; assert(callee.Variable.Kind == "Func"); var calleeName = readable(callee.Variable.Name[0]); // Constructor calls include the text 'Name::Name(' or 'Name<...>::Name('. var openParen = calleeName.indexOf('('); if (openParen < 0) return false; calleeName = calleeName.substring(0, openParen); var lastColon = calleeName.lastIndexOf('::'); if (lastColon < 0) return false; var constructorName = calleeName.substr(lastColon + 2); calleeName = calleeName.substr(0, lastColon); var lastTemplateOpen = calleeName.lastIndexOf('<'); if (lastTemplateOpen >= 0) calleeName = calleeName.substr(0, lastTemplateOpen); if (calleeName.endsWith(constructorName)) return true; } return false; } // Return the result of a `matcher` callback on the call found in the given // `edge`, if the edge is a direct call to a named function (if not, return false). // `matcher` is given the name of the callee (actually, a tuple // [fully qualified name, base name]), an array of expressions containing the // arguments, and if the result of the call is assigned to a variable, // the expression representing that variable(the lhs). // // https://firefox-source-docs.mozilla.org/js/HazardAnalysis/CFG.html for // documentation of the data structure used here. function matchEdgeCall(edge, matcher) { if (edge.Kind != "Call") { return false; } const callee = edge.Exp[0]; if (edge.Type.Kind == 'Function' && edge.Exp[0].Kind == 'Var' && edge.Exp[0].Variable.Kind == 'Func') { const calleeName = edge.Exp[0].Variable.Name; const args = edge.PEdgeCallArguments; const argExprs = args ? args.Exp : []; const lhs = edge.Exp[1]; // May be undefined return matcher(calleeName, argExprs, lhs); } return false; } function edgeMarksVariableGCSafe(edge, variable) { return matchEdgeCall(edge, (calleeName, argExprs, _lhs) => { // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation return (calleeName[1] == 'MarkVariableAsGCSafe' && calleeName[0].includes("JS::detail::MarkVariableAsGCSafe") && argExprs.length == 1 && expressionIsVariable(argExprs[0], variable)); }); } // Match an optional :: followed by the class name, // and then an optional template parameter marker. // // Example: mozilla::dom::UniquePtr<... // function parseTypeName(typeName) { const m = typeName.match(/^(((?:\w|::)+::)?(\w+))\b(\<)?/); if (!m) { return undefined; } const [, type, raw_namespace, classname, is_specialized] = m; const namespace = raw_namespace === null ? "" : raw_namespace; return { type, namespace, classname, is_specialized } } // Return whether an edge "clears out" a variable's value for the following // statements. A simple example would be // // var = nullptr; // // A more complex example is a Maybe that gets reset: // // Maybe nogc(std::in_place, cx); // nogc.reset(); // gc(); // <-- not a problem; nogc is invalidated by prev line // nogc.emplace(cx); // foo(nogc); // // Yet another example is a UniquePtr being passed by value, which means the // receiver takes ownership: // // UniquePtr uobj(obj); // foo(uobj); // gc(); // function edgeEndsValueLiveRange(ffg, edge, decl, body) { if (edge.Kind == "Assign") { // [c++] somevar = nullptr; const [lhs, rhs] = edge.Exp; if (exprCoversVariable(ffg, lhs, decl) && isImmobileValue(rhs)) { return true; } // Resetting a Maybe<> as part of a constexpr constructor. // The expression is Assign v...mIsSome = 0. if (isImmobileValue(rhs) && lhs.Kind == "Fld") { if (lhs.Field.Name[0] == "mIsSome" && lhs.Field.FieldCSU.Type.Name.includes("::MaybeStorage<") && rhs.Kind === "Int" && rhs.String === "0") { // Dig the `v` out of the lhs. It is the innermost Exp[0] of a sequence of field accesses. let inner = lhs; while (inner.Kind == "Fld") { inner = inner.Exp[0]; } return expressionIsVariable(inner, decl.Variable); } } return false; } if (edge.Kind != "Call") return false; if (edgeMarksVariableGCSafe(edge, decl.Variable)) { // explicit JS_HAZ_VARIABLE_IS_GC_SAFE annotation return true; } if (matchEdgeCall(edge, (calleeName, argExprs, lhs) => { return calleeName[1] == 'move' && calleeName[0].includes('std::move(') && expressionIsDeclaredVariable(argExprs[0], decl) && lhs && lhs.Kind == 'Var' && lhs.Variable.Kind == 'Temp'; })) { // temp = std::move(var) // // If var is a UniquePtr, and we pass it into something that takes // ownership, then it should be considered to be invalid. Example: // // consume(std::move(var)); // // where consume takes a UniquePtr. This will compile to something like // // UniquePtr* __temp_1 = &std::move(var); // UniquePtr&& __temp_2(*temp_1); // move constructor // consume(__temp_2); // ~UniquePtr(__temp_2); // // The line commented with "// move constructor" is a result of passing // a UniquePtr as a parameter. If consume() took a UniquePtr&& // directly, this would just be: // // UniquePtr* __temp_1 = &std::move(var); // consume(__temp_1); // // which is not guaranteed to move from the reference. It might just // ignore the parameter. We can't predict what consume(UniquePtr&&) // will do. We do know that UniquePtr(UniquePtr&& other) moves out of // `other`. // // The std::move() technically is irrelevant, but because we only care // about bare variables, it has to be used, which is fortunate because // the UniquePtr&& constructor operates on a temporary, not the // variable we care about. const lhs = edge.Exp[1].Variable; if (basicBlockEatsVariable(ffg, lhs, body, edge.Index[1])) return true; } const callee = edge.Exp[0]; if (edge.Type.Kind == 'Function' && edge.Type.TypeFunctionCSU && edge.PEdgeCallInstance && expressionIsMethodOnVariableDecl(edge.PEdgeCallInstance.Exp, decl)) { const typeName = edge.Type.TypeFunctionCSU.Type.Name; // Synthesize a zero-arg constructor name like // mozilla::dom::UniquePtr::UniquePtr(). Note that the `` is // literal -- the pretty name from sixgill will render the actual // constructor name as something like // // UniquePtr::UniquePtr() [where T = int] // const parsed = parseTypeName(typeName); if (parsed) { const { type, namespace, classname, is_specialized } = parsed; // special-case: the initial constructor that doesn't provide a value. // Useful for things like Maybe. const template = is_specialized ? '' : ''; const ctorName = `${namespace}${classname}${template}::${classname}()`; if (callee.Kind == 'Var' && typesWithSafeConstructors.has(type) && callee.Variable.Name[0].includes(ctorName)) { return true; } // special-case: UniquePtr::reset() and similar. if (callee.Kind == 'Var' && type in resetterMethods && resetterMethods[type].has(callee.Variable.Name[1])) { return true; } } } // special-case: passing UniquePtr by value. if (edge.Type.Kind == 'Function' && edge.Type.TypeFunctionArgument && edge.PEdgeCallArguments) { for (const i in edge.Type.TypeFunctionArgument) { const param = edge.Type.TypeFunctionArgument[i]; if (param.Type.Kind != 'CSU') continue; if (!param.Type.Name.startsWith("mozilla::UniquePtr<")) continue; const arg = edge.PEdgeCallArguments.Exp[i]; if (expressionIsVariable(arg, decl.Variable)) { return true; } } } return false; } function edgeMovesVariable(edge, decl) { if (edge.Kind != 'Call') return false; const callee = edge.Exp[0]; if (callee.Kind == 'Var' && callee.Variable.Kind == 'Func') { const { Variable: { Name: [ fullname, shortname ] } } = callee; // Match an rvalue parameter. if (!edge || !edge.PEdgeCallArguments || !edge.PEdgeCallArguments.Exp) { return false; } for (const arg of edge.PEdgeCallArguments.Exp) { if (arg.Kind != 'Drf') continue; const val = arg.Exp[0]; if (val.Kind == 'Var' && sameVariable(val.Variable, decl.Variable)) { // This argument is the variable we're looking for. Return true // if it is passed as an rvalue reference. const type = decl.Type; if (type.Kind == "Pointer" && type.Reference == PTR_RVALUE_REF) { return true; } } } } return false; } // Scan forward through the basic block in 'body' starting at 'startpoint', // looking for a call that passes 'variable' to a move constructor that // "consumes" it (eg UniquePtr::UniquePtr(UniquePtr&&)). function basicBlockEatsVariable(ffg, variable, body, startpoint) { const decl = ffg.lookupDecl(variable); assert(decl); const successors = getSuccessors(body); let point = startpoint; while (point in successors) { // Only handle a single basic block. If it forks, stop looking. const edges = successors[point]; if (edges.length != 1) { return false; } const edge = edges[0]; if (edgeMovesVariable(edge, decl)) { return true; } // edgeStartsValueLiveRange will find places where 'variable' is given // a new value. Never observed in practice, since this function is only // called with a temporary resulting from std::move(), which is used // immediately for a call. But just to be robust to future uses: if (edgeStartsValueLiveRange(ffg, edge, decl)) { return false; } point = edge.Index[1]; } return false; } var PROP_REFCNT = 1 << 0; var PROP_SHARED_PTR_DTOR = 1 << 1; function getCalleeProperties(calleeName) { let props = 0; if (isRefcountedDtor(calleeName)) { props |= PROP_REFCNT; } if (calleeName.includes("~shared_ptr()")) { props |= PROP_SHARED_PTR_DTOR; } return props; } // Basic C++ ABI mangling: prefix an identifier with its length, in decimal. function mangle(name) { return name.length + name; } var TriviallyDestructibleTypes = new Set([ // Single-token types from // https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling-builtin "void", "wchar_t", "bool", "char", "short", "int", "long", "float", "double", "__int64", "__int128", "__float128", "char32_t", "char16_t", "char8_t", // Remaining observed cases. These are types T in shared_ptr that have // been observed, where the types themselves have trivial destructors, and // the custom deleter doesn't do anything nontrivial that we might care about. "_IO_FILE" ]); function synthesizeDestructorName(className) { if (className.includes("<") || className.includes(" ") || className.includes("{")) { return; } if (TriviallyDestructibleTypes.has(className)) { return; } const parts = className.split("::"); const mangled_dtor = "_ZN" + parts.map(p => mangle(p)).join("") + "D2Ev"; const pretty_dtor = `void ${className}::~${parts.at(-1)}()`; // Note that there will be a later check to verify that the function name // synthesized here is an actual function, and assert if not (see // assertFunctionExists() in computeCallgraph.js.) return mangled_dtor + "$" + pretty_dtor; } function getCallEdgeProperties(ffg, body, edge, calleeName) { let attrs = 0; let extraCalls = []; if (edge.Kind !== "Call") { return { attrs, extraCalls }; } const props = getCalleeProperties(calleeName); if (props & PROP_REFCNT) { // std::swap of two refcounted values thinks it can drop the // ref count to zero. Or rather, it just calls operator=() in a context // where the refcount will never drop to zero. const blockId = blockIdentifier(body); if (blockId.includes("std::swap") || blockId.includes("mozilla::Swap")) { // Replace the refcnt release call with nothing. It's not going to happen. attrs |= ATTR_REPLACED; } } if (props & PROP_SHARED_PTR_DTOR) { // Replace shared_ptr::~shared_ptr() calls to T::~T() calls. // Note that this will only apply to simple cases. // Any templatized type, in particular, will be ignored and the original // call tree will be left alone. If this triggers a hazard, then we can // consider extending the mangling support. // // If the call to ~shared_ptr is not replaced, then it might end up calling // an unknown function pointer. This does not always happen-- in some cases, // the call tree below ~shared_ptr will invoke the correct destructor without // going through function pointers. const m = calleeName.match(/shared_ptr<(.*?)>::~shared_ptr\(\)(?: \[with T = ([\w:]+))?/); assert(m); let className = m[1] == "T" ? m[2] : m[1]; assert(className != ""); // cv qualification does not apply to destructors. className = className.replace("const ", ""); className = className.replace("volatile ", ""); const dtor = synthesizeDestructorName(className); if (dtor) { attrs |= ATTR_REPLACED; extraCalls.push({ attrs: ATTR_SYNTHETIC, name: dtor, }); } } if ((props & PROP_REFCNT) == 0) { return { attrs, extraCalls }; } let callee = edge.Exp[0]; while (callee.Kind === "Drf") { callee = callee.Exp[0]; } const instance = edge.PEdgeCallInstance.Exp; if (instance.Kind !== "Var") { // TODO: handle field destructors return { attrs, extraCalls }; } // Test whether the dtor call is dominated by operations on the variable // that mean it will not go to a zero refcount in the dtor: either because // it's already dead (eg r.forget() was called) or because it can be proven // to have a ref count of greater than 1. This is implemented by looking // for the reverse: find a path scanning backwards from the dtor call where // the variable is used in any way that does *not* ensure that it is // trivially destructible. const decl = ffg.lookupDecl(instance.Variable); const visitor = new class DominatorVisitor extends Visitor { // Do not revisit nodes. For new nodes, relay the decision made by // extend_path. next_action(seen, current) { return seen ? "prune" : current; } // We don't revisit, so always use the new. merge_info(seen, current) { return current; } // Return the action to take from this node. extend_path(edge, body, ppoint, successor_value) { if (!edge) { // Dummy edge to join two points. return "continue"; } if (!edgeUsesVariable(ffg, edge, decl, body)) { // Nothing of interest on this edge, keep searching. return "continue"; } if (edgeEndsValueLiveRange(ffg, edge, decl, body)) { // This path is safe! return "prune"; } // Unsafe. Found a use that might set the variable to a // nonzero refcount. return "done"; } }(ffg); // Searching upwards from a destructor call, return the opposite of: is // there a path to a use or the start of the function that does NOT hit a // safe assignment like refptr.forget() first? // // In graph terms: return whether the destructor call is dominated by forget() calls (or similar). const edgeIsNonReleasingDtor = !BFS_upwards( body, edge.Index[0], ffg, visitor, "start", false // Return value if we do not reach the root without finding a non-forget() use. ); if (edgeIsNonReleasingDtor) { attrs |= ATTR_GC_SUPPRESSED | ATTR_NONRELEASING; } return { attrs, extraCalls }; } // gcc uses something like "__dt_del " for virtual destructors that it // generates. function isSyntheticVirtualDestructor(funcName) { return funcName.endsWith(" "); } function typedField(field) { if ("FieldInstanceFunction" in field) { // Virtual call // // This makes a minimal attempt at dealing with overloading, by // incorporating the number of parameters. So far, that is all that has // been needed. If more is needed, sixgill will need to produce a full // mangled type. const {Type, Name: [name]} = field; // Virtual destructors don't need a type or argument count, // and synthetic ones don't have them filled in. if (isSyntheticVirtualDestructor(name)) { return name; } var nargs = 0; if (Type.Kind == "Function" && "TypeFunctionArguments" in Type) nargs = Type.TypeFunctionArguments.Type.length; return name + ":" + nargs; } else { // Function pointer field return field.Name[0]; } } function fieldKey(csuName, field) { return csuName + "." + typedField(field); }