From bbf7ff9a3f498fe108a76bf09f972ea21686cf84 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 24 Apr 2019 11:33:10 +0100 Subject: [PATCH 01/11] Python: do pruning in QL. --- python/ql/src/semmle/python/Exprs.qll | 23 +- python/ql/src/semmle/python/Flow.qll | 36 +- python/ql/src/semmle/python/Pruning.qll | 574 ++++++++++++++++++ .../library-tests/ControlFlow/pruning/options | 2 + .../ControlFlow/pruning/test.expected | 7 + .../library-tests/ControlFlow/pruning/test.py | 46 +- 6 files changed, 662 insertions(+), 26 deletions(-) create mode 100644 python/ql/src/semmle/python/Pruning.qll create mode 100644 python/ql/test/library-tests/ControlFlow/pruning/options diff --git a/python/ql/src/semmle/python/Exprs.qll b/python/ql/src/semmle/python/Exprs.qll index 81d6989768f3..0b1c4fd3c982 100644 --- a/python/ql/src/semmle/python/Exprs.qll +++ b/python/ql/src/semmle/python/Exprs.qll @@ -339,7 +339,7 @@ class Ellipsis extends Ellipsis_ { * and numeric literals. */ abstract class ImmutableLiteral extends Expr { - + abstract Object getLiteralObject(); abstract boolean booleanValue(); @@ -380,7 +380,7 @@ class IntegerLiteral extends Num { override Object getLiteralObject() { py_cobjecttypes(result, theIntType()) and py_cobjectnames(result, this.getN()) or - py_cobjecttypes(result, theLongType()) and py_cobjectnames(result, this.getN()) + py_cobjecttypes(result, theLongType()) and py_cobjectnames(result, this.getN()) } override boolean booleanValue() { @@ -454,6 +454,25 @@ class ImaginaryLiteral extends Num { } +class NegativeIntegerLiteral extends ImmutableLiteral, UnaryExpr { + + NegativeIntegerLiteral() { + this.getOp() instanceof USub and + this.getOperand() instanceof IntegerLiteral + } + + override boolean booleanValue() { + result = this.getOperand().(IntegerLiteral).booleanValue() + } + + override Object getLiteralObject() { + py_cobjecttypes(result, theIntType()) and py_cobjectnames(result, "-" + this.getOperand().(IntegerLiteral).getN()) + or + py_cobjecttypes(result, theLongType()) and py_cobjectnames(result, "-" + this.getOperand().(IntegerLiteral).getN()) + } + +} + /** A unicode string expression, such as `u"\u20ac"`. Note that unadorned string constants such as "hello" are treated as Bytes for Python2, but Unicode for Python3. */ class Unicode extends StrConst { diff --git a/python/ql/src/semmle/python/Flow.qll b/python/ql/src/semmle/python/Flow.qll index 2b663323a48e..8cce671a4c4a 100755 --- a/python/ql/src/semmle/python/Flow.qll +++ b/python/ql/src/semmle/python/Flow.qll @@ -1,12 +1,12 @@ import python import semmle.python.flow.NameNode private import semmle.python.pointsto.PointsTo - +private import semmle.python.Pruning /* Note about matching parent and child nodes and CFG splitting: * * As a result of CFG splitting a single AST node may have multiple CFG nodes. - * Therefore, when matching CFG nodes to children, we need to make sure that + * Therefore, when matching CFG nodes to children, we need to make sure that * we don't match the child of one CFG node to the wrong parent. * We do this by checking dominance. If the CFG node for the parent precedes that of * the child, then he child node matches the parent node if it is dominated by it. @@ -33,6 +33,10 @@ private AstNode toAst(ControlFlowNode n) { */ class ControlFlowNode extends @py_flow_node { + ControlFlowNode() { + not Pruner::pruned(this) + } + /** Whether this control flow node is a load (including those in augmented assignments) */ predicate isLoad() { exists(Expr e | e = toAst(this) | py_expr_contexts(_, 3, e) and not augstore(_, this)) @@ -219,7 +223,7 @@ class ControlFlowNode extends @py_flow_node { /** Gets what this flow node might "refer-to". Performs a combination of localized (intra-procedural) points-to * analysis and global module-level analysis. This points-to analysis favours precision over recall. It is highly - * precise, but may not provide information for a significant number of flow-nodes. + * precise, but may not provide information for a significant number of flow-nodes. * If the class is unimportant then use `refersTo(value)` or `refersTo(value, origin)` instead. */ predicate refersTo(Object value, ClassObject cls, ControlFlowNode origin) { @@ -238,9 +242,9 @@ class ControlFlowNode extends @py_flow_node { PointsTo::points_to(this, context, value, cls, origin) } - /** Whether this flow node might "refer-to" to `value` which is from `origin` - * Unlike `this.refersTo(value, _, origin)` this predicate includes results - * where the class cannot be inferred. + /** Whether this flow node might "refer-to" to `value` which is from `origin` + * Unlike `this.refersTo(value, _, origin)` this predicate includes results + * where the class cannot be inferred. */ predicate refersTo(Object value, ControlFlowNode origin) { PointsTo::points_to(this, _, value, _, origin) @@ -312,7 +316,7 @@ class ControlFlowNode extends @py_flow_node { exists(BasicBlock b | start_bb_likely_reachable(b) and not end_bb_likely_reachable(b) and - /* If there is an unlikely successor edge earlier in the BB + /* If there is an unlikely successor edge earlier in the BB * than this node, then this node must be unreachable */ exists(ControlFlowNode p, int i, int j | p.(RaisingNode).unlikelySuccessor(_) and @@ -355,7 +359,7 @@ class ControlFlowNode extends @py_flow_node { } /** Whether this dominates other. - * Note that all nodes dominate themselves. + * Note that all nodes dominate themselves. */ pragma [inline] predicate dominates(ControlFlowNode other) { // This predicate is gigantic, so it must be inlined. @@ -446,7 +450,7 @@ class CallNode extends ControlFlowNode { /** Gets the flow node corresponding to the named argument of the call corresponding to this flow node */ ControlFlowNode getArgByName(string name) { - exists(Call c, Keyword k | this.getNode() = c and k = c.getAKeyword() and + exists(Call c, Keyword k | this.getNode() = c and k = c.getAKeyword() and k.getValue() = result.getNode() and k.getArg() = name and result.getBasicBlock().dominates(this.getBasicBlock())) } @@ -487,7 +491,7 @@ class AttrNode extends ControlFlowNode { /** Gets the flow node corresponding to the object of the attribute expression corresponding to this flow node, with the matching name */ ControlFlowNode getObject(string name) { - exists(Attribute a | + exists(Attribute a | this.getNode() = a and a.getObject() = result.getNode() and a.getName() = name and result.getBasicBlock().dominates(this.getBasicBlock())) @@ -558,7 +562,7 @@ class SubscriptNode extends ControlFlowNode { toAst(this) instanceof Subscript } - /** DEPRECATED: Use `getObject()` instead. + /** DEPRECATED: Use `getObject()` instead. * This will be formally deprecated before the end 2018 and removed in 2019.*/ ControlFlowNode getValue() { exists(Subscript s | this.getNode() = s and s.getObject() = result.getNode() and @@ -681,7 +685,7 @@ class UnaryExprNode extends ControlFlowNode { } /** A control flow node corresponding to a definition, that is a control flow node - * where a value is assigned to this node. + * where a value is assigned to this node. * Includes control flow nodes for the targets of assignments, simple or augmented, * and nodes implicitly assigned in class and function definitions and imports. */ @@ -908,7 +912,7 @@ class BasicBlock extends @py_flow_node { /** Whether this basic block dominates the other */ pragma[nomagic] predicate dominates(BasicBlock other) { - this = other + this = other or this.strictlyDominates(other) } @@ -917,8 +921,8 @@ class BasicBlock extends @py_flow_node { this.firstNode().getImmediateDominator().getBasicBlock() = result } - /** Dominance frontier of a node x is the set of all nodes `other` such that `this` dominates a predecessor - * of `other` but does not strictly dominate `other` */ + /** Dominance frontier of a node x is the set of all nodes `other` such that `this` dominates a predecessor + * of `other` but does not strictly dominate `other` */ predicate dominanceFrontier(BasicBlock other) { this.dominates(other.getAPredecessor()) and not this.strictlyDominates(other) } @@ -1059,3 +1063,5 @@ private predicate end_bb_likely_reachable(BasicBlock b) { ) } + + diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll new file mode 100644 index 000000000000..b0927d7c4586 --- /dev/null +++ b/python/ql/src/semmle/python/Pruning.qll @@ -0,0 +1,574 @@ +import python + + +module Pruner { + + /** A basic block before pruning */ + class UnprunedCfgNode extends @py_flow_node { + + string toString() { none() } + /** Gets a predecessor of this flow node */ + UnprunedCfgNode getAPredecessor() { + py_successors(result, this) + } + + /** Gets a successor of this flow node */ + UnprunedCfgNode getASuccessor() { + py_successors(this, result) + } + + /** Gets the immediate dominator of this flow node */ + UnprunedCfgNode getImmediateDominator() { + py_idoms(this, result) + } + + /* Holds if this CFG node is a branch */ + predicate isBranch() { + py_true_successors(this, _) or py_false_successors(this, _) + } + + /** Gets the syntactic element corresponding to this flow node */ + AstNode getNode() { + py_flow_bb_node(this, result, _, _) + } + + UnprunedBasicBlock getBasicBlock() { + py_flow_bb_node(this, _, result, _) + } + + /** Gets a successor for this node if the relevant condition is True. */ + UnprunedCfgNode getATrueSuccessor() { + py_true_successors(this, result) + } + + /** Gets a successor for this node if the relevant condition is False. */ + UnprunedCfgNode getAFalseSuccessor() { + py_false_successors(this, result) + } + + } + + /** A control flow node corresponding to a comparison operation, such as `x + if x: + controlled + false_successor + uncontrolled + + false_successor dominates uncontrolled, but not all of its predecessors are this (if x) + or dominated by itself. Whereas in the following code: + + if x: + while controlled: + also_controlled + false_successor + uncontrolled + + the block 'while controlled' is controlled because all of its predecessors are this (if x) + or (in the case of 'also_controlled') dominated by itself. + + The additional constraint on the predecessors of the test successor implies + that `this` strictly dominates `controlled` so that isn't necessary to check + directly. + */ + exists(UnprunedBasicBlock succ | + testIsTrue = true and succ = this.getATrueSuccessor() + or + testIsTrue = false and succ = this.getAFalseSuccessor() + | + succ.dominates(controlled) and + forall(UnprunedBasicBlock pred | pred.getASuccessor() = succ | + pred = this or succ.dominates(pred) + ) + ) + } + + /** Holds if this condition controls the edge `pred->succ`, i.e. those edges for which the condition is `testIsTrue`. */ + predicate controlsEdge(UnprunedBasicBlock pred, UnprunedBasicBlock succ, boolean testIsTrue) { + this.controls(pred, testIsTrue) and succ = pred.getASuccessor() + or + pred.last() = this and ( + testIsTrue = true and succ = this.getATrueSuccessor() + or + testIsTrue = false and succ = this.getAFalseSuccessor() + ) + } + + } + + class Truthy extends ConditionalConstant, TTruthy { + + private boolean booleanValue() { + this = TTruthy(result) + } + + override string toString() { + result = "Truthy" and this.booleanValue() = true + or + result = "Falsey" and this.booleanValue() = false + } + + override ConditionalConstant invert() { + result = TTruthy(this.booleanValue().booleanNot()) + } + + override predicate constrainsVariableToBe(boolean value) { + value = this.booleanValue() + } + + override predicate impliesFalse(ConditionalConstant other) { + other.constrainsVariableToBe(this.booleanValue().booleanNot()) + } + + } + + class IsNone extends ConditionalConstant, TIsNone { + + private boolean isNone() { + this = TIsNone(result) + } + + override string toString() { + result = "Is None" and this.isNone() = true + or + result = "Is not None" and this.isNone() = false + } + + override ConditionalConstant invert() { + result = TIsNone(this.isNone().booleanNot()) + } + + override predicate constrainsVariableToBe(boolean value) { + value = false and this.isNone() = true + } + + override predicate impliesFalse(ConditionalConstant other) { + other = TIsNone(this.isNone().booleanNot()) + or + this.isNone() = true and other = TTruthy(true) + } + + } + + class ComparedToConstant extends ConditionalConstant, TComparedToConstant { + + private int intValue() { + this = TComparedToConstant(_, result) + } + + private CompareOp getOp() { + this = TComparedToConstant(result, _) + } + + override string toString() { + result = this.getOp().repr() + " " + this.intValue().toString() + } + + override ConditionalConstant invert() { + result = TComparedToConstant(this.getOp().invert(), this.intValue()) + } + + override predicate constrainsVariableToBe(boolean value) { + this.getOp() = eq() and this.intValue() = 0 and value = false + or + value = true and ( + this.getOp() = eq() and this.intValue() != 0 + or + this.getOp() = ne() and this.intValue() = 0 + or + this.getOp() = lt() and this.intValue() <= 0 + or + this.getOp() = le() and this.intValue() < 0 + or + this.getOp() = gt() and this.intValue() >= 0 + or + this.getOp() = ge() and this.intValue() > 0 + ) + } + + override predicate impliesFalse(ConditionalConstant other) { + exists(boolean b | + this.constrainsVariableToBe(b) and other = TTruthy(b.booleanNot()) + ) + or + this.getOp() = eq() and other = TIsNone(true) + or + this.getOp() = ne() and other.(ComparedToConstant).getOp() = eq() + and this.intValue() = other.(ComparedToConstant).intValue() + } + + int minValue() { + this.getOp() = eq() and result = this.intValue() + or + this.getOp() = lt() and result = -2147483648 + or + this.getOp() = le() and result = -2147483648 + or + this.getOp() = gt() and result = this.intValue()+1 + or + this.getOp() = ge() and result = this.intValue() + } + + int maxValue() { + this.getOp() = eq() and result = this.intValue() + or + this.getOp() = gt() and result = 2147483647 + or + this.getOp() = ge() and result = 2147483647 + or + this.getOp() = lt() and result = this.intValue()-1 + or + this.getOp() = le() and result = this.intValue() + } + + } + + predicate pruned(@py_flow_node n) { + exists(UnprunedBasicBlock bb | + pruned_bb(bb) and bb.contains(n) + ) + } + + //private + predicate pruned_bb(UnprunedBasicBlock bb) { + not bb.isEntry() and + forall(UnprunedBasicBlock pred | + pred.getASuccessor() = bb + | + pruned_edge(pred, bb) + ) + } + + ConditionalConstant conditionForTest(SsaVariable var, UnprunedBasicBlock test) { + result = conditionForNode(var, test.last()) + } + + private ConditionalConstant conditionForNode(SsaVariable var, UnprunedCfgNode node) { + py_ssa_use(node, var) and result = TTruthy(true) + or + exists(boolean b | + none_test(node, var, b) and result = TIsNone(b) + ) + or + exists(CompareOp op, int k | + int_test(node, var, op, k) and + result = TComparedToConstant(op, k) + ) + or + result = conditionForNode(var, node.(UnprunedNot).getOperand()).invert() + } + + predicate impliesFalse(ConditionalConstant a, ConditionalConstant b) { + a.impliesFalse(b) or + a.(ComparedToConstant).minValue() > b.(ComparedToConstant).maxValue() or + a.(ComparedToConstant).maxValue() < b.(ComparedToConstant).minValue() + } + + predicate none_test(UnprunedCompareNode test, SsaVariable var, boolean is) { + exists(UnprunedCfgNode left, Cmpop op, UnprunedCfgNode right | + py_ssa_use(left, var) and + test.operands(left, op, right) and + right.getNode() instanceof None + | + op instanceof Is and is = true + or + op instanceof IsNot and is = false + ) + } + + predicate int_test(UnprunedCompareNode test, SsaVariable var, CompareOp op, int k) { + exists(UnprunedCfgNode left, UnprunedCfgNode right, Cmpop cop | + py_ssa_use(left, var) and + test.operands(left, cop, right) and + right.getNode().(IntegerLiteral).getValue() = k and + op.forOp(cop) + ) + } + + predicate int_assignment(UnprunedCfgNode asgn, SsaVariable var, CompareOp op, int k) { + exists(Assign a | + a.getATarget() = asgn.getNode() and + py_ssa_use(asgn, var) and + k = a.getValue().(IntegerLiteral).getValue() and + op = eq() + ) + } + + predicate none_assignment(UnprunedCfgNode asgn, SsaVariable var) { + exists(Assign a | + a.getATarget() = asgn.getNode() and + py_ssa_use(asgn, var) and + a.getValue() instanceof None + ) + } + + private boolean truthy_assignment(UnprunedCfgNode asgn, SsaVariable var) { + exists(Assign a | + a.getATarget() = asgn.getNode() and + py_ssa_use(asgn, var) + | + a.getValue() instanceof True and result = true + or + a.getValue() instanceof False and result = false + ) + or + module_import(var) and result = true + } + + private ConditionalConstant conditionForAssign(SsaVariable var, UnprunedBasicBlock asgn) { + exists(CompareOp op, int k | + int_assignment(asgn.getANode(), var, op, k) and + result = TComparedToConstant(op, k) + ) + or + none_assignment(asgn.getANode(), var) and result = TIsNone(true) + or + result = TTruthy(truthy_assignment(asgn.getANode(), var)) + } + + predicate priorCondition(UnprunedBasicBlock pred, UnprunedBasicBlock succ, ConditionalConstant preval, SsaVariable var) { + not (blacklisted(var) and preval = TTruthy(_)) + and + not var.getVariable().escapes() + and + exists(UnprunedBasicBlock first | + not first = pred and + first.(UnprunedConditionBlock).controlsEdge(pred, succ, true) and + preval = conditionForTest(var, first) + or + not first = pred and + first.(UnprunedConditionBlock).controlsEdge(pred, succ, false) and + preval = conditionForTest(var, first).invert() + or + preval = conditionForAssign(var, first) and + first.dominates(pred) and + (succ = pred.getAFalseSuccessor() or succ = pred.getATrueSuccessor()) + ) + } + + predicate branchCondition(UnprunedBasicBlock pred, UnprunedBasicBlock succ, ConditionalConstant cond, SsaVariable var) { + cond = conditionForTest(var, pred) and + succ = pred.getATrueSuccessor() + or + cond = conditionForTest(var, pred).invert() and + succ = pred.getAFalseSuccessor() + } + + predicate controllingConditions(UnprunedBasicBlock pred, UnprunedBasicBlock succ, ConditionalConstant preval, ConditionalConstant postcond) { + exists(SsaVariable var | + priorCondition(pred, succ, preval, var) and + branchCondition(pred, succ, postcond, var) + ) + } + + predicate pruned_edge(UnprunedBasicBlock pred, UnprunedBasicBlock succ) { + exists(ConditionalConstant pre, ConditionalConstant cond | + controllingConditions(pred, succ, pre, cond) and + impliesFalse(pre, cond) + ) + or + pruned_bb(pred) and succ = pred.getASuccessor() + or + simply_dead(pred, succ) + } + + /** Holds if edge is simply dead. Stuff like `if False: ...` */ + //private + predicate simply_dead(UnprunedBasicBlock pred, UnprunedBasicBlock succ) { + constTest(pred.last()) = true and pred.getAFalseSuccessor() = succ + or + constTest(pred.last()) = false and pred.getATrueSuccessor() = succ + } + + private boolean constTest(UnprunedCfgNode node) { + exists(ImmutableLiteral lit | + result = lit.booleanValue() and lit = node.getNode() + ) + or + result = constTest(node.(UnprunedNot).getOperand()).booleanNot() + } + + /** Holds if `var` is blacklisted as having possbily been mutated */ + predicate blacklisted(SsaVariable var) { + possibly_mutated(var) and not whitelisted(var) + } + + predicate possibly_mutated(SsaVariable var) { + exists(Subscript subscr, UnprunedCfgNode node | + subscr.getObject() = node.getNode() and + py_ssa_use(node, var) + ) + or + exists(Attribute attr, UnprunedCfgNode node | + attr.getObject() = node.getNode() and + py_ssa_use(node, var) + ) + } + + /** If SSA variable is defined by an import, then it should + * be whitelisted as taking an attribute cannot change its + * truthiness. + */ + predicate whitelisted(SsaVariable var) { + module_import(var) + } + + private predicate module_import(SsaVariable var) { + exists(Alias alias, UnprunedCfgNode node | + alias.getValue() instanceof ImportExpr and + py_ssa_defn(var, node) and + alias.getAsname() = node.getNode() + ) + } +} + diff --git a/python/ql/test/library-tests/ControlFlow/pruning/options b/python/ql/test/library-tests/ControlFlow/pruning/options new file mode 100644 index 000000000000..ebbd9efa4332 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/pruning/options @@ -0,0 +1,2 @@ +semmle-extractor-options: --dont-prune-graph +optimize: true diff --git a/python/ql/test/library-tests/ControlFlow/pruning/test.expected b/python/ql/test/library-tests/ControlFlow/pruning/test.expected index b8cd5b6b98d7..bab31184e101 100644 --- a/python/ql/test/library-tests/ControlFlow/pruning/test.expected +++ b/python/ql/test/library-tests/ControlFlow/pruning/test.expected @@ -43,3 +43,10 @@ | 154 | 1 | | 161 | 1 | | 163 | 1 | +| 176 | 1 | +| 178 | 1 | +| 180 | 1 | +| 184 | 1 | +| 186 | 1 | +| 189 | 1 | +| 192 | 1 | diff --git a/python/ql/test/library-tests/ControlFlow/pruning/test.py b/python/ql/test/library-tests/ControlFlow/pruning/test.py index bdc43c414ad5..4b5915f94884 100644 --- a/python/ql/test/library-tests/ControlFlow/pruning/test.py +++ b/python/ql/test/library-tests/ControlFlow/pruning/test.py @@ -3,20 +3,20 @@ def dead(): return 0 count - + def conditional_dead(test): if test: return if test: count - + def made_true(seq): if seq: return seq.append(1) if seq: count - + def boolop(t1, t2, t3, t4, t5, t6): if not t1: return @@ -38,7 +38,7 @@ def boolop(t1, t2, t3, t4, t5, t6): t5 and count t6 or count t6 and count - + def with_splitting(t1, t2): if t1: if not t2: @@ -50,7 +50,7 @@ def with_splitting(t1, t2): else: t2 or count t2 and count - + def loops(seq1, seq2, seq3, seq4, seq5): if seq1: return @@ -82,7 +82,7 @@ def loops(seq1, seq2, seq3, seq4, seq5): count print(var) -#Logic does not apply to global variables in calls, +#Logic does not apply to global variables in calls, #as they may be changed from true to false externally. from some_module import x, y if not x: @@ -102,7 +102,7 @@ def loops(seq1, seq2, seq3, seq4, seq5): if not another_module: count - + def negated_conditional_live(t1, t2): if not t1: return @@ -112,13 +112,13 @@ def negated_conditional_live(t1, t2): count if not t2: count - + def negated_conditional_dead(test): if not test: return if not test: count - + def made_true2(m): if m: return @@ -162,3 +162,31 @@ def attribute_lookup_cannot_effect_comparisons_with_immutable_constants(ps): else: count +def func(): + global escapes + so_something() + escapes = True + +#Don't prune on `escapes` as it escapes. +if __name__ == '__main__': + escapes = None # global + try: + func() + except Exception as err: + count + if escapes is None: + count + else: + count + +def func2(): + while 1: + count + if cond12: + count + try: + true12() + count + except IOError: + true12 = 0 + count From 75feab53db89526ed39ee18b30a5d5a50679d6d2 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 10 May 2019 10:59:44 +0100 Subject: [PATCH 02/11] Python QL: Clean up pruning code, renaming and adding comments for clarity. --- python/ql/src/semmle/python/Flow.qll | 2 +- python/ql/src/semmle/python/Pruning.qll | 179 ++++++++++++++---------- 2 files changed, 104 insertions(+), 77 deletions(-) diff --git a/python/ql/src/semmle/python/Flow.qll b/python/ql/src/semmle/python/Flow.qll index 8cce671a4c4a..bb9f15720dd4 100755 --- a/python/ql/src/semmle/python/Flow.qll +++ b/python/ql/src/semmle/python/Flow.qll @@ -34,7 +34,7 @@ private AstNode toAst(ControlFlowNode n) { class ControlFlowNode extends @py_flow_node { ControlFlowNode() { - not Pruner::pruned(this) + not Pruner::unreachable(this) } /** Whether this control flow node is a load (including those in augmented assignments) */ diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index b0927d7c4586..4cc82cb35408 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -3,10 +3,11 @@ import python module Pruner { - /** A basic block before pruning */ + /** A control flow node before pruning */ class UnprunedCfgNode extends @py_flow_node { - string toString() { none() } + string toString() { none() } + /** Gets a predecessor of this flow node */ UnprunedCfgNode getAPredecessor() { py_successors(result, this) @@ -50,6 +51,7 @@ module Pruner { /** A control flow node corresponding to a comparison operation, such as `x 0` implies that `not bool(x)` is `False`. + */ + abstract predicate impliesFalse(Constraint other); - private predicate comparison_or_assignment_to_constant(CompareOp op, int k) { - exists(Compare comp, Cmpop cop, IntegerLiteral l | - comp.compares(_, cop, l) and - l.getValue() = k and - op.forOp(cop) - ) - or - exists(Assign a | a.getValue().(IntegerLiteral).getValue() = k) and op = eq() } + /** A basic block ending in a test (and branch). */ class UnprunedConditionBlock extends UnprunedBasicBlock { UnprunedConditionBlock() { this.last().isBranch() } + /** Holds if `controlled` is only reachable if the test in this block evaluates to `testIsTrue` */ predicate controls(UnprunedBasicBlock controlled, boolean testIsTrue) { /* For this block to control the block 'controlled' with 'testIsTrue' the following must be true: Execution must have passed through the test i.e. 'this' must strictly dominate 'controlled'. @@ -233,11 +242,11 @@ module Pruner { ) } - /** Holds if this condition controls the edge `pred->succ`, i.e. those edges for which the condition is `testIsTrue`. */ + /** Holds if the edge `pred->succ` is reachable only if the test in this block evaluates to `testIsTrue` */ predicate controlsEdge(UnprunedBasicBlock pred, UnprunedBasicBlock succ, boolean testIsTrue) { this.controls(pred, testIsTrue) and succ = pred.getASuccessor() or - pred.last() = this and ( + pred = this and ( testIsTrue = true and succ = this.getATrueSuccessor() or testIsTrue = false and succ = this.getAFalseSuccessor() @@ -246,7 +255,8 @@ module Pruner { } - class Truthy extends ConditionalConstant, TTruthy { + /** A constraint that the variable is truthy `bool(var) is True` or falsey `bool(var) is False` */ + class Truthy extends Constraint, TTruthy { private boolean booleanValue() { this = TTruthy(result) @@ -258,7 +268,7 @@ module Pruner { result = "Falsey" and this.booleanValue() = false } - override ConditionalConstant invert() { + override Constraint invert() { result = TTruthy(this.booleanValue().booleanNot()) } @@ -266,13 +276,14 @@ module Pruner { value = this.booleanValue() } - override predicate impliesFalse(ConditionalConstant other) { + override predicate impliesFalse(Constraint other) { other.constrainsVariableToBe(this.booleanValue().booleanNot()) } } - class IsNone extends ConditionalConstant, TIsNone { + /** A constraint that the variable is None `(var is None) is True` or not None `(var is None) is False` */ + class IsNone extends Constraint, TIsNone { private boolean isNone() { this = TIsNone(result) @@ -284,7 +295,7 @@ module Pruner { result = "Is not None" and this.isNone() = false } - override ConditionalConstant invert() { + override Constraint invert() { result = TIsNone(this.isNone().booleanNot()) } @@ -292,7 +303,7 @@ module Pruner { value = false and this.isNone() = true } - override predicate impliesFalse(ConditionalConstant other) { + override predicate impliesFalse(Constraint other) { other = TIsNone(this.isNone().booleanNot()) or this.isNone() = true and other = TTruthy(true) @@ -300,22 +311,25 @@ module Pruner { } - class ComparedToConstant extends ConditionalConstant, TComparedToConstant { + /** A constraint that the variable fulfils some equality or inequality to an integral constant. + * `(var op k) is True` where `op` is an equality or inequality operator and `k` is an integer constant + */ + class ConstrainedByConstant extends Constraint, TConstrainedByConstant { private int intValue() { - this = TComparedToConstant(_, result) + this = TConstrainedByConstant(_, result) } private CompareOp getOp() { - this = TComparedToConstant(result, _) + this = TConstrainedByConstant(result, _) } override string toString() { result = this.getOp().repr() + " " + this.intValue().toString() } - override ConditionalConstant invert() { - result = TComparedToConstant(this.getOp().invert(), this.intValue()) + override Constraint invert() { + result = TConstrainedByConstant(this.getOp().invert(), this.intValue()) } override predicate constrainsVariableToBe(boolean value) { @@ -336,17 +350,20 @@ module Pruner { ) } - override predicate impliesFalse(ConditionalConstant other) { + override predicate impliesFalse(Constraint other) { exists(boolean b | this.constrainsVariableToBe(b) and other = TTruthy(b.booleanNot()) ) or this.getOp() = eq() and other = TIsNone(true) or - this.getOp() = ne() and other.(ComparedToConstant).getOp() = eq() - and this.intValue() = other.(ComparedToConstant).intValue() + this.getOp() = ne() and other.(ConstrainedByConstant).getOp() = eq() + and this.intValue() = other.(ConstrainedByConstant).intValue() } + /** The minimum value that a variable fulfilling this constraint may hold + * within the bounds of a signed 32 bit number. + */ int minValue() { this.getOp() = eq() and result = this.intValue() or @@ -359,6 +376,9 @@ module Pruner { this.getOp() = ge() and result = this.intValue() } + /** The maximum value that a variable fulfilling this constraint may hold + * within the bounds of a signed 32 bit number. + */ int maxValue() { this.getOp() = eq() and result = this.intValue() or @@ -373,27 +393,28 @@ module Pruner { } - predicate pruned(@py_flow_node n) { + /** Holds if the control flow node `n` is unreachable due to + * one or more constraints. + */ + predicate unreachable(UnprunedCfgNode n) { exists(UnprunedBasicBlock bb | - pruned_bb(bb) and bb.contains(n) + unreachableBB(bb) and bb.contains(n) ) } - //private - predicate pruned_bb(UnprunedBasicBlock bb) { + /** Holds if the basic block `bb` is unreachable due to + * one or more constraints. + */ + predicate unreachableBB(UnprunedBasicBlock bb) { not bb.isEntry() and forall(UnprunedBasicBlock pred | pred.getASuccessor() = bb | - pruned_edge(pred, bb) + unreachableEdge(pred, bb) ) } - ConditionalConstant conditionForTest(SsaVariable var, UnprunedBasicBlock test) { - result = conditionForNode(var, test.last()) - } - - private ConditionalConstant conditionForNode(SsaVariable var, UnprunedCfgNode node) { + private Constraint constraintFromTest(SsaVariable var, UnprunedCfgNode node) { py_ssa_use(node, var) and result = TTruthy(true) or exists(boolean b | @@ -402,16 +423,10 @@ module Pruner { or exists(CompareOp op, int k | int_test(node, var, op, k) and - result = TComparedToConstant(op, k) + result = TConstrainedByConstant(op, k) ) or - result = conditionForNode(var, node.(UnprunedNot).getOperand()).invert() - } - - predicate impliesFalse(ConditionalConstant a, ConditionalConstant b) { - a.impliesFalse(b) or - a.(ComparedToConstant).minValue() > b.(ComparedToConstant).maxValue() or - a.(ComparedToConstant).maxValue() < b.(ComparedToConstant).minValue() + result = constraintFromTest(var, node.(UnprunedNot).getOperand()).invert() } predicate none_test(UnprunedCompareNode test, SsaVariable var, boolean is) { @@ -452,7 +467,7 @@ module Pruner { ) } - private boolean truthy_assignment(UnprunedCfgNode asgn, SsaVariable var) { + boolean truthy_assignment(UnprunedCfgNode asgn, SsaVariable var) { exists(Assign a | a.getATarget() = asgn.getNode() and py_ssa_use(asgn, var) @@ -465,10 +480,11 @@ module Pruner { module_import(var) and result = true } - private ConditionalConstant conditionForAssign(SsaVariable var, UnprunedBasicBlock asgn) { + /** Gets the constraint on `var` resulting from the an assignment in `asgn` */ + Constraint constraintFromAssignment(SsaVariable var, UnprunedBasicBlock asgn) { exists(CompareOp op, int k | int_assignment(asgn.getANode(), var, op, k) and - result = TComparedToConstant(op, k) + result = TConstrainedByConstant(op, k) ) or none_assignment(asgn.getANode(), var) and result = TIsNone(true) @@ -476,7 +492,8 @@ module Pruner { result = TTruthy(truthy_assignment(asgn.getANode(), var)) } - predicate priorCondition(UnprunedBasicBlock pred, UnprunedBasicBlock succ, ConditionalConstant preval, SsaVariable var) { + /** Holds if the constraint `preval` holds for `var` on edge `pred` -> `succ` as a result of a prior test or assignment */ + predicate priorConstraint(UnprunedBasicBlock pred, UnprunedBasicBlock succ, Constraint preval, SsaVariable var) { not (blacklisted(var) and preval = TTruthy(_)) and not var.getVariable().escapes() @@ -484,52 +501,62 @@ module Pruner { exists(UnprunedBasicBlock first | not first = pred and first.(UnprunedConditionBlock).controlsEdge(pred, succ, true) and - preval = conditionForTest(var, first) + preval = constraintFromTest(var, first.last()) or not first = pred and first.(UnprunedConditionBlock).controlsEdge(pred, succ, false) and - preval = conditionForTest(var, first).invert() + preval = constraintFromTest(var, first.last()).invert() or - preval = conditionForAssign(var, first) and + preval = constraintFromAssignment(var, first) and first.dominates(pred) and (succ = pred.getAFalseSuccessor() or succ = pred.getATrueSuccessor()) ) } - predicate branchCondition(UnprunedBasicBlock pred, UnprunedBasicBlock succ, ConditionalConstant cond, SsaVariable var) { - cond = conditionForTest(var, pred) and + /** Holds if `cond` holds for `var` on conditional edge `pred` -> `succ` as a result of the test for that edge */ + predicate constraintOnBranch(UnprunedBasicBlock pred, UnprunedBasicBlock succ, Constraint cond, SsaVariable var) { + cond = constraintFromTest(var, pred.last()) and succ = pred.getATrueSuccessor() or - cond = conditionForTest(var, pred).invert() and + cond = constraintFromTest(var, pred.last()).invert() and succ = pred.getAFalseSuccessor() } - predicate controllingConditions(UnprunedBasicBlock pred, UnprunedBasicBlock succ, ConditionalConstant preval, ConditionalConstant postcond) { + /** Holds if the pair of constraints (`preval`, `postcond`) holds on the edge `pred` -> `succ` for some SSA variable */ + predicate controllingConditions(UnprunedBasicBlock pred, UnprunedBasicBlock succ, Constraint preval, Constraint postcond) { exists(SsaVariable var | - priorCondition(pred, succ, preval, var) and - branchCondition(pred, succ, postcond, var) + priorConstraint(pred, succ, preval, var) and + constraintOnBranch(pred, succ, postcond, var) ) } - predicate pruned_edge(UnprunedBasicBlock pred, UnprunedBasicBlock succ) { - exists(ConditionalConstant pre, ConditionalConstant cond | + /** Holds if the edge `pred` -> `succ` should be pruned as it cannot be reached */ + predicate unreachableEdge(UnprunedBasicBlock pred, UnprunedBasicBlock succ) { + exists(Constraint pre, Constraint cond | controllingConditions(pred, succ, pre, cond) and impliesFalse(pre, cond) ) or - pruned_bb(pred) and succ = pred.getASuccessor() + unreachableBB(pred) and succ = pred.getASuccessor() or simply_dead(pred, succ) } + /* Helper for `unreachableEdge` */ + private predicate impliesFalse(Constraint a, Constraint b) { + a.impliesFalse(b) or + a.(ConstrainedByConstant).minValue() > b.(ConstrainedByConstant).maxValue() or + a.(ConstrainedByConstant).maxValue() < b.(ConstrainedByConstant).minValue() + } + /** Holds if edge is simply dead. Stuff like `if False: ...` */ - //private predicate simply_dead(UnprunedBasicBlock pred, UnprunedBasicBlock succ) { constTest(pred.last()) = true and pred.getAFalseSuccessor() = succ or constTest(pred.last()) = false and pred.getATrueSuccessor() = succ } + /* Helper for simply_dead */ private boolean constTest(UnprunedCfgNode node) { exists(ImmutableLiteral lit | result = lit.booleanValue() and lit = node.getNode() @@ -538,7 +565,7 @@ module Pruner { result = constTest(node.(UnprunedNot).getOperand()).booleanNot() } - /** Holds if `var` is blacklisted as having possbily been mutated */ + /** Holds if `var` is blacklisted as having possibly been mutated */ predicate blacklisted(SsaVariable var) { possibly_mutated(var) and not whitelisted(var) } From d7558e8fe5620e183786c254a08ab485e5587541 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 15 May 2019 15:25:12 +0100 Subject: [PATCH 03/11] Python: Fix CP in pruner and cache to avoid re-evaluation. --- python/ql/src/semmle/python/Flow.qll | 2 +- python/ql/src/semmle/python/Pruning.qll | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/python/ql/src/semmle/python/Flow.qll b/python/ql/src/semmle/python/Flow.qll index bb9f15720dd4..e0d713a72f16 100755 --- a/python/ql/src/semmle/python/Flow.qll +++ b/python/ql/src/semmle/python/Flow.qll @@ -33,7 +33,7 @@ private AstNode toAst(ControlFlowNode n) { */ class ControlFlowNode extends @py_flow_node { - ControlFlowNode() { + cached ControlFlowNode() { not Pruner::unreachable(this) } diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index 4cc82cb35408..9ffbfb162c43 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -1,5 +1,9 @@ -import python +private import AST +private import Exprs +private import Stmts +private import Import +private import Operations module Pruner { @@ -68,8 +72,6 @@ module Pruner { right.getBasicBlock().dominates(this.getBasicBlock()) } - override Compare getNode() { result = super.getNode() } - } /** A control flow node corresponding to a unary not expression: (`not x`) */ @@ -152,6 +154,7 @@ module Pruner { } private import Comparisons + private import SSA newtype TConstraint = TTruthy(boolean b) { b = true or b = false } @@ -477,7 +480,7 @@ module Pruner { a.getValue() instanceof False and result = false ) or - module_import(var) and result = true + module_import(asgn, var) and result = true } /** Gets the constraint on `var` resulting from the an assignment in `asgn` */ @@ -587,14 +590,14 @@ module Pruner { * truthiness. */ predicate whitelisted(SsaVariable var) { - module_import(var) + module_import(_, var) } - private predicate module_import(SsaVariable var) { - exists(Alias alias, UnprunedCfgNode node | + private predicate module_import(UnprunedCfgNode asgn, SsaVariable var) { + exists(Alias alias | alias.getValue() instanceof ImportExpr and - py_ssa_defn(var, node) and - alias.getAsname() = node.getNode() + py_ssa_defn(var, asgn) and + alias.getAsname() = asgn.getNode() ) } } From f975b8b87df0540c4353fbd5fedd8d57ed799180 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 17 May 2019 12:15:51 +0100 Subject: [PATCH 04/11] Python: Avoid bad magic. --- python/ql/src/semmle/python/Pruning.qll | 1 + 1 file changed, 1 insertion(+) diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index 9ffbfb162c43..771007be7c8f 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -496,6 +496,7 @@ module Pruner { } /** Holds if the constraint `preval` holds for `var` on edge `pred` -> `succ` as a result of a prior test or assignment */ + pragma [nomagic] predicate priorConstraint(UnprunedBasicBlock pred, UnprunedBasicBlock succ, Constraint preval, SsaVariable var) { not (blacklisted(var) and preval = TTruthy(_)) and From e1fb1d27a1db8b2402cf10b6ea90d21137609a02 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 13 Jun 2019 17:26:43 +0100 Subject: [PATCH 05/11] Python: Fix logic in pruning for tests like 'x != 0' as that does not imply that 'x is None' is false. --- python/ql/src/semmle/python/Pruning.qll | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index 771007be7c8f..a9c60cd5f6cc 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -181,7 +181,7 @@ module Pruner { abstract Constraint invert(); /** Holds if this constraint constrains the "truthiness" of the variable. - * That is, for a variable `var` constrainted by this constraint + * That is, for a variable `var` constrained by this constraint * `bool(var) is value` */ abstract predicate constrainsVariableToBe(boolean value); @@ -341,8 +341,6 @@ module Pruner { value = true and ( this.getOp() = eq() and this.intValue() != 0 or - this.getOp() = ne() and this.intValue() = 0 - or this.getOp() = lt() and this.intValue() <= 0 or this.getOp() = le() and this.intValue() < 0 @@ -355,7 +353,7 @@ module Pruner { override predicate impliesFalse(Constraint other) { exists(boolean b | - this.constrainsVariableToBe(b) and other = TTruthy(b.booleanNot()) + this.constrainsVariableToBe(b) and other.constrainsVariableToBe(b.booleanNot()) ) or this.getOp() = eq() and other = TIsNone(true) From a3d50e88cdbac3d43be993481ae211e34c23e917 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 13 Jun 2019 17:42:53 +0100 Subject: [PATCH 06/11] Python (pruning): Refactor a bit and all comments for clarity. --- python/ql/src/semmle/python/Exprs.qll | 6 +++--- python/ql/src/semmle/python/Pruning.qll | 19 ++++++++++--------- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/python/ql/src/semmle/python/Exprs.qll b/python/ql/src/semmle/python/Exprs.qll index 0b1c4fd3c982..365f19d8af1f 100644 --- a/python/ql/src/semmle/python/Exprs.qll +++ b/python/ql/src/semmle/python/Exprs.qll @@ -466,9 +466,9 @@ class NegativeIntegerLiteral extends ImmutableLiteral, UnaryExpr { } override Object getLiteralObject() { - py_cobjecttypes(result, theIntType()) and py_cobjectnames(result, "-" + this.getOperand().(IntegerLiteral).getN()) - or - py_cobjecttypes(result, theLongType()) and py_cobjectnames(result, "-" + this.getOperand().(IntegerLiteral).getN()) + (py_cobjecttypes(result, theIntType()) or py_cobjecttypes(result, theLongType())) + and + py_cobjectnames(result, "-" + this.getOperand().(IntegerLiteral).getN()) } } diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index a9c60cd5f6cc..638ae71802ab 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -190,7 +190,7 @@ module Pruner { * same variable. * For example `x > 0` implies that `not bool(x)` is `False`. */ - abstract predicate impliesFalse(Constraint other); + abstract predicate contradicts(Constraint other); } @@ -279,13 +279,14 @@ module Pruner { value = this.booleanValue() } - override predicate impliesFalse(Constraint other) { + override predicate contradicts(Constraint other) { other.constrainsVariableToBe(this.booleanValue().booleanNot()) } } - /** A constraint that the variable is None `(var is None) is True` or not None `(var is None) is False` */ + /** A constraint that the variable is None `(var is None) is True` or not None `(var is None) is False`. + * This includes the `is not` operator, `x is not None` being equivalent to `not x is None` */ class IsNone extends Constraint, TIsNone { private boolean isNone() { @@ -306,7 +307,7 @@ module Pruner { value = false and this.isNone() = true } - override predicate impliesFalse(Constraint other) { + override predicate contradicts(Constraint other) { other = TIsNone(this.isNone().booleanNot()) or this.isNone() = true and other = TTruthy(true) @@ -351,7 +352,7 @@ module Pruner { ) } - override predicate impliesFalse(Constraint other) { + override predicate contradicts(Constraint other) { exists(boolean b | this.constrainsVariableToBe(b) and other.constrainsVariableToBe(b.booleanNot()) ) @@ -481,7 +482,7 @@ module Pruner { module_import(asgn, var) and result = true } - /** Gets the constraint on `var` resulting from the an assignment in `asgn` */ + /** Gets the constraint on `var` resulting from the assignment in `asgn` */ Constraint constraintFromAssignment(SsaVariable var, UnprunedBasicBlock asgn) { exists(CompareOp op, int k | int_assignment(asgn.getANode(), var, op, k) and @@ -536,7 +537,7 @@ module Pruner { predicate unreachableEdge(UnprunedBasicBlock pred, UnprunedBasicBlock succ) { exists(Constraint pre, Constraint cond | controllingConditions(pred, succ, pre, cond) and - impliesFalse(pre, cond) + contradicts(pre, cond) ) or unreachableBB(pred) and succ = pred.getASuccessor() @@ -545,8 +546,8 @@ module Pruner { } /* Helper for `unreachableEdge` */ - private predicate impliesFalse(Constraint a, Constraint b) { - a.impliesFalse(b) or + private predicate contradicts(Constraint a, Constraint b) { + a.contradicts(b) or a.(ConstrainedByConstant).minValue() > b.(ConstrainedByConstant).maxValue() or a.(ConstrainedByConstant).maxValue() < b.(ConstrainedByConstant).minValue() } From 00fa80346b5686968647d694b99e60e452bb7086 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Thu, 13 Jun 2019 18:20:15 +0100 Subject: [PATCH 07/11] Python (pruning): Fix up handling of integer inequality. --- python/ql/src/semmle/python/Pruning.qll | 23 +++++++++++-------- .../ControlFlow/pruning/test.expected | 3 +++ .../library-tests/ControlFlow/pruning/test.py | 19 +++++++++++++++ 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index 638ae71802ab..6ef8aa4f110e 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -162,11 +162,7 @@ module Pruner { TIsNone(boolean b) { b = true or b = false } or TConstrainedByConstant(CompareOp op, int k) { - exists(Compare comp, Cmpop cop, IntegerLiteral l | - comp.compares(_, cop, l) and - l.getValue() = k and - op.forOp(cop) - ) + int_test(_, _, op, k) or exists(Assign a | a.getValue().(IntegerLiteral).getValue() = k) and op = eq() } @@ -416,7 +412,7 @@ module Pruner { ) } - private Constraint constraintFromTest(SsaVariable var, UnprunedCfgNode node) { + Constraint constraintFromTest(SsaVariable var, UnprunedCfgNode node) { py_ssa_use(node, var) and result = TTruthy(true) or exists(boolean b | @@ -443,13 +439,20 @@ module Pruner { ) } - predicate int_test(UnprunedCompareNode test, SsaVariable var, CompareOp op, int k) { + predicate int_test(UnprunedCfgNode test, SsaVariable var, CompareOp op, int k) { exists(UnprunedCfgNode left, UnprunedCfgNode right, Cmpop cop | + test.(UnprunedCompareNode).operands(left, cop, right) + | + op.forOp(cop) and py_ssa_use(left, var) and - test.operands(left, cop, right) and - right.getNode().(IntegerLiteral).getValue() = k and - op.forOp(cop) + right.getNode().(IntegerLiteral).getValue() = k + or + op.reverse().forOp(cop) and + py_ssa_use(right, var) and + left.getNode().(IntegerLiteral).getValue() = k ) + or + int_test(test.(UnprunedNot).getOperand(), var, op.invert(), k) } predicate int_assignment(UnprunedCfgNode asgn, SsaVariable var, CompareOp op, int k) { diff --git a/python/ql/test/library-tests/ControlFlow/pruning/test.expected b/python/ql/test/library-tests/ControlFlow/pruning/test.expected index bab31184e101..77cfc8e68749 100644 --- a/python/ql/test/library-tests/ControlFlow/pruning/test.expected +++ b/python/ql/test/library-tests/ControlFlow/pruning/test.expected @@ -50,3 +50,6 @@ | 186 | 1 | | 189 | 1 | | 192 | 1 | +| 198 | 0 | +| 204 | 0 | +| 210 | 0 | diff --git a/python/ql/test/library-tests/ControlFlow/pruning/test.py b/python/ql/test/library-tests/ControlFlow/pruning/test.py index 4b5915f94884..1e43090e8d7d 100644 --- a/python/ql/test/library-tests/ControlFlow/pruning/test.py +++ b/python/ql/test/library-tests/ControlFlow/pruning/test.py @@ -190,3 +190,22 @@ def func2(): except IOError: true12 = 0 count + +def inequality1(x): + if x < 4: + return + if x < 4: + count + +def inequality2(x): + if x < 4: + return + if not x >= 4: + count + +def reversed_inequality(x): + if x < 4: + return + if 4 > x: + count + From 1d269b0cd54c64b183a6a1751fd2f74b56cb7ebf Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 14 Jun 2019 09:59:28 +0100 Subject: [PATCH 08/11] Python: Add extra test for pruning. --- .../ControlFlow/pruning/Constraint.expected | 107 ++++++++++++++++++ .../ControlFlow/pruning/Constraint.ql | 10 ++ 2 files changed, 117 insertions(+) create mode 100644 python/ql/test/library-tests/ControlFlow/pruning/Constraint.expected create mode 100644 python/ql/test/library-tests/ControlFlow/pruning/Constraint.ql diff --git a/python/ql/test/library-tests/ControlFlow/pruning/Constraint.expected b/python/ql/test/library-tests/ControlFlow/pruning/Constraint.expected new file mode 100644 index 000000000000..975f3efdcde1 --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/pruning/Constraint.expected @@ -0,0 +1,107 @@ +| 8 | test | test | Truthy | +| 10 | test | test | Truthy | +| 14 | seq | seq | Truthy | +| 16 | seq | seq | Truthy | +| 17 | seq | seq | Truthy | +| 21 | UnaryExpr | t1 | Falsey | +| 21 | t1 | t1 | Truthy | +| 24 | t1 | t1 | Truthy | +| 25 | t1 | t1 | Truthy | +| 26 | t2 | t2 | Truthy | +| 29 | t2 | t2 | Truthy | +| 30 | t2 | t2 | Truthy | +| 31 | t3 | t3 | Truthy | +| 31 | t4 | t4 | Truthy | +| 32 | t3 | t3 | Truthy | +| 33 | t3 | t3 | Truthy | +| 34 | t3 | t3 | Truthy | +| 35 | t4 | t4 | Truthy | +| 36 | t5 | t5 | Truthy | +| 36 | t6 | t6 | Truthy | +| 37 | t5 | t5 | Truthy | +| 38 | t5 | t5 | Truthy | +| 39 | t6 | t6 | Truthy | +| 40 | t6 | t6 | Truthy | +| 43 | t1 | t1 | Truthy | +| 44 | UnaryExpr | t2 | Falsey | +| 44 | t2 | t2 | Truthy | +| 47 | t1 | t1 | Truthy | +| 48 | t2 | t2 | Truthy | +| 49 | t2 | t2 | Truthy | +| 51 | t2 | t2 | Truthy | +| 52 | t2 | t2 | Truthy | +| 55 | seq1 | seq1 | Truthy | +| 57 | UnaryExpr | seq2 | Falsey | +| 57 | seq2 | seq2 | Truthy | +| 60 | seq1 | seq1 | Truthy | +| 62 | seq1 | seq1 | Truthy | +| 63 | seq2 | seq2 | Truthy | +| 65 | seq2 | seq2 | Truthy | +| 66 | seq3 | seq3 | Truthy | +| 68 | UnaryExpr | seq4 | Falsey | +| 68 | seq4 | seq4 | Truthy | +| 71 | seq3 | seq3 | Truthy | +| 73 | var | var | Truthy | +| 74 | seq4 | seq4 | Truthy | +| 76 | var | var | Truthy | +| 78 | seq5 | seq5 | Truthy | +| 80 | seq5 | seq5 | Truthy | +| 81 | seq5 | seq5 | Truthy | +| 83 | var | var | Truthy | +| 88 | UnaryExpr | x | Falsey | +| 88 | x | x | Truthy | +| 89 | Exception | Exception | Truthy | +| 90 | y | y | Truthy | +| 91 | Exception | Exception | Truthy | +| 92 | make_a_call | make_a_call | Truthy | +| 93 | UnaryExpr | x | Falsey | +| 93 | x | x | Truthy | +| 94 | count | count | Truthy | +| 95 | y | y | Truthy | +| 96 | count | count | Truthy | +| 101 | make_a_call | make_a_call | Truthy | +| 102 | UnaryExpr | another_module | Falsey | +| 102 | another_module | another_module | Truthy | +| 103 | count | count | Truthy | +| 107 | UnaryExpr | t1 | Falsey | +| 107 | t1 | t1 | Truthy | +| 109 | t2 | t2 | Truthy | +| 111 | t1 | t1 | Truthy | +| 113 | UnaryExpr | t2 | Falsey | +| 113 | t2 | t2 | Truthy | +| 117 | UnaryExpr | test | Falsey | +| 117 | test | test | Truthy | +| 119 | UnaryExpr | test | Falsey | +| 119 | test | test | Truthy | +| 123 | m | m | Truthy | +| 125 | m | m | Truthy | +| 126 | m | m | Truthy | +| 158 | Compare | ps | Is not None | +| 158 | ps | ps | Truthy | +| 159 | ps | ps | Truthy | +| 160 | Compare | ps | Is None | +| 160 | ps | ps | Truthy | +| 171 | __name__ | __name__ | Truthy | +| 172 | None | None | Truthy | +| 174 | func | func | Truthy | +| 175 | Exception | Exception | Truthy | +| 176 | count | count | Truthy | +| 177 | Compare | escapes | Is None | +| 177 | None | None | Truthy | +| 177 | escapes | escapes | Truthy | +| 178 | count | count | Truthy | +| 180 | count | count | Truthy | +| 188 | true12 | true12 | Truthy | +| 195 | Compare | x | < 4 | +| 195 | x | x | Truthy | +| 197 | Compare | x | < 4 | +| 197 | x | x | Truthy | +| 201 | Compare | x | < 4 | +| 201 | x | x | Truthy | +| 203 | Compare | x | >= 4 | +| 203 | UnaryExpr | x | < 4 | +| 203 | x | x | Truthy | +| 207 | Compare | x | < 4 | +| 207 | x | x | Truthy | +| 209 | Compare | x | < 4 | +| 209 | x | x | Truthy | diff --git a/python/ql/test/library-tests/ControlFlow/pruning/Constraint.ql b/python/ql/test/library-tests/ControlFlow/pruning/Constraint.ql new file mode 100644 index 000000000000..22e10fc51f5f --- /dev/null +++ b/python/ql/test/library-tests/ControlFlow/pruning/Constraint.ql @@ -0,0 +1,10 @@ + +import python + +import semmle.python.Pruning + +from Pruner::Constraint c, SsaVariable var, Pruner::UnprunedCfgNode node, int line +where c = Pruner::constraintFromTest(var, node) and line = node.getNode().getLocation().getStartLine() and +line > 0 +select line, node.getNode().toString(), var.getId(), c + From f29dfa5112f746d495e1d5fa7c99c020d92b0fcb Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Fri, 14 Jun 2019 16:57:32 +0100 Subject: [PATCH 09/11] Python: Add clarifying comment and pragma. --- python/ql/src/semmle/python/Pruning.qll | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index 6ef8aa4f110e..b90e06ffbb88 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -548,7 +548,8 @@ module Pruner { simply_dead(pred, succ) } - /* Helper for `unreachableEdge` */ + /* Helper for `unreachableEdge`, deal with inequalities here to avoid blow up */ + pragma [inline] private predicate contradicts(Constraint a, Constraint b) { a.contradicts(b) or a.(ConstrainedByConstant).minValue() > b.(ConstrainedByConstant).maxValue() or From 30f2df8ac461b74c7700c4839bedb11d35794318 Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 19 Jun 2019 13:09:20 +0100 Subject: [PATCH 10/11] Python: Refactor pruning to be more clearly symmetric and complete. --- python/ql/src/semmle/python/Pruning.qll | 54 +++++++++++++++---------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index b90e06ffbb88..a433713327ec 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -182,11 +182,8 @@ module Pruner { */ abstract predicate constrainsVariableToBe(boolean value); - /** Holds if this constraint implies that `other` cannot hold for the - * same variable. - * For example `x > 0` implies that `not bool(x)` is `False`. - */ - abstract predicate contradicts(Constraint other); + /** Holds if this constraint implies that `other` cannot be `None` */ + abstract predicate cannotBeNone(); } @@ -275,8 +272,8 @@ module Pruner { value = this.booleanValue() } - override predicate contradicts(Constraint other) { - other.constrainsVariableToBe(this.booleanValue().booleanNot()) + override predicate cannotBeNone() { + this.booleanValue() = true } } @@ -303,10 +300,8 @@ module Pruner { value = false and this.isNone() = true } - override predicate contradicts(Constraint other) { - other = TIsNone(this.isNone().booleanNot()) - or - this.isNone() = true and other = TTruthy(true) + override predicate cannotBeNone() { + this = TIsNone(false) } } @@ -348,15 +343,16 @@ module Pruner { ) } - override predicate contradicts(Constraint other) { - exists(boolean b | - this.constrainsVariableToBe(b) and other.constrainsVariableToBe(b.booleanNot()) - ) - or - this.getOp() = eq() and other = TIsNone(true) - or - this.getOp() = ne() and other.(ConstrainedByConstant).getOp() = eq() - and this.intValue() = other.(ConstrainedByConstant).intValue() + predicate eq(int val) { + this = TConstrainedByConstant(eq(), val) + } + + predicate ne(int val) { + this = TConstrainedByConstant(ne(), val) + } + + override predicate cannotBeNone() { + this.getOp() = eq() } /** The minimum value that a variable fulfilling this constraint may hold @@ -551,9 +547,23 @@ module Pruner { /* Helper for `unreachableEdge`, deal with inequalities here to avoid blow up */ pragma [inline] private predicate contradicts(Constraint a, Constraint b) { - a.contradicts(b) or - a.(ConstrainedByConstant).minValue() > b.(ConstrainedByConstant).maxValue() or + a = TIsNone(true) and b.cannotBeNone() + or + a.cannotBeNone() and b = TIsNone(true) + or + a.constrainsVariableToBe(true) and b.constrainsVariableToBe(false) + or + a.constrainsVariableToBe(false) and b.constrainsVariableToBe(true) + or + a.(ConstrainedByConstant).minValue() > b.(ConstrainedByConstant).maxValue() + or a.(ConstrainedByConstant).maxValue() < b.(ConstrainedByConstant).minValue() + or + exists(int val | + a.(ConstrainedByConstant).eq(val) and b.(ConstrainedByConstant).ne(val) + or + a.(ConstrainedByConstant).ne(val) and b.(ConstrainedByConstant).eq(val) + ) } /** Holds if edge is simply dead. Stuff like `if False: ...` */ From 2040b010f43ad42ec7efe0346cd0aeee0398d26d Mon Sep 17 00:00:00 2001 From: Mark Shannon Date: Wed, 19 Jun 2019 14:02:24 +0100 Subject: [PATCH 11/11] Python: Clarify qldoc. --- python/ql/src/semmle/python/Pruning.qll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/src/semmle/python/Pruning.qll b/python/ql/src/semmle/python/Pruning.qll index a433713327ec..8bcd4c669512 100644 --- a/python/ql/src/semmle/python/Pruning.qll +++ b/python/ql/src/semmle/python/Pruning.qll @@ -182,7 +182,7 @@ module Pruner { */ abstract predicate constrainsVariableToBe(boolean value); - /** Holds if this constraint implies that `other` cannot be `None` */ + /** Holds if the value constrained by this constraint cannot be `None` */ abstract predicate cannotBeNone(); }