diff --git a/.gitignore b/.gitignore index 0e7cf58d..c910039f 100644 --- a/.gitignore +++ b/.gitignore @@ -18,5 +18,6 @@ docs/_build # Logfiles generated during the selftest suite: *.c.cpychecker-log.txt *.c.*-refcount-errors.html +*.cc.*-refcount-errors.html *.c.*-refcount-traces.html tests/plugin/dumpfiles/input.c.*t.test-pass diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 663ca66a..00000000 --- a/.travis.yml +++ /dev/null @@ -1,7 +0,0 @@ -language: c -compiler: - - gcc -script: make -before_install: - - sudo apt-get update -qq - - sudo apt-get install -qq gcc-4.6-plugin-dev python-six python-pygments graphviz diff --git a/Makefile b/Makefile index efbb5005..0e01f475 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -# Copyright 2011, 2012 David Malcolm -# Copyright 2011, 2012 Red Hat, Inc. +# Copyright 2011, 2012, 2013 David Malcolm +# Copyright 2011, 2012, 2013 Red Hat, Inc. # # This is free software: you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -176,6 +176,26 @@ debug: plugin demo: plugin $(srcdir)./gcc-with-cpychecker -c $(PYTHON_INCLUDES) demo.c +# Demos of the sm code: +demo-sm-taint: plugin + $(srcdir)./gcc-with-sm \ + sm/checkers/taint.sm \ + tests/sm/checkers/taint/example/input.c + +demo-sm-lto: plugin + $(srcdir)./gcc-with-sm \ + sm/checkers/malloc_checker.sm \ + -flto -flto-partition=none \ + tests/sm/lto/*.c + +demo-sm-lemon-timing: plugin + $(srcdir)./gcc-with-sm \ + --enable-profile \ + --enable-timing \ + sm/checkers/malloc_checker.sm \ + -D=_U_ \ + slow-tests/sm/integration/lemon/input.c + json-examples: plugin $(srcdir)./gcc-with-cpychecker -I/usr/include/python2.7 libcpychecker/html/test/example1/bug.c diff --git a/docs/getting-involved.rst b/docs/getting-involved.rst index 47656fe4..f9d10429 100644 --- a/docs/getting-involved.rst +++ b/docs/getting-involved.rst @@ -353,6 +353,9 @@ and the corresponding Python objects. 
More information can be seen in `run-test-suite.py` +By default, `run-test-suite.py` will invoke all the tests. You can pass it +a list of paths and it will run all tests found in those paths and below. + You can generate the "gold" stdout.txt by hacking up this line in run-test-suite.py:: @@ -365,6 +368,28 @@ this to take effect though. Unfortunately, this approach over-specifies the selftests, making them rather "brittle". Improvements to this approach would be welcome. +To directly see the GCC command line being invoked for each test, and to see +the resulting stdout and stderr, add `--show` to the arguments of +`run-test-suite.py`. + +For example:: + + $ python run-test-suite.py tests/plugin/diagnostics --show + tests/plugin/diagnostics: gcc -c -o tests/plugin/diagnostics/output.o -fplugin=/home/david/coding/gcc-python-plugin/python.so -fplugin-arg-python-script=tests/plugin/diagnostics/script.py -Wno-format tests/plugin/diagnostics/input.c + tests/plugin/diagnostics/input.c: In function 'main': + tests/plugin/diagnostics/input.c:23:1: error: this is an error (with positional args) + tests/plugin/diagnostics/input.c:23:1: error: this is an error (with keyword args) + tests/plugin/diagnostics/input.c:25:1: warning: this is a warning (with positional args) [-Wdiv-by-zero] + tests/plugin/diagnostics/input.c:25:1: warning: this is a warning (with keyword args) [-Wdiv-by-zero] + tests/plugin/diagnostics/input.c:23:1: error: a warning with some embedded format strings %s and %i + tests/plugin/diagnostics/input.c:25:1: warning: this is an unconditional warning [enabled by default] + tests/plugin/diagnostics/input.c:25:1: warning: this is another unconditional warning [enabled by default] + expected error was found: option must be either None, or of type gcc.Option + tests/plugin/diagnostics/input.c:23:1: note: This is the start of the function + tests/plugin/diagnostics/input.c:25:1: note: This is the end of the function + OK + 1 success; 0 failures; 0 skipped + 
Documentation ============= diff --git a/docs/index.rst b/docs/index.rst index c54ee801..765427c4 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -39,6 +39,7 @@ Contents: callbacks.rst attributes.rst cpychecker.rst + sm.rst success.rst getting-involved.rst misc.rst diff --git a/docs/lto.rst b/docs/lto.rst new file mode 100644 index 00000000..aa8b9438 --- /dev/null +++ b/docs/lto.rst @@ -0,0 +1,94 @@ +.. Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . + +Whole-program Analysis via Link-Time Optimization (LTO) +======================================================= +You can enable GCC's "link time optimization" feature by passing `-flto`. + +When this is enabled, gcc adds extra sections to the compiled .o file +containing the SSA-Gimple internal representation of every function, so that +this SSA representation is available at link-time. This allows gcc to inline +functions defined in one source file into functions defined in another +source file at link time. + +Although the feature is intended for optimization, we can also use it for +code analysis, and it's possible to run the Python plugin at link time. + +This means we can do interprocedural analysis across multiple source files. + +.. warning:: Running a gcc plugin from inside link-time optimization is + rather novel, and you're more likely to run into bugs. See e.g. 
+ http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54962 + +An invocation might look like this: + +.. code-block:: bash + + gcc \ + -flto \ + -flto-partition=none \ + -v \ + -fplugin=PATH/TO/python.so \ + -fplugin-arg-python-script=PATH/TO/YOUR/SCRIPT.py \ + INPUT-1.c \ + INPUT-2.c \ + ... + INPUT-n.c + +Looking at the above options in turn: + + * `-flto` enables link-time optimization + + * `-flto-partition=none` : by default, gcc with LTO partitions the code + and generates summary information for each partition, then combines the + results of the summaries (known as "WPA" and "LTRANS" respectively). + This appears to be of use for optimization, but to get at the function + bodies, for static analysis, you should pass this option, which instead + gathers all the code into one process. + + * `-v` means "verbose" and is useful for seeing all of the subprograms + that gcc invokes, along with their command line options. Given the + above options, you should see invocations of `cc1` (the C compiler), + `collect2` (the linker) and `lto1` (the link-time optimizer). + +For example, + +.. code-block:: bash + + $ ./gcc-with-python \ + examples/show-lto-supergraph.py \ + -flto \ + -flto-partition=none \ + tests/sm/lto/input-*.c + +will render a bitmap of the supergraph like this: + + .. figure:: sample-supergraph.png + :scale: 50 % + :alt: image of a supergraph + +.. py:function:: gcc.is_lto() + + :rtype: bool + + Determine whether or not we're being invoked during link-time + optimization (i.e. from within the `lto1` program) + + .. warning:: The underlying boolean is not set up until passes are being + invoked: it is always `False` during the initial invocation of the + Python script. + diff --git a/docs/misc.rst b/docs/misc.rst index 717e3bea..2c1d416b 100644 --- a/docs/misc.rst +++ b/docs/misc.rst @@ -25,6 +25,7 @@ scripts. .. 
toctree:: callgraph.rst + lto.rst options.rst parameters.rst preprocessor.rst diff --git a/docs/passes.rst b/docs/passes.rst index c8711ac7..b2bf853e 100644 --- a/docs/passes.rst +++ b/docs/passes.rst @@ -178,7 +178,7 @@ Here's an example:: # ...and wire it up, after the "cfg" pass: my_pass.register_after('cfg') -For :py:class:`gcc.GimplePass` and :py:class:`gcc.IpaPass`, the signatures of +For :py:class:`gcc.GimplePass` and :py:class:`gcc.RtlPass`, the signatures of `gate` and `execute` are: .. method:: gate(self, fun) @@ -192,6 +192,12 @@ of `gate` and `execute` are: .. method:: gate(self) .. method:: execute(self) +.. warning:: + + Unfortunately it doesn't appear to be possible to implement `gate()` for + `gcc.IpaPass` yet; for now, the `gate()` method on such passes will not be + called. See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54959 + If an unhandled exception is raised within `gate` or `execute`, it will lead to a GCC error: diff --git a/docs/sample-supergraph.png b/docs/sample-supergraph.png new file mode 100644 index 00000000..ba542578 Binary files /dev/null and b/docs/sample-supergraph.png differ diff --git a/docs/sm.rst b/docs/sm.rst new file mode 100644 index 00000000..f92c6955 --- /dev/null +++ b/docs/sm.rst @@ -0,0 +1,806 @@ +.. Copyright 2012, 2013 David Malcolm + Copyright 2012, 2013 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . + +.. _sm: + +.. 
we'll use + :language: c + within the markup, though that's obviously not the real language. + +Usage example: state machine checker +==================================== + +The state machine checker provides a domain specific language that enables +you to add new warnings to GCC. The language is currently known as "sm" + +Programs in the language express simple state machines: each item of data +in the source being analyzed can have a state associated with it. When +the code matches certain patterns, an item of data can potentially +transition to another state, or a fragment of handler code can be invoked, +typically to emit an error message. + +You can use this to write simple scripts that express the rules of an API. + +Examples +-------- + +Checking malloc usage +^^^^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: ../sm/checkers/malloc_checker.sm + :language: c + +.. (not the real language) + + +Example script: checking sizes of allocated data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: ../sm/checkers/sizeof_allocation.sm + :language: c + + +Example script: checking for tainted data +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. literalinclude:: ../sm/checkers/taint.sm + :language: c + +Example script: detecting return of pointers to the stack +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +http://cwe.mitre.org/data/definitions/562.html + +.. literalinclude:: ../sm/checkers/points_to_stack.sm + :lines: 20- + :language: c + +Invoking the checker +-------------------- + +``gcc-with-sm`` is a wrapper script which invokes GCC along with any .sm +files that are passed to it: + +.. code-block:: bash + + ./gcc-with-sm \ + [PATHS-TO-sm-FILES] + [NORMAL GCC ARGUMENTS] + +For example, ``make demo-sm-taint`` in the plugin's `Makefile` invokes this code: + +.. code-block:: bash + + ./gcc-with-sm \ + sm/checkers/taint.sm \ + tests/sm/checkers/taint/example/input.c + +running the "taint.sm" script on this input file: + +.. 
literalinclude:: ../tests/sm/checkers/taint/example/input.c + :lines: 20- + :language: c + +The following warnings are generated: + +.. literalinclude:: ../tests/sm/checkers/taint/example/stderr.txt + :language: c + +.. FIXME: what about internal API? +.. FIXME: what about rpm builds? + +Command-line options to `gcc-with-sm` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. option:: --enable-timing + + If set, `gcc-with-sm` will emit timing information to stderr + +.. option:: --enable-profile + + If set, use CPython's cProfile module to generate a profile + of the activity for each checker. The top 20 longest function calls + (cumulatively) will be emitted to stdout, and a profile will be + written to a file for each checker that was run, suitable for viewing + e.g. by RunSnakeRun + + Given an input file "foo.c" and checkers "bar" and "baz", it will + write out files: + + foo.c.bar.sm-profile + + foo.c.baz.sm-profile + + +Syntax +------ +The language resembles C with embedded fragments of Python and some other +syntactic elements. + +Identifiers follow the same rules as both Python and C: a letter or +underscore, followed by zero or more letters, numbers or underscores. + +Case is significant. + +Reserved words, which can't be used as identifiers: + * sm + * decl + * stateful + * true, false + * any_pointer, any_variable, any_expr + * pat + +Fragments of Python are enclosed in pairs of braces e.g.:: + + {{ error("%s called with NULL as 1st argument" % fn) }} + +Such Python fragments can have arbitrary amounts of leading whitespace, so +long as nothing is indented less than the first non-whitespace line. For +example, within this pattern-match rule: + +.. code-block:: c + + ptr.null: + { *ptr } + => {{ + # This fragment of Python code starts in column 10 + # and so that is treated as the left margin for Python + # indentation purposes + pass + }}; + +Whitespace is ignored elsewhere in the script. 
+ +C-style comments can occur anywhere except within Python fragments, and are ignored: + +.. code-block:: c + + ptr.null: + { *ptr } + => + /* This is a C-style comment */ + {{ error("dereference of NULL pointer %s" % ptr) }} + + +Top-level structures +^^^^^^^^^^^^^^^^^^^^ +A .sm script consists of one or more sm checkers. + +A checker is declared with "sm" and a name, and the content is enclosed in braces: + +.. code-block:: c + + sm my_first_checker { + } + + sm another_checker { + } + + sm yet_another_checker { + } + +Within a checker are four types of high-level clause: + + * declarations of expressions that can be matched on in C code + + * named patterns, describing a rule for pattern-matching code and giving + it a name + + * "global" fragments of Python code, enclosed in double-braces, for use + in creating helper functions + + * pattern-matching rules, expressing patterns to detect when data is in + a particular state, and what to do when the pattern is encountered + +For example: + +.. 
code-block:: c + + sm example_checker { + /* Here are some declarations: */ + stateful decl any_pointer ptr; + decl any_expr x; + + /* Here is a named pattern: */ + pat deref { *ptr } | { ptr[x] }; + + /* Here is a global fragment of Python code: */ + {{ + # Helper function for a checker that can only be run on + # constant integer expressions: + def is_known_int(var): + import gcc + return isinstance(var.gccexpr, gcc.IntegerCst) + }} + + /* Here are some pattern-matching rules: */ + ptr.*: + { ptr = malloc() } => ptr.unchecked; + + ptr.*: + { ptr = 0 } => ptr.null; + + ptr.unchecked, ptr.null, ptr.nonnull: + { ptr == 0 } => true=ptr.null, false=ptr.nonnull + | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + ; + + ptr.unchecked: + { *ptr } + => {{ + error('use of possibly-NULL pointer %s' % ptr, + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + }}, ptr.nonnull; + + } + + +Declarations +^^^^^^^^^^^^ +Declarations describe elements that may be used when pattern matching +fragments of C code. They are of the form:: + + decl TYPE NAME; + +where TYPE can be one of `any_expr`, `any_pointer` or `any_variable`, and +NAME is an identifier. + +One of the declarations should be prepended with "stateful", indicating that +this is the expression whose state is being tracked. + +Examples: + + .. code-block:: c + + stateful decl any_pointer ptr; + decl any_expr x; + + +Named patterns +^^^^^^^^^^^^^^ +Named patterns describe a rule for pattern-matching code and give it a name +(which must follow the rules for identifiers given above). This allows you +to abstract away some of the inner details of a match, and avoid repeating +yourself. + +Examples: + +.. 
code-block:: c + + /* Patterns that detect upper-bound and lower-bound checks: */ + pat check_ub { x < y } | { x <= y }; + pat check_lb { x > y } | { x >= y }; + pat check_eq { x == y }; + pat check_ne { x != y }; + + +Pattern-matching rules +^^^^^^^^^^^^^^^^^^^^^^ +A pattern-matching rule describes a set of states in which to search for +the pattern, the coding pattern to search for, and the outcome when such a +pattern is encountered when the relevant expressions are in the given state. + +For example: + +.. code-block:: c + + ptr.unknown, ptr.null, ptr.nonnull: + { ptr == 0 } => true=ptr.null, false=ptr.nonnull + | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + ; + +States +****** + +The states are a comma-separated list of one or more state names, followed +by a colon, specifying in which states the rule should be run: + +.. code-block:: c + + ptr.unknown, ptr.null, ptr.nonnull: + +State names are either identifiers or two identifiers with a period between +them. + +You can also use a "*" character as a wildcard to indicate that a match can +happen in all states: + +.. code-block:: c + + ptr.*: + +The start state has the name of the stateful decl with ".start" appended. +For example, given a stateful decl named ptr: + +.. code-block:: c + + stateful decl any_pointer ptr; + +all pointers implicitly start in the state "ptr.start". + +.. note:: + + Currently there is no additional significance to these two forms of state + name, though I have followed the convention of making the first part of + the state name match that of the stateful declaration. + +Patterns +******** + +After the list of state names are the patterns and their outcomes, separated +by an ASCII-art arrow ("=>"). + +You can supply more than one pattern/outcome pair for a list of states, +separating them with the vertical bar character "|". + +The pattern/outcome list should be terminated by a semicolon (even if +there's just one pattern/outcome pair). + +.. 
code-block:: c + + { ptr == 0 } => true=ptr.null, false=ptr.nonnull + | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + ; + +The matching pattern can take various forms. It can be a fragment of C code +enclosed in braces: + +.. code-block:: c + + /* Assignments: */ + { q = 0 } + { str = "hello world" } + { a = b } + + /* Assignment to address of another variable: */ + { p = &q } + + /* Return statement: */ + { return p } + { return 42 } + { return "foo" } + { return } + + /* Invocation of a specific named function: */ + { ptr = malloc(sz) } + { free(ptr) } + + /* Comparison of a declaration against a value: */ + { ptr == 0 } + { ptr != 0 } + { a < b } + { a <= b } + { a > b } + { a >= b } + + /* Dereference of a pointer: */ + { *ptr } + + /* Array access: */ + { arr[x] } + + /* Any usage of a value: */ + { ptr } + +You can also use named patterns to avoid repetition: + +.. code-block:: c + + pat check_ub { x < y } | { x <= y }; + pat check_lb { x > y } | { x >= y }; + + x.has_lb: + check_ub => true=x.ok + | check_lb => false=x.ok + ; + x.has_ub: + check_ub => false=x.ok + | check_lb => true=x.ok + ; + +You can supply more than one pattern, separating them with vertical bar +characters ("|") to signify "or": + +.. code-block:: c + + { x < y } | { x <= y } + + +There are also special patterns, referenced by wrapping them in a pair +of dollar characters: + +$arg_must_not_be_null$ +$$$$$$$$$$$$$$$$$$$$$$ + +This special pattern matches whenever a function marked with `GCC's nonnull +attribute `_ +is called, for each parameter so marked: + +.. 
code-block:: c + + ptr.null: + $arg_must_not_be_null$ + => {{ + error('NULL pointer %s passed as argument %i to %s' + % (ptr, argnumber, function), + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + }}; + +The following variables are set up as locals for use by the Python code: + + * argindex: (int) the index of the argument that was matched (0-based) + + * argnumber: (int) the number of the argument that was matched (1-based) + + * function: (:py:class:`gcc.FunctionDecl`): the function that was called + + * parameter: (:py:class:`gcc.ParmDecl`): the parameter that was matched. + This is only set for functions whose *definitions* are available (as + opposed to merely the *declaration*) + +Outcomes +******** +The outcome of a pattern match is generally either a transition to a given +state, or a fragment of Python code. + +You can provide a state name, in which case the value matching the stateful +declaration will transition to that state: + +.. code-block:: c + + /* Example of transitioning to named state "ptr.unchecked" */ + ptr.*: + { ptr = malloc() } => ptr.unchecked; + +An outcome for a conditional can be guarded with "true" or "false": the +outcome will only be taken for the relevant value of the conditional. + +For example, in this pattern rule, x will only transition to the state +"x.ok" along the path in which x equalled a given value. It won't change +state along the "not equal" path: + +.. code-block:: c + + /* Example of an outcome guarded by "true=" */ + x.tainted, x.has_lb, x.has_ub: + { x == a } => true=x.ok + +You can provide more than one outcome, separated by commas: + +.. code-block:: c + + /* Example of outcomes guarded by "true=" and "false=" */ + x.tainted: + { x < y } => true=x.has_ub, false=x.has_lb + +All applicable outcomes are run in sequence, so that you can have both a +Python fragment and a named state: + +.. 
code-block:: c + + /* + Example of both a Python outcome (to issue an error), and a + transition to a named state (since we only want to warn about the first + such dereference) + */ + ptr.unchecked: + { *ptr } + => {{ + error('use of possibly-NULL pointer %s' % ptr, + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + }}, ptr.nonnull; + +Python API +---------- + +You can embed Python in two ways within an sm file: within a top-level clause +in the checker, and as an outcome when a pattern is matched: + +.. code-block:: c + + sm example_checker { + stateful decl any_pointer ptr; + + {{ + # Example of top-level Python. This will be run once when the + # checker starts, and can be used for defining helper functions: + def some_helper_function(a): + pass + }} + + ptr.null: + { *ptr } => + {{ + # Example of a Python fragment used when a pattern is matched + # in a particular state: + if some_helper_function(ptr): + error("dereference of NULL pointer %s" % ptr) + }}; + } + +When a python fragment is called, the locals() will contain values for the +relevant named declarations for the given match. For example, when the +above fragment is run and matches for `q` on this C code: + +.. code-block:: c + + *q = 0; + +`ptr` is set up for you as an object such that str(ptr) == "q", and hence +this python code:: + + error("dereference of NULL pointer %s" % ptr) + +leads to this error message:: + + dereference of NULL pointer q + +The following API is available from within such a fragment: + +.. py:function:: error(msg, cwe=None) + + Emit an error message. + + Optionally, an ID can be provided describing + the error within the `Common Weakness Enumeration + <http://cwe.mitre.org/>`_ dictionary. + + :param msg: the error message to be emitted + :type msg: str + :param cwe: ID of the error, e.g. "CWE-690" + :type cwe: str or None + +.. 
py:function:: set_state(name, **kargs) + + Set the state to the one with the given name, potentially adding extra + key/value pairs to the state. + + For example, at the bottom of this helper function from + `sizeof_allocation.sm` the checker calls `set_state` supplying a `size` + keyword argument, annotating the "ptr.sized" state with a specific size + value, which can later be accessed as an attribute of the + :py:data:`state` variable:: + + def check_size(ptr, allocated_size): + import gcc + type_pointed_to = ptr.type.dereference + if not isinstance(type_pointed_to, gcc.VoidType): + required_size = type_pointed_to.sizeof + if allocated_size < required_size: + error("allocation too small: pointer to %s (%i bytes)" + " initialized with allocation of %i bytes" + % (type_pointed_to, required_size, allocated_size), + cwe="CWE-131") # "Incorrect Calculation of Buffer Size" + + # Handle cases where the cast happens on another line: + set_state("ptr.sized", size=int(allocated_size)) + + .. warning:: + + Be careful when using keyword arguments to add attributes to a state: + each set of attributes is effectively its own instance of a state, and + the implementation will need to do more work for every possible state + created. + + In particular, the implementation is only guaranteed to terminate when + there are a finite number of states: Python fragments that try to + manipulate states in complicated ways are likely to send the + implementation into an infinite loop. + +.. py:data:: state + + The current state. The attribute `name` gives the name of the state, + and other attributes that were provided as keyword arguments of + :py:func:`set_state` can be looked up as regular python attributes. + + For example, this fragment from `sizeof_allocation.sm` calls into a + Python function ("check_size") when a pointer of known size is assigned + to another pointer, looking up the saved size via `state.size`: + + .. 
code-block:: c + + ptr.sized: + { other_ptr = ptr } => + {{ + check_size(other_ptr, allocated_size=state.size) + }}; + +.. note:: + + The implementation makes no guarantees as to the number of times a given + Python outcome will be called: it may be called many times, only once + (and have its effects cached), or not at all. Avoid side-effects in + such Python code (such as writing to disk). + + +The grammar +=========== +High-level rules:: + + # start of grammar: + checker : sm + | sm checker + + sm : SM ID LBRACE smclauses RBRACE + + smclauses : smclause + | smclauses smclause + + smclause : optional_stateful decl declkind ID SEMICOLON + # e.g. "stateful decl any_pointer ptr;" + # e.g. "decl any_expr x;" + + smclause : PAT ID pattern SEMICOLON + smclause : PYTHON + smclause : statelist COLON patternrulelist SEMICOLON + # e.g. + # ptr.unknown, ptr.null, ptr.nonnull: + # { ptr == 0 } => true=ptr.null, false=ptr.nonnull + # | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + # ; + # + +Declarations:: + + empty : + + optional_stateful : STATEFUL + | empty + + declkind : "any_expr" + | "any_pointer" + +Pattern-matching rules:: + + statelist : statename + | statename COMMA statelist + # e.g. + # ptr.unknown, ptr.null, ptr.nonnull + + patternrulelist : patternrule + | patternrule PIPE patternrulelist + # e.g. + # { ptr == 0 } => true=ptr.null, false=ptr.nonnull + # | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + + patternrule : pattern ACTION outcomes + # e.g. "{ ptr = malloc() } => ptr.unknown" + # e.g. "$leaked$ => ptr.leaked" + + statename : ID DOT ID + | ID + +Various kinds of pattern:: + + pattern : LBRACE cpattern RBRACE + # e.g. + # { ptr = malloc() } + + pattern : ID + # e.g. + # checked_against_0 + + pattern : DOLLARPATTERN + # e.g. + # $leaked$ + + pattern : pattern PIPE pattern + # e.g. + # $leaked$ | { x == 0 } + +Various kinds of "cpattern":: + + cpattern : ID ASSIGNMENT LITERAL_STRING + | ID ASSIGNMENT LITERAL_NUMBER + | ID ASSIGNMENT ID + # e.g. 
"q = 0" + + cpattern : ID ASSIGNMENT AMPERSAND ID + # e.g. "&var" + + cpattern : RETURN ID + | RETURN LITERAL_STRING + | RETURN LITERAL_NUMBER + | RETURN + # e.g. "return var" + + cpattern : ID ASSIGNMENT ID LPAREN fncall_args RPAREN + # e.g. "ptr = malloc()" + + fncall_arg : ID + | LITERAL_STRING + | LITERAL_NUMBER + + nonempty_fncall_args : fncall_arg + | fncall_args COMMA fncall_arg + + fncall_args : nonempty_fncall_args + + fncall_args : empty + + cpattern : ID LPAREN fncall_args RPAREN + # e.g. "free(ptr)" + + cpattern : ID COMPARISON LITERAL_NUMBER + | ID COMPARISON ID + # e.g. "ptr == 0" + + cpattern : STAR ID + # e.g. "*ptr" + + cpattern : ID LSQUARE ID RSQUARE + # e.g. "arr[x]" + + cpattern : ID + # e.g. "ptr" + +The various outcomes when a pattern matches:: + + outcomes : outcome + | outcome COMMA outcomes + # e.g. "ptr.unknown" + + outcome : statename + # e.g. "ptr.unknown" + + outcome : "true" ASSIGNMENT outcome + | "false" ASSIGNMENT outcome + # e.g. "true=ptr.null" + + outcome : PYTHON + # e.g. "{ error('use of possibly-NULL pointer %s' % ptr)}" + +.. :: + + t_ACTION = r'=>' + t_LPAREN = r'\(' + t_RPAREN = r'\)' + t_LBRACE = r'{' + t_RBRACE = r'}' + t_LSQUARE = r'\[' + t_RSQUARE = r'\]' + t_COMMA = r',' + t_DOT = r'\.' + t_COLON = r':' + t_SEMICOLON = r';' + t_ASSIGNMENT = r'=' + t_STAR = r'\*' + t_PIPE = r'\|' + + def t_COMPARISON(t): + r'<=|<|==|!=|>=|>' + return t + + def t_LITERAL_NUMBER(t): + r'(0x[0-9a-fA-F]+|\d+)' + try: + if t.value.startswith('0x'): + t.value = long(t.value, 16) + else: + t.value = long(t.value) + except ValueError: + raise ParserError(t.value) + return t + + def t_LITERAL_STRING(t): + r'"([^"]*)"|\'([^\']*)\'' + # Drop the quotes: + t.value = t.value[1:-1] + return t diff --git a/docs/tree.rst b/docs/tree.rst index ed52831c..7af1beda 100644 --- a/docs/tree.rst +++ b/docs/tree.rst @@ -313,11 +313,21 @@ Declarations A subclass of :py:class:`gcc.Declaration` indicating the declaration of a parameter to a function or method. 
+ .. py:attribute:: context + + This gives the :py:class:`gcc.FunctionDecl` where the parameter was + declared. + .. py:class:: gcc.ResultDecl A subclass of :py:class:`gcc.Declaration` declararing a dummy variable that will hold the return value from a function. + .. py:attribute:: context + + This gives the :py:class:`gcc.FunctionDecl` for the corresponding + function. + .. py:class:: gcc.VarDecl A subclass of :py:class:`gcc.Declaration` indicating the declaration of a @@ -332,6 +342,14 @@ Declarations (boolean) Is this variable to be allocated with static storage? + .. py:attribute:: context + + This gives the :py:class:`gcc.Tree` where the variable was declared. + + For a local variable, this will be a :py:class:`gcc.FunctionDecl`, and + for a global variable, this will be a + :py:class:`gcc.TranslationUnitDecl` + .. py:class:: gcc.NamespaceDecl A subclass of :py:class:`gcc.Declaration` representing a C++ namespace @@ -418,6 +436,9 @@ Types `sizeof()` this type, as an `int`, or raising `TypeError` for those types which don't have a well-defined size + .. note:: This attribute is not usable from within `lto1`; attempting + to use it there will lead to a `RuntimeError` exception. + The standard C types are accessible via class methods of :py:class:`gcc.Type`. They are only created by GCC after plugins are loaded, and so they're only visible during callbacks, not during the initial run of the code. @@ -468,10 +489,16 @@ Types The gcc.IntegerType for the signed version of this type + .. note:: This attribute is not usable from within `lto1`; attempting + to use it there will lead to a `RuntimeError` exception. + .. py:attribute:: unsigned_equivalent The gcc.IntegerType for the unsigned version of this type + .. note:: This attribute is not usable from within `lto1`; attempting + to use it there will lead to a `RuntimeError` exception. + .. 
py:attribute:: max_value The maximum possible value for this type, as a diff --git a/examples/show-ivpgraph.py b/examples/show-ivpgraph.py new file mode 100644 index 00000000..7be4812a --- /dev/null +++ b/examples/show-ivpgraph.py @@ -0,0 +1,43 @@ +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +# Sample python script, to be run by our gcc plugin +# Show the "IVP graph": the CFG of all functions, linked by +# interprocedural edges, with a copy of each function repeated +# for each N possible callsites at the top of the stack, so that +# only interprocedurally-valid paths are possible (as well as those +# erroneously present due to the truncation of the stack) + +import gcc +from gccutils.graph.supergraph import Supergraph +from gccutils.graph.ivpgraph import IvpGraph +from gccutils import invoke_dot + +# We'll implement this as a custom pass, to be called directly after the +# builtin "cfg" pass, which generates the CFG: + +class ShowIvpgraph(gcc.SimpleIpaPass): + def execute(self): + # (the callgraph should be set up by this point) + sg = Supergraph(split_phi_nodes=False, + add_fake_entry_node=True) + ivpgraph = IvpGraph(sg, maxlength=2) + dot = ivpgraph.to_dot('ivpgraph') + invoke_dot(dot, 'ivpgraph') + +ps = ShowIvpgraph(name='show-ivpgraph') +ps.register_before('early_local_cleanups') diff --git a/examples/show-lto-supergraph.py 
b/examples/show-lto-supergraph.py new file mode 100644 index 00000000..b269987d --- /dev/null +++ b/examples/show-lto-supergraph.py @@ -0,0 +1,38 @@ +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +# Sample python script, to be run by our gcc plugin +# Show the "supergraph": the CFG of all functions, linked by +# interproceduraledges: +import gcc +from gccutils.graph.supergraph import Supergraph +from gccutils import invoke_dot + +# We'll implement this as a custom pass, to be called directly before +# 'whole-program' + +class ShowSupergraph(gcc.IpaPass): + def execute(self): + # (the callgraph should be set up by this point) + if gcc.is_lto(): + sg = Supergraph(split_phi_nodes=False, + add_fake_entry_node=False) + dot = sg.to_dot('supergraph') + invoke_dot(dot, 'supergraph') + +ps = ShowSupergraph(name='show-supergraph') +ps.register_before('whole-program') diff --git a/examples/show-supergraph.py b/examples/show-supergraph.py new file mode 100644 index 00000000..d452f066 --- /dev/null +++ b/examples/show-supergraph.py @@ -0,0 +1,37 @@ +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +# Sample python script, to be run by our gcc plugin +# Show the "supergraph": the CFG of all functions, linked by +# interproceduraledges: +import gcc +from gccutils.graph.supergraph import Supergraph +from gccutils import invoke_dot + +# We'll implement this as a custom pass, to be called directly after the +# builtin "cfg" pass, which generates the CFG: + +class ShowSupergraph(gcc.SimpleIpaPass): + def execute(self): + # (the callgraph should be set up by this point) + sg = Supergraph(split_phi_nodes=False, + add_fake_entry_node=False) + dot = sg.to_dot('supergraph') + invoke_dot(dot, 'supergraph') + +ps = ShowSupergraph(name='show-supergraph') +ps.register_before('early_local_cleanups') diff --git a/gcc-python-callbacks.c b/gcc-python-callbacks.c index af113e4d..e3348f57 100644 --- a/gcc-python-callbacks.c +++ b/gcc-python-callbacks.c @@ -168,6 +168,8 @@ gcc_python_finish_invoking_callback(PyGILState_STATE gstate, Py_XDECREF(args); Py_XDECREF(result); + /* We never cleanup "closure"; we don't know if we'll be called again */ + PyGILState_Release(gstate); input_location = saved_loc; } diff --git a/gcc-python-cfg.c b/gcc-python-cfg.c index a73f379e..0714e516 100644 --- a/gcc-python-cfg.c +++ b/gcc-python-cfg.c @@ -111,6 +111,14 @@ VEC_edge_as_PyList(VEC(edge,gc) *vec_edges) return NULL; } +PyObject * +gcc_BasicBlock_repr(struct PyGccBasicBlock * self) +{ + return gcc_python_string_from_format("%s(index=%i)", + Py_TYPE(self)->tp_name, + self->bb->index); +} + PyObject * gcc_BasicBlock_get_preds(PyGccBasicBlock *self, void *closure) @@ -469,7 +477,7 @@ gcc_Cfg_get_basic_blocks(PyGccCfg *self, void 
*closure) PyObject *result = NULL; int i; - result = PyList_New(self->cfg->x_n_basic_blocks); + result = PyList_New(0); if (!result) { goto error; } @@ -480,7 +488,16 @@ gcc_Cfg_get_basic_blocks(PyGccCfg *self, void *closure) if (!item) { goto error; } - PyList_SetItem(result, i, item); + /* It appears that with optimization there can be occasional NULLs, + which get turned into None. Skip them: + */ + if (item != Py_None) { + if (-1 == PyList_Append(result, item)) { + Py_DECREF(item); + goto error; + } + } + Py_DECREF(item); } return result; diff --git a/gcc-python-closure.c b/gcc-python-closure.c index 2e04d96e..a705302a 100644 --- a/gcc-python-closure.c +++ b/gcc-python-closure.c @@ -133,6 +133,19 @@ gcc_python_closure_make_args(struct callback_closure * closure, int add_cfun, Py return NULL; } + +void +gcc_python_closure_free(struct callback_closure *closure) +{ + assert(closure); + + Py_XDECREF(closure->callback); + Py_XDECREF(closure->extraargs); + Py_XDECREF(closure->kwargs); + + PyMem_Free(closure); +} + /* PEP-7 Local variables: diff --git a/gcc-python-closure.h b/gcc-python-closure.h index 7950fbc5..f0cf5be6 100644 --- a/gcc-python-closure.h +++ b/gcc-python-closure.h @@ -44,6 +44,9 @@ PyObject * gcc_python_closure_make_args(struct callback_closure * closure, int add_cfun, PyObject *wrapped_gcc_data); +void +gcc_python_closure_free(struct callback_closure *closure); + /* PEP-7 Local variables: diff --git a/gcc-python-function.c b/gcc-python-function.c index a4997d63..76830c60 100644 --- a/gcc-python-function.c +++ b/gcc-python-function.c @@ -57,6 +57,52 @@ gcc_Function_repr(struct PyGccFunction * self) return NULL; } +long +gcc_Function_hash(struct PyGccFunction * self) +{ + return (long)self->fun; +} + +PyObject * +gcc_Function_richcompare(PyObject *o1, PyObject *o2, int op) +{ + struct PyGccFunction *functionobj1; + struct PyGccFunction *functionobj2; + int cond; + PyObject *result_obj; + + if (!PyObject_TypeCheck(o1, (PyTypeObject*)&gcc_FunctionType)) 
{ + result_obj = Py_NotImplemented; + goto out; + } + if (!PyObject_TypeCheck(o2, (PyTypeObject*)&gcc_FunctionType)) { + result_obj = Py_NotImplemented; + goto out; + } + + functionobj1 = (struct PyGccFunction *)o1; + functionobj2 = (struct PyGccFunction *)o2; + + switch (op) { + case Py_EQ: + cond = (functionobj1->fun == functionobj2->fun); + break; + + case Py_NE: + cond = (functionobj1->fun != functionobj2->fun); + break; + + default: + result_obj = Py_NotImplemented; + goto out; + } + result_obj = cond ? Py_True : Py_False; + + out: + Py_INCREF(result_obj); + return result_obj; +} + PyObject * gcc_python_make_wrapper_function(struct function *fun) { diff --git a/gcc-python-gimple.c b/gcc-python-gimple.c index da9591ff..3107d37f 100644 --- a/gcc-python-gimple.c +++ b/gcc-python-gimple.c @@ -63,6 +63,52 @@ gcc_Gimple_str(struct PyGccGimple * self) return do_pretty_print(self, 0, 0); } +long +gcc_Gimple_hash(struct PyGccGimple * self) +{ + return (long)self->stmt; +} + +PyObject * +gcc_Gimple_richcompare(PyObject *o1, PyObject *o2, int op) +{ + struct PyGccGimple *gimpleobj1; + struct PyGccGimple *gimpleobj2; + int cond; + PyObject *result_obj; + + if (!PyObject_TypeCheck(o1, (PyTypeObject*)&gcc_GimpleType)) { + result_obj = Py_NotImplemented; + goto out; + } + if (!PyObject_TypeCheck(o2, (PyTypeObject*)&gcc_GimpleType)) { + result_obj = Py_NotImplemented; + goto out; + } + + gimpleobj1 = (struct PyGccGimple *)o1; + gimpleobj2 = (struct PyGccGimple *)o2; + + switch (op) { + case Py_EQ: + cond = (gimpleobj1->stmt == gimpleobj2->stmt); + break; + + case Py_NE: + cond = (gimpleobj1->stmt != gimpleobj2->stmt); + break; + + default: + result_obj = Py_NotImplemented; + goto out; + } + result_obj = cond ? 
Py_True : Py_False; + + out: + Py_INCREF(result_obj); + return result_obj; +} + static tree gimple_walk_tree_callback(tree *tree_ptr, int *walk_subtrees, void *data) { @@ -135,9 +181,8 @@ gcc_Gimple_walk_tree(struct PyGccGimple * self, PyObject *args, PyObject *kwargs result = walk_gimple_op (self->stmt, gimple_walk_tree_callback, &wi); - Py_XDECREF(closure->callback); - Py_XDECREF(closure->extraargs); - Py_XDECREF(closure->kwargs); + + gcc_python_closure_free(closure); /* Propagate exceptions: */ if (PyErr_Occurred()) { @@ -211,6 +256,35 @@ gcc_GimpleCall_get_args(struct PyGccGimple *self, void *closure) return NULL; } +PyObject * +gcc_GimpleLabel_repr(struct PyGccGimple * self) +{ + PyObject *label_obj = NULL; + PyObject *label_repr = NULL; + PyObject *result = NULL; + + label_obj = gcc_python_make_wrapper_tree(gimple_label_label (self->stmt)); + if (!label_obj) { + goto cleanup; + } + + label_repr = PyObject_Repr(label_obj); + if (!label_repr) { + goto cleanup; + } + + result = gcc_python_string_from_format("%s(label=%s)", + Py_TYPE(self)->tp_name, + gcc_python_string_as_string(label_repr)); + + cleanup: + Py_XDECREF(label_obj); + Py_XDECREF(label_repr); + + return result; +} + + PyObject * gcc_GimplePhi_get_args(struct PyGccGimple *self, void *closure) { @@ -274,11 +348,17 @@ gcc_GimpleSwitch_get_labels(struct PyGccGimple *self, void *closure) } -PyObject* -gcc_python_make_wrapper_gimple(gimple stmt) +/* + Ensure we have a unique PyGccGimple per gimple address (by maintaining a dict): +*/ +static PyObject *gimple_wrapper_cache = NULL; + +static PyObject * +real_make_gimple_wrapper(void *ptr) { struct PyGccGimple *gimple_obj = NULL; PyGccWrapperTypeObject* tp; + gimple stmt = (gimple)ptr; tp = gcc_python_autogenerated_gimple_type_for_stmt(stmt); assert(tp); @@ -297,6 +377,14 @@ gcc_python_make_wrapper_gimple(gimple stmt) return NULL; } +PyObject* +gcc_python_make_wrapper_gimple(gimple stmt) +{ + return gcc_python_lazily_create_wrapper(&gimple_wrapper_cache, + 
stmt, + real_make_gimple_wrapper); +} + void wrtp_mark_for_PyGccGimple(PyGccGimple *wrapper) { diff --git a/gcc-python-location.c b/gcc-python-location.c index 964c921c..866720d6 100644 --- a/gcc-python-location.c +++ b/gcc-python-location.c @@ -55,6 +55,8 @@ gcc_Location_richcompare(PyObject *o1, PyObject *o2, int op) struct PyGccLocation *locobj2; int cond; PyObject *result_obj; + const char *file1; + const char *file2; assert(Py_TYPE(o1) == (PyTypeObject*)&gcc_LocationType); @@ -66,19 +68,87 @@ gcc_Location_richcompare(PyObject *o1, PyObject *o2, int op) locobj1 = (struct PyGccLocation *)o1; locobj2 = (struct PyGccLocation *)o2; - switch (op) { - case Py_EQ: - cond = (locobj1->loc == locobj2->loc); - break; - - case Py_NE: - cond = (locobj1->loc != locobj2->loc); - break; - - default: - result_obj = Py_NotImplemented; - goto out; + /* First compare by filename, then by line, then by column */ + file1 = LOCATION_FILE(locobj1->loc); + file2 = LOCATION_FILE(locobj2->loc); + + if (file1 != file2) { + /* Compare by file: */ + switch (op) { + case Py_LT: + case Py_LE: + /* we merge the LT and LE cases since we've already + established that the values are not equal */ + cond = (strcmp(file1, file2) < 0); + break; + case Py_GT: + case Py_GE: + cond = (strcmp(file1, file2) > 0); + break; + case Py_EQ: + cond = 0; + break; + case Py_NE: + cond = 1; + break; + default: + result_obj = Py_NotImplemented; + goto out; + } + } else { + /* File equality; compare by line: */ + int line1 = LOCATION_LINE(locobj1->loc); + int line2 = LOCATION_LINE(locobj2->loc); + + if (line1 != line2) { + switch (op) { + case Py_LT: + case Py_LE: + cond = (line1 < line2); + break; + case Py_GT: + case Py_GE: + cond = (line1 > line2); + break; + case Py_EQ: + cond = 0; + break; + case Py_NE: + cond = 1; + break; + default: + result_obj = Py_NotImplemented; + goto out; + } + } else { + /* File and line equality; compare by column: */ + expanded_location exploc1 = expand_location(locobj1->loc); + 
expanded_location exploc2 = expand_location(locobj2->loc); + int col1 = exploc1.column; + int col2 = exploc2.column; + + switch (op) { + case Py_LT: + case Py_LE: + cond = (col1 < col2); + break; + case Py_GT: + case Py_GE: + cond = (col1 > col2); + break; + case Py_EQ: + cond = (col1 == col2); + break; + case Py_NE: + cond = (col1 != col2); + break; + default: + result_obj = Py_NotImplemented; + goto out; + } + } } + result_obj = cond ? Py_True : Py_False; out: diff --git a/gcc-python-option.c b/gcc-python-option.c index 3adea0c1..135a8c02 100644 --- a/gcc-python-option.c +++ b/gcc-python-option.c @@ -78,6 +78,12 @@ gcc_Option_repr(PyGccOption * self) gcc_python_option_to_cl_option(self)->opt_text); } +/* + Weakly import warn_format; it's not available in lto1 + (during link-time optimization) +*/ +__typeof__ (warn_format) warn_format __attribute__ ((weak)); + int gcc_python_option_is_enabled(enum opt_code opt_code) { /* Returns 1 if option OPT_IDX is enabled in OPTS, 0 if it is disabled, diff --git a/gcc-python-pass.c b/gcc-python-pass.c index 7ea74af8..9ba5e986 100644 --- a/gcc-python-pass.c +++ b/gcc-python-pass.c @@ -44,6 +44,18 @@ static bool impl_gate(void) int result; location_t saved_loc = input_location; + /* + It appears that current_pass is not set by when gcc (4.7 at least) when + it invokes gate for an IPA_PASS within execute_ipa_summary_passes + (in gcc/passes.c), so we don't have a way of figuring out which pass + we were called on. 
+ + Reported as http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54959 + */ + if (NULL == current_pass) { + return true; + } + assert(current_pass); pass_obj = gcc_python_make_wrapper_pass(current_pass); assert(pass_obj); /* we own a ref at this point */ diff --git a/gcc-python-tree.c b/gcc-python-tree.c index 0f561bb4..bbb76497 100644 --- a/gcc-python-tree.c +++ b/gcc-python-tree.c @@ -28,6 +28,7 @@ #include "tree-flow.h" /* for op_symbol_code */ +extern PyGccWrapperTypeObject gcc_IntegerCstType; /* Unfortunately, decl_as_string() is only available from the C++ @@ -55,6 +56,14 @@ raise_cplusplus_only(const char *what) what); } +static PyObject * +raise_not_during_lto(const char *what) +{ + return PyErr_Format(PyExc_RuntimeError, + "%s is not available during link-time optimization", + what); +} + //#include "rtl.h" /* "struct rtx_def" is declarted within rtl.h, c.f: @@ -105,6 +114,22 @@ gcc_Tree_str(struct PyGccTree * self) long gcc_Tree_hash(struct PyGccTree * self) { + if (Py_TYPE(self) == (PyTypeObject*)&gcc_ComponentRefType) { + return (long)TREE_OPERAND(self->t, 0) ^ (long)TREE_OPERAND(self->t, 1); + } + + if (Py_TYPE(self) == (PyTypeObject*)&gcc_IntegerCstType) { + /* Ensure that hash(cst) == hash(int(cst)) */ + PyObject *constant = gcc_IntegerConstant_get_constant(self, NULL); + long result; + if (!constant) { + return -1; + } + result = PyObject_Hash(constant); + Py_DECREF(constant); + return result; + } + /* Use the ptr as the hash value: */ return (long)self->t; } @@ -118,6 +143,22 @@ gcc_Tree_richcompare(PyObject *o1, PyObject *o2, int op) int cond; PyObject *result_obj; + /* Specialcases: */ + if (Py_TYPE(o1) == (PyTypeObject*)&gcc_IntegerCstType) { + o1 = gcc_IntegerConstant_get_constant((struct PyGccTree *)o1, NULL); + if (!o1) { return NULL; } + result_obj = PyObject_RichCompare(o1, o2, op); + Py_DECREF(o1); + return result_obj; + } + if (Py_TYPE(o2) == (PyTypeObject*)&gcc_IntegerCstType) { + o2 = gcc_IntegerConstant_get_constant((struct PyGccTree *)o2, 
NULL); + if (!o2) { return NULL; } + result_obj = PyObject_RichCompare(o1, o2, op); + Py_DECREF(o2); + return result_obj; + } + if (!PyObject_TypeCheck(o1, (PyTypeObject*)&gcc_TreeType)) { result_obj = Py_NotImplemented; goto out; @@ -130,6 +171,28 @@ gcc_Tree_richcompare(PyObject *o1, PyObject *o2, int op) treeobj1 = (struct PyGccTree *)o1; treeobj2 = (struct PyGccTree *)o2; + if (Py_TYPE(o1) == (PyTypeObject*)&gcc_ComponentRefType) { + if (Py_TYPE(o2) == (PyTypeObject*)&gcc_ComponentRefType) { + switch (op) { + case Py_EQ: + cond = ((TREE_OPERAND(treeobj1->t, 0) == TREE_OPERAND(treeobj2->t, 0)) && + (TREE_OPERAND(treeobj1->t, 1) == TREE_OPERAND(treeobj2->t, 1))); + break; + + case Py_NE: + cond = !((TREE_OPERAND(treeobj1->t, 0) == TREE_OPERAND(treeobj2->t, 0)) && + (TREE_OPERAND(treeobj1->t, 1) == TREE_OPERAND(treeobj2->t, 1))); + break; + + default: + result_obj = Py_NotImplemented; + goto out; + } + result_obj = cond ? Py_True : Py_False; + goto out; + } + } + switch (op) { case Py_EQ: cond = (treeobj1->t == treeobj2->t); @@ -216,6 +279,62 @@ gcc_FunctionDecl_get_fullname(struct PyGccTree *self, void *closure) return gcc_python_string_from_string(str); } +PyObject * +gcc_ArrayRef_repr(PyObject *self) +{ + PyObject *array_repr = NULL; + PyObject *index_repr = NULL; + PyObject *result = NULL; + + array_repr = PyGcc_GetReprOfAttribute(self, "array"); + if (!array_repr) { + goto error; + } + index_repr =PyGcc_GetReprOfAttribute(self, "index"); + if (!index_repr) { + goto error; + } + + result = gcc_python_string_from_format("%s(array=%s, index=%s)", + Py_TYPE(self)->tp_name, + gcc_python_string_as_string(array_repr), + gcc_python_string_as_string(index_repr)); + + error: + Py_XDECREF(array_repr); + Py_XDECREF(index_repr); + + return result; +} + +PyObject * +gcc_ComponentRef_repr(PyObject *self) +{ + PyObject *target_repr = NULL; + PyObject *field_repr = NULL; + PyObject *result = NULL; + + target_repr = PyGcc_GetReprOfAttribute(self, "target"); + if (!target_repr) 
{ + goto error; + } + field_repr = PyGcc_GetReprOfAttribute(self, "field"); + if (!field_repr) { + goto error; + } + + result = gcc_python_string_from_format("%s(target=%s, field=%s)", + Py_TYPE(self)->tp_name, + gcc_python_string_as_string(target_repr), + gcc_python_string_as_string(field_repr)); + + error: + Py_XDECREF(target_repr); + Py_XDECREF(field_repr); + + return result; +} + PyObject * gcc_IdentifierNode_repr(struct PyGccTree * self) { @@ -279,9 +398,19 @@ gcc_Type_get_attributes(struct PyGccTree *self, void *closure) return NULL; } +/* + Weakly import c_sizeof_or_alignof_type; it's not available in lto1 + (link-time optimization) +*/ +__typeof__ (c_sizeof_or_alignof_type) c_sizeof_or_alignof_type __attribute__ ((weak)); + PyObject * gcc_Type_get_sizeof(struct PyGccTree *self, void *closure) { + if (NULL == c_sizeof_or_alignof_type) { + return raise_not_during_lto("Type.sizeof"); + } + /* c_sizeof_or_alignof_type wants a location; we use a fake one */ @@ -308,6 +437,28 @@ gcc_Type_get_sizeof(struct PyGccTree *self, void *closure) return NULL; } +__typeof__ (c_common_signed_type) c_common_signed_type __attribute__ ((weak)); + +PyObject * +gcc_IntegerType_get_signed_equivalent(struct PyGccTree * self, void *closure) +{ + if (NULL == c_common_signed_type) + return raise_not_during_lto("gcc.IntegerType.signed_equivalent"); + + return gcc_python_make_wrapper_tree(c_common_signed_type(self->t)); +} + +__typeof__ (c_common_unsigned_type) c_common_unsigned_type __attribute__ ((weak)); + +PyObject * +gcc_IntegerType_get_unsigned_equivalent(struct PyGccTree * self, void *closure) +{ + if (NULL == c_common_unsigned_type) + return raise_not_during_lto("gcc.IntegerType.unsigned_equivalent"); + + return gcc_python_make_wrapper_tree(c_common_unsigned_type(self->t)); +} + PyObject * gcc_FunctionType_get_argument_types(struct PyGccTree * self, void *closure) { @@ -528,6 +679,30 @@ gcc_TypeDecl_get_pointer(struct PyGccTree *self, void *closure) return 
gcc_python_make_wrapper_tree(build_pointer_type(decl_type)); } +PyObject * +gcc_SsaName_repr(struct PyGccTree * self) +{ + int version; + PyObject *repr_var = NULL; + PyObject *result = NULL; + + version = SSA_NAME_VERSION(self->t); + repr_var = PyGcc_GetReprOfAttribute((PyObject*)self, "var"); + if (!repr_var) { + goto error; + } + + result = gcc_python_string_from_format("%s(var=%s, version=%i)", + Py_TYPE(self)->tp_name, + gcc_python_string_as_string(repr_var), + version); + + error: + Py_XDECREF(repr_var); + + return result; +} + PyObject * gcc_TreeList_repr(struct PyGccTree * self) { @@ -581,6 +756,40 @@ gcc_TreeList_repr(struct PyGccTree * self) return result; } +PyObject * +gcc_CaseLabelExpr_repr(PyObject * self) +{ + PyObject *low_repr = NULL; + PyObject *high_repr = NULL; + PyObject *target_repr = NULL; + PyObject *result = NULL; + + low_repr = PyGcc_GetReprOfAttribute(self, "low"); + if (!low_repr) { + goto cleanup; + } + high_repr = PyGcc_GetReprOfAttribute(self, "high"); + if (!high_repr) { + goto cleanup; + } + target_repr = PyGcc_GetReprOfAttribute(self, "target"); + if (!target_repr) { + goto cleanup; + } + + result = gcc_python_string_from_format("%s(low=%s, high=%s, target=%s)", + Py_TYPE(self)->tp_name, + gcc_python_string_as_string(low_repr), + gcc_python_string_as_string(high_repr), + gcc_python_string_as_string(target_repr)); + + cleanup: + Py_XDECREF(low_repr); + Py_XDECREF(high_repr); + Py_XDECREF(target_repr); + return result; +} + PyObject * gcc_NamespaceDecl_lookup(struct PyGccTree * self, PyObject *args, PyObject *kwargs) diff --git a/gcc-python-wrappers.h b/gcc-python-wrappers.h index f79a74b6..dc479855 100644 --- a/gcc-python-wrappers.h +++ b/gcc-python-wrappers.h @@ -87,6 +87,9 @@ PyObject * gcc_Location_richcompare(PyObject *o1, PyObject *o2, int op); /* gcc-python-cfg.c: */ +PyObject * +gcc_BasicBlock_repr(struct PyGccBasicBlock * self); + PyObject * gcc_BasicBlock_get_preds(PyGccBasicBlock *self, void *closure); @@ -134,6 +137,18 @@ 
gcc_Tree_get_symbol(PyObject *cls, PyObject *args); PyObject * gcc_Function_repr(struct PyGccFunction * self); +long +gcc_Function_hash(struct PyGccFunction * self); + +PyObject * +gcc_Function_richcompare(PyObject *o1, PyObject *o2, int op); + +PyObject * +gcc_ArrayRef_repr(PyObject *self); + +PyObject * +gcc_ComponentRef_repr(PyObject *self); + PyObject * gcc_Declaration_get_name(struct PyGccTree *self, void *closure); @@ -170,6 +185,12 @@ gcc_RealCst_get_constant(struct PyGccTree * self, void *closure); PyObject * gcc_RealCst_repr(struct PyGccTree * self); +PyObject * +gcc_IntegerType_get_signed_equivalent(struct PyGccTree * self, void *closure); + +PyObject * +gcc_IntegerType_get_unsigned_equivalent(struct PyGccTree * self, void *closure); + PyObject * gcc_MethodType_get_argument_types(struct PyGccTree * self,void *closure); @@ -179,9 +200,15 @@ gcc_StringConstant_repr(struct PyGccTree * self); PyObject * gcc_TypeDecl_get_pointer(struct PyGccTree *self, void *closure); +PyObject * +gcc_SsaName_repr(struct PyGccTree * self); + PyObject * gcc_TreeList_repr(struct PyGccTree * self); +PyObject * +gcc_CaseLabelExpr_repr(PyObject *self); + PyObject * gcc_NamespaceDecl_lookup(struct PyGccTree * self, PyObject *args, PyObject *kwargs); @@ -202,6 +229,12 @@ gcc_Gimple_repr(struct PyGccGimple * self); PyObject * gcc_Gimple_str(struct PyGccGimple * self); +long +gcc_Gimple_hash(struct PyGccGimple * self); + +PyObject * +gcc_Gimple_richcompare(PyObject *o1, PyObject *o2, int op); + PyObject * gcc_Gimple_walk_tree(struct PyGccGimple * self, PyObject *args, PyObject *kwargs); @@ -214,6 +247,9 @@ gcc_Gimple_get_str_no_uid(struct PyGccGimple *self, void *closure); PyObject * gcc_GimpleCall_get_args(struct PyGccGimple *self, void *closure); +PyObject * +gcc_GimpleLabel_repr(struct PyGccGimple * self); + PyObject * gcc_GimplePhi_get_args(struct PyGccGimple *self, void *closure); diff --git a/gcc-python.c b/gcc-python.c index f2356f3b..664e2733 100644 --- a/gcc-python.c +++ 
b/gcc-python.c @@ -76,6 +76,12 @@ static void trace_callback_for_##NAME(void *gcc_data, void *user_data) \ # undef DEFEVENT #endif /* GCC_PYTHON_TRACE_ALL_EVENTS */ +/* + Weakly import parse_in; it will be non-NULL in the C and C++ frontend, + but it's not available lto1 (link-time optimization) +*/ +__typeof__ (parse_in) parse_in __attribute__ ((weak)); + static PyObject* gcc_python_define_macro(PyObject *self, PyObject *args, PyObject *kwargs) @@ -348,6 +354,32 @@ gcc_python_get_dump_base_name(PyObject *self, PyObject *noargs) return gcc_python_string_or_none(dump_base_name); } +static PyObject * +gcc_python_get_is_lto(PyObject *self, PyObject *noargs) +{ + /* + The generated gcc/options.h has: + #ifdef GENERATOR_FILE + extern bool in_lto_p; + #else + bool x_in_lto_p; + #define in_lto_p global_options.x_in_lto_p + #endif + */ + return PyBool_FromLong(in_lto_p); +} + +static PyObject * +gcc_python_add_error(PyObject *self, PyObject *noargs) +{ + /* Fake an error, for working around bugs in GCC's error reporting + e.g. 
http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54962 + */ + errorcount++; + Py_RETURN_NONE; +} + + static PyMethodDef GccMethods[] = { {"register_attribute", (PyCFunction)gcc_python_register_attribute, @@ -435,6 +467,12 @@ static PyMethodDef GccMethods[] = { {"get_dump_base_name", gcc_python_get_dump_base_name, METH_NOARGS, "Get the base name used when writing dump files"}, + {"is_lto", gcc_python_get_is_lto, METH_NOARGS, + "Determine whether or not we're being invoked during link-time optimization"}, + + {"_add_error", gcc_python_add_error, METH_NOARGS, + "Fake an error, for working around bugs in GCC's error reporting"}, + /* Garbage collection */ {"_force_garbage_collection", gcc_python__force_garbage_collection, METH_VARARGS, "Forcibly trigger a single run of GCC's garbage collector"}, @@ -915,6 +953,25 @@ void gcc_python_print_exception(const char *msg) PyErr_PrintEx(1); } +PyObject * +PyGcc_GetReprOfAttribute(PyObject *obj, const char *attrname) +{ + PyObject *attr_obj; + PyObject *attr_repr; + + attr_obj = PyObject_GetAttrString(obj, attrname); + if (!attr_obj) { + return NULL; + } + attr_repr = PyObject_Repr(attr_obj); + if (!attr_repr) { + Py_DECREF(attr_obj); + return NULL; + } + + return attr_repr; +} + /* PEP-7 Local variables: diff --git a/gcc-python.h b/gcc-python.h index 00dbcc65..57e1e166 100644 --- a/gcc-python.h +++ b/gcc-python.h @@ -278,6 +278,8 @@ gcc_python_autogenerated_tree_type_for_tree(tree t, int borrow_ref); PyGccWrapperTypeObject* gcc_python_autogenerated_tree_type_for_tree_code(enum tree_code code, int borrow_ref); +extern PyGccWrapperTypeObject gcc_ComponentRefType; + /* autogenerated-variable.c */ int autogenerated_variable_init_types(void); void autogenerated_variable_add_types(PyObject *m); @@ -313,6 +315,12 @@ char * gcc_python_strdup(const char *str) __attribute__((nonnull)); void gcc_python_print_exception(const char *msg); +/* + Shorthand for: repr(getattr(obj, attrname)) +*/ +PyObject * +PyGcc_GetReprOfAttribute(PyObject *obj, 
const char *attrname); + /* Python 2 vs Python 3 compat: */ #if PY_MAJOR_VERSION == 3 /* Python 3: use PyUnicode for "str" and PyLong for "int": */ diff --git a/gcc-with-sm b/gcc-with-sm new file mode 100755 index 00000000..1bf1b07f --- /dev/null +++ b/gcc-with-sm @@ -0,0 +1,96 @@ +#!/usr/bin/env python +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +# Harness for invoking GCC with the sm Python code within the python +# plugin, whilst dealing with some options +# (This code runs under the regular Python interpreter, not within gcc) + +import argparse +import os +import subprocess +import sys +import tempfile + +abspath = os.path.abspath(os.path.dirname(sys.argv[0])) + +# By default, look for the plugin relative to this harness. This is intended +# to make it easier during development, so that we can make other projects with +# CC=../../../gcc-python-plugin/gcc-with-sm +# and similar. +# +# When installed, this should be fixed up. 
+PLUGIN = os.path.join(abspath, 'python.so') + +# Create arg parser: +parser = argparse.ArgumentParser(usage='%(prog)s [sm-files] [options] gcc-options') + +parser.add_argument('--enable-profile', action='store_true', + help='write out profiling information') +parser.add_argument('--enable-timing', action='store_true', + help='dump timing information to stderr') + +# Only consume args we understand, leaving the rest for gcc: +ns, other_args = parser.parse_known_args() +if 0: + print(ns) + print(other_args) + +sm_paths = [arg for arg in other_args + if arg.endswith('.sm')] + +other_args = [arg for arg in other_args + if not arg.endswith('.sm')] + +with tempfile.NamedTemporaryFile() as tmpfile: + tmpfile.write('from sm import main\n') + tmpfile.write('from sm.options import Options\n') + tmpfile.write('from sm.parser import parse_file\n') + tmpfile.write('checkers = [\n') + for smpath in sm_paths: + tmpfile.write(' parse_file(%r),\n' % smpath) + tmpfile.write(']\n') + tmpfile.write('options = Options(during_lto=%s,\n' + % ('-flto' in other_args)) + tmpfile.write(' enable_profile=%s,\n' + % (ns.enable_profile)) + tmpfile.write(' enable_timing=%s,\n' + % (ns.enable_timing)) + tmpfile.write(' )\n') + tmpfile.write('main(checkers, options=options)\n') + tmpfile.flush() + + if 0: + with open(tmpfile.name, 'r') as f: + print(f.read()) + + args = [os.environ.get('CC', 'gcc'), + ('-fplugin=%s' % PLUGIN), + ('-fplugin-arg-python-script=%s' % tmpfile.name)] + args += other_args # (the args we didn't consume) + + if 0: + print(' '.join(args)) + p = subprocess.Popen(args) + + try: + r = p.wait() + except KeyboardInterrupt: + r = 1 + + +sys.exit(r) diff --git a/gccutils/__init__.py b/gccutils/__init__.py index fab993d7..58b62b63 100644 --- a/gccutils/__init__.py +++ b/gccutils/__init__.py @@ -15,9 +15,9 @@ # along with this program. If not, see # . 
-import gcc +import sys -from six.moves import xrange +import gcc def sorted_dict_repr(d): return '{' + ', '.join(['%r: %r' % (k, d[k]) @@ -101,7 +101,7 @@ def get_nonnull_arguments(funtype): # No "nonnull" attribute was given: return frozenset() -def invoke_dot(dot): +def invoke_dot(dot, name='test'): from subprocess import Popen, PIPE if 1: @@ -116,12 +116,12 @@ def invoke_dot(dot): # # Presumably a font selection/font metrics issue fmt = 'svg' - - p = Popen(['dot', '-T%s' % fmt, '-o', 'test.%s' % fmt], + filename = '%s.%s' % (name, fmt) + p = Popen(['dot', '-T%s' % fmt, '-o', filename], stdin=PIPE) p.communicate(dot.encode('ascii')) - p = Popen(['xdg-open', 'test.%s' % fmt]) + p = Popen(['xdg-open', filename]) p.communicate() def pprint(obj): @@ -232,20 +232,8 @@ class DotPrettyPrinter(PrettyPrinter): # Base class for various kinds of data visualizations that use graphviz # (aka ".dot" source files) def to_html(self, text): - html_escape_table = { - "&": "&", - '"': """, - "'": "'", - ">": ">", - "<": "<", - - # 'dot' doesn't seem to like these: - '{': '{', - '}': '}', - - ']': ']', - } - return "".join(html_escape_table.get(c,c) for c in str(text)) + from gccutils.dot import to_html + return to_html(text) def _dot_td(self, text, align="left", colspan=1, escape=1, bgcolor=None, port=None): @@ -575,7 +563,7 @@ def write(self, out): def _calc_col_widths(self): result = [] - for colIndex in xrange(self.numcolumns): + for colIndex in range(self.numcolumns): result.append(self._calc_col_width(colIndex)) return result @@ -675,8 +663,21 @@ def visit(n): print('already visited %s' % n.decl) for n in nodes: - if not get_dsts(n): + if not list(get_dsts(n)): visit(n) return result +# Replacement for gcc.error() and gcc.inform() for use with LTO +# as a workaround for http://gcc.gnu.org/bugzilla/show_bug.cgi?id=54962 +def diagnostic(loc, kind, msg): + sys.stderr.write('%s:%i:%i: %s: %s\n' + % (loc.file, loc.line, loc.column, + kind, msg)) + +def error(loc, msg): + 
diagnostic(loc, 'error', msg) + gcc._add_error() + +def inform(loc, msg): + diagnostic(loc, 'note', msg) diff --git a/generate-cfg-c.py b/generate-cfg-c.py index f855316a..cdcd53b5 100644 --- a/generate-cfg-c.py +++ b/generate-cfg-c.py @@ -121,7 +121,7 @@ def generate_basic_block(): tp_dealloc = 'gcc_python_wrapper_dealloc', struct_name = 'PyGccBasicBlock', tp_new = 'PyType_GenericNew', - #tp_repr = '(reprfunc)gcc_BasicBlock_repr', + tp_repr = '(reprfunc)gcc_BasicBlock_repr', #tp_str = '(reprfunc)gcc_BasicBlock_repr', tp_getset = getsettable.identifier, ) diff --git a/generate-function-c.py b/generate-function-c.py index 6aea87ee..c3b61bb3 100644 --- a/generate-function-c.py +++ b/generate-function-c.py @@ -76,6 +76,8 @@ def generate_function(): tp_new = 'PyType_GenericNew', tp_repr = '(reprfunc)gcc_Function_repr', tp_str = '(reprfunc)gcc_Function_repr', + tp_hash = '(hashfunc)gcc_Function_hash', + tp_richcompare = 'gcc_Function_richcompare', tp_getset = getsettable.identifier, ) cu.add_defn(pytype.c_defn()) diff --git a/generate-gimple-c.py b/generate-gimple-c.py index 9643b006..ab463de5 100644 --- a/generate-gimple-c.py +++ b/generate-gimple-c.py @@ -227,6 +227,8 @@ def generate_gimple(): tp_getset = getsettable.identifier, tp_repr = '(reprfunc)gcc_Gimple_repr', tp_str = '(reprfunc)gcc_Gimple_str', + tp_hash = '(hashfunc)gcc_Gimple_hash', + tp_richcompare = 'gcc_Gimple_richcompare', tp_flags = 'Py_TPFLAGS_BASETYPE', ) methods = PyMethodTable('gcc_Gimple_methods', []) @@ -383,6 +385,7 @@ def make_getset_Switch(): for gt in gimple_types: cc = gt.camel_cased_string() + tp_repr = None getsettable = None if cc == 'GimpleAsm': getsettable = make_getset_Asm() @@ -398,6 +401,8 @@ def make_getset_Switch(): getsettable = make_getset_Phi() elif cc == 'GimpleSwitch': getsettable = make_getset_Switch() + elif cc == 'GimpleLabel': + tp_repr = '(reprfunc)gcc_GimpleLabel_repr' if getsettable: cu.add_defn(getsettable.c_defn()) @@ -411,7 +416,7 @@ def make_getset_Switch(): tp_new 
= 'PyType_GenericNew', tp_base = '&gcc_GimpleType', tp_getset = getsettable.identifier if getsettable else None, - #tp_repr = '(reprfunc)gcc_Gimple_repr', + tp_repr = tp_repr, #tp_str = '(reprfunc)gcc_Gimple_str', ) cu.add_defn(pytype.c_defn()) diff --git a/generate-tree-c.py b/generate-tree-c.py index 2c07c717..7e4484a3 100644 --- a/generate-tree-c.py +++ b/generate-tree-c.py @@ -386,11 +386,9 @@ def add_complex_getter(name, doc): add_simple_getter('unsigned', 'PyBool_FromLong(TYPE_UNSIGNED(self->t))', "Boolean: True for 'unsigned', False for 'signed'") - add_simple_getter('signed_equivalent', - 'gcc_python_make_wrapper_tree(c_common_signed_type(self->t))', + add_complex_getter('signed_equivalent', 'The gcc.IntegerType for the signed version of this type') - add_simple_getter('unsigned_equivalent', - 'gcc_python_make_wrapper_tree(c_common_unsigned_type(self->t))', + add_complex_getter('unsigned_equivalent', 'The gcc.IntegerType for the unsigned version of this type') add_simple_getter('max_value', 'gcc_python_make_wrapper_tree(TYPE_MAX_VALUE(self->t))', @@ -421,6 +419,7 @@ def add_complex_getter(name, doc): add_simple_getter('index', 'gcc_python_make_wrapper_tree(TREE_OPERAND(self->t, 1))', "The gcc.Tree for index being referenced'") + tp_repr = '(reprfunc)gcc_ArrayRef_repr' if tree_type.SYM == 'COMPONENT_REF': add_simple_getter('target', @@ -429,6 +428,7 @@ def add_complex_getter(name, doc): add_simple_getter('field', 'gcc_python_make_wrapper_tree(TREE_OPERAND(self->t, 1))', "The gcc.FieldDecl for the field within the target'") + tp_repr = '(reprfunc)gcc_ComponentRef_repr' if tree_type.SYM == 'MEM_REF': add_simple_getter('operand', @@ -465,6 +465,12 @@ def add_complex_getter(name, doc): 'PyBool_FromLong(TREE_STATIC(self->t))', "Boolean: is this variable to be allocated with static storage") + if tree_type.SYM in ('VAR_DECL', 'PARM_DECL', 'FUNCTION_DECL', + 'LABEL_DECL', 'RESULT_DECL', 'CONST_DECL'): + add_simple_getter('context', + 
'gcc_python_make_wrapper_tree(DECL_CONTEXT(self->t))', + 'The context of the declaration: a gcc.FunctionDecl or gcc.TranslationUnitDecl') + if tree_type.SYM == 'CONSTRUCTOR': add_complex_getter('elements', "The elements of this constructor, as a list of (index, gcc.Tree) pairs") @@ -554,6 +560,8 @@ def add_complex_getter(name, doc): add_simple_getter('version', 'gcc_python_int_from_long(SSA_NAME_VERSION(self->t))', "The SSA version number of this SSA name'") + tp_repr = '(reprfunc)gcc_SsaName_repr' + if tree_type.SYM == 'TREE_LIST': # c.f. "struct GTY(()) tree_list": @@ -569,6 +577,7 @@ def add_complex_getter(name, doc): add_simple_getter('target', 'gcc_python_make_wrapper_tree(CASE_LABEL(self->t))', "The target of the case label, as a gcc.LabelDecl") + tp_repr = '(reprfunc)gcc_CaseLabelExpr_repr' cu.add_defn(getsettable.c_defn()) cu.add_defn(methods.c_defn()) diff --git a/libcpychecker/absinterp.py b/libcpychecker/absinterp.py index c66e9ed2..71814718 100644 --- a/libcpychecker/absinterp.py +++ b/libcpychecker/absinterp.py @@ -1119,6 +1119,15 @@ def describe_stmt(stmt): else: return str(stmt.loc) +def get_locations(fun): + # given a gcc.Function, get all Location instances within it + result = [] + for bb in fun.cfg.basic_blocks: + if bb.gimple: + for idx, stmt in enumerate(bb.gimple): + result.append(Location(bb, idx)) + return result + class Location(object): """A location within a CFG: a gcc.BasicBlock together with an index into the gimple list. 
(We don't support SSA passes)""" @@ -1130,6 +1139,9 @@ def __init__(self, bb, idx): self.bb = bb self.idx = idx + def __hash__(self): + return hash(self.bb) ^ hash(self.idx) + def __repr__(self): return ('Location(bb=%i, idx=%i)' % (self.bb.index, self.idx)) @@ -1139,14 +1151,26 @@ def __str__(self): return ('block %i stmt:%i : %20r : %s' % (self.bb.index, self.idx, stmt, stmt)) + def prev_locs(self): + """Get a list of (Location, gcc.Edge) instances, for where we came from""" + if self.bb.gimple and self.idx > 0: + # Previous gimple statement within this BB: + return [(Location(self.bb, self.idx - 1), None)] + else: + # At start of gimple statements: prior BBs: + return [(Location.get_block_end(inedge.src), inedge) + for inedge in self.bb.preds + if inedge.src.gimple] + def next_locs(self): - """Get a list of Location instances, for what can happen next""" + """Get a list of (Location, gcc.Edge) instances, for what can happen next""" if self.bb.gimple and len(self.bb.gimple) > self.idx + 1: # Next gimple statement: - return [Location(self.bb, self.idx + 1)] + return [(Location(self.bb, self.idx + 1), None)] else: # At end of gimple statements: successor BBs: - return [Location.get_block_start(outedge.dest) for outedge in self.bb.succs] + return [(Location.get_block_start(outedge.dest), outedge) + for outedge in self.bb.succs] def next_loc(self): """Get the next Location, for when it's unique""" @@ -1161,6 +1185,11 @@ def next_loc(self): def __eq__(self, other): return self.bb == other.bb and self.idx == other.idx + @classmethod + def get_block_end(cls, bb): + # Don't bother iterating through phi_nodes if there aren't any: + return Location(bb, len(bb.gimple) - 1) + @classmethod def get_block_start(cls, bb): # Don't bother iterating through phi_nodes if there aren't any: @@ -2089,7 +2118,7 @@ def get_transitions(self): return self._get_transitions_for_stmt(stmt) else: result = [] - for loc in self.loc.next_locs(): + for loc, edge in self.loc.next_locs(): newstate = 
self.copy() newstate.loc = loc result.append(Transition(self, newstate, '')) diff --git a/libcpychecker/constraints.py b/libcpychecker/constraints.py new file mode 100644 index 00000000..f6694135 --- /dev/null +++ b/libcpychecker/constraints.py @@ -0,0 +1,636 @@ +import gcc +from libcpychecker.absinterp import Location, get_locations + +############################################################################ +# Hierarchy of Constraint classes. Instances are immutable +############################################################################ + +class Constraint: + def __and__(self, other): + return And([self, other]) + + def __or__(self, other): + return Or([self, other]) + + def simplify(self, fubar): + return self + + def delete(self, term): + # Recursively delete the constraints on the given term + # For use when handling assignment, to remove the constraints from + # the old value of the LHS. + raise NotImplementedError() + + def as_html(self): + raise NotImplementedError() + +class Boolean(Constraint): + def __init__(self, terms): + assert isinstance(terms, (set, frozenset, list)) + self.terms = frozenset(terms) + + def __eq__(self, other): + if self.__class__ == other.__class__: + if self.terms == other.terms: + return True + + def __hash__(self): + return hash(self.terms) + + def __repr__(self): + return '%s(%s)' % (self.__class__.__name__, + ', ' .join(repr(term) for term in self.terms)) + + def __str__(self): + return '(' + (' %s ' % self.name).join(str(term) for term in self.terms) + ')' + + def simplify(self, fubar): + newterms = set() + for term in self.terms: + term = term.simplify(fubar) + + # promote + # And(..., And(a, b, c), ....) + # to: + # And(..., a, b, c, ...) + # and analogously for Or(..., Or(), ...) 
+ if term.__class__ == self.__class__: + for innerterm in term.terms: + newterms.add(innerterm) + else: + # Eliminate redundant + # and Top() + # and + # or Bottom() + # terms: + if isinstance(term, Top): + if isinstance(self, Or): + # "or Top()" is always True: + return Top() + elif isinstance(term, Bottom): + if isinstance(self, And): + # "and Bottom()" is impossible: + return Bottom() + newterms.add(term) + + # Now that we've handled the "always True" and "impossible" case, strip + # remaining "Top()" and "Bottom()" terms, as long as there are + # other terms (in which case they are redundant): + if len(newterms) > 1: + newterms = {term for term in newterms + if not isinstance(term, Top)} + if len(newterms) > 1: + newterms = {term for term in newterms + if not isinstance(term, Bottom)} + + # If we have just a single term, eliminate this And() or Or() clause + # around it, just using the term itself: + if len(newterms) == 1: + return newterms.pop() + + # Verify that And() clauses are actually possible: + # This isn't a full solver, but will catch some cases that are + # impossible + if isinstance(self, And) and fubar: + # Gather predicates by LHS: + # dict of expr -> And() condition affecting that expr: + exprpreds = {} + satisfiableterms = set() + for term in newterms: + if isinstance(term, Predicate): + print(term) + # FIXME: only the lhs for now + if term.lhs in exprpreds: + exprpreds[term.lhs] = exprpreds[term.lhs] & term + if isinstance(exprpreds[term.lhs], Bottom): + return Bottom() + else: + exprpreds[term.lhs] = term + else: + satisfiableterms.add(term) + for expr in exprpreds: + satisfiableterms.add(exprpreds[expr]) + return And(satisfiableterms).simplify(False) + + return self.__class__(newterms) + + def delete(self, term): + newterms = set() + for t in self.terms: + t = t.delete(term) + newterms.add(t) + return self.__class__(newterms).simplify(True) + + def as_html(self): + return ('(\n' + + ('\n %s\n' % self.name).join( + ['\n'.join(' %s' % line + 
for line in term.as_html().splitlines()) + for term in self.terms]) + + '\n)') + +class And(Boolean): + name = 'and' + + def __and__(self, other): + return And(list(self.terms) + [other]) + + #def __or__(self, other): + # return And([term | other for term in self.terms]) + +class Or(Boolean): + name = 'or' + + def __and__(self, other): + return Or([term & other for term in self.terms]) + + def __or__(self, other): + return Or(list(self.terms) + [other]) + +class Predicate(Constraint): + def __init__(self, lhs, op, rhs): + self.lhs = lhs + self.op = op + self.rhs = rhs + + def __repr__(self): + return 'Predicate(%r, %r, %r)' % (self.lhs, self.op, self.rhs) + + def __str__(self): + return '%s %s %s' % (self.lhs, self.op, self.rhs) + + def __eq__(self, other): + if isinstance(other, Predicate): + if self.lhs == other.lhs: + if self.op == other.op: + if self.rhs == other.rhs: + return True + + def __hash__(self): + return hash(self.lhs) ^ hash(self.op) ^ hash(self.rhs) + + def __and__(self, other): + print('%s __and__ %s' % (self, other)) + if isinstance(other, Predicate): + if self.lhs == other.lhs: + if isinstance(self.rhs, (int, long)) and isinstance(other.rhs, (int, long)): + if self.op == '==' and other.op == '==': + # We have (EXPR == valA) AND (EXPR == valB) + if self.rhs == other.rhs: + # second clause has no effect: + raise 'foo' + return self + else: + # impossible: + return Bottom() + elif self.op == '==' and other.op == '!=': + if self.rhs == other.rhs: + # impossible: + return Bottom() + else: + # second clause has no effect: + raise 'foo' + return self + elif self.op == '!=' and other.op == '==': + if self.rhs == other.rhs: + # impossible: + raise 'foo' + return Bottom() + else: + # second clause is a better condition: + return other + elif self.op == '==' and other.op == '<=': + if self.rhs <= other.rhs: + # second clause is redundant: + return self + else: + # impossible: + return Bottom() + elif self.op == '==' and other.op == '<': + if self.rhs < 
other.rhs: + # second clause is redundant: + return self + else: + # impossible: + return Bottom() + elif self.op == '==' and other.op == '>=': + if self.rhs >= other.rhs: + # second clause is redundant: + return self + else: + # impossible: + return Bottom() + elif self.op == '==' and other.op == '>': + if self.rhs > other.rhs: + # second clause is redundant: + return self + else: + # impossible: + return Bottom() + + return Constraint.__and__(self, other) + + def delete(self, term): + if self.lhs == term or self.rhs == term: + return Top() + return self + + def as_html(self): + return '%s %s %s' % (self.lhs, self.op, self.rhs) + +class IsUnitialized(Constraint): + def __init__(self, var): + self.var = var + + def __repr__(self): + return 'IsUnitialized(%r)' % self.var + + def __str__(self): + return 'IsUnitialized(%r)' % self.var + +class Note(Constraint): + def __init__(self, msg): + self.msg = msg + + def __hash__(self): + return hash(self.msg) + + def __eq__(self, other): + if isinstance(other, Note): + if self.msg == other.msg: + return True + + def __repr__(self): + return 'Note(%r)' % self.msg + + def __str__(self): + return repr(self.msg) + + def delete(self, term): + return self + + def as_html(self): + return '%s' % self.msg + +class Top(Constraint): + # no constraints, and reachable + def __repr__(self): + return 'Top()' + + def __str__(self): + return 'Top()' + + def __eq__(self, other): + return isinstance(other, Top) + + def __hash__(self): + return 1 + + def delete(self, term): + return self + + def as_html(self): + return 'Top()' + +class Bottom(Constraint): + # no constraints, but not reachable + def __repr__(self): + return 'Bottom()' + + def __str__(self): + return 'Bottom()' + + def __eq__(self, other): + return isinstance(other, Bottom) + + def __hash__(self): + return 0 + + def delete(self, term): + return self + + def as_html(self): + return 'Bottom()' + +############################################################################ + +class 
DummyExpr: + def __init__(self, text): + self.text = text + def __repr__(self): + return 'DummyExpr(%r)' % self.text + def __str__(self): + return self.text + + def __hash__(self): + return hash(self.text) + + def __eq__(self, other): + if isinstance(other, DummyExpr): + return self.text == other.text + +class Solution: + def __init__(self, fun): + # a mapping from Location to Constraint + # i.e. a snapshot of what we know at each location within the function + self.fun = fun + self.locations = get_locations(fun) + self.loc_to_constraint = {loc:Bottom() for loc in self.locations} + + # The initial state is the first block after entry (which has no statements): + initbb = fun.cfg.entry.succs[0].dest + initloc = Location(initbb, 0) + self.loc_to_constraint[initloc] = Top() + + # FIXME: initial state of vars + + def __eq__(self, other): + if not isinstance(other, Solution): + return False + return self.loc_to_constraint == other.loc_to_constraint + + def as_html_tr(self, out, stage, oldsol): + out.write('') + out.write('%s' % stage) + for loc in self.locations: + if oldsol: + oldconstraint = oldsol.loc_to_constraint[loc] + else: + oldconstraint = None + constraint = self.loc_to_constraint[loc] + if not (constraint == oldconstraint): + out.write('
%s
' % constraint.as_html()) + else: + out.write('
%s
' % constraint.as_html()) + out.write('\n') + + def eval(self, expr): + print('expr: %r' % expr) + if isinstance(expr, gcc.VarDecl): + return expr + if isinstance(expr, gcc.Constant): + return expr.constant + if isinstance(expr, gcc.ArrayRef): + return expr + if isinstance(expr, DummyExpr): + return expr + if expr is None: + return None + raise foo + + + def get_constraint_for_edge(self, srcloc, dstloc, edge): + class NewObj: + def __repr__(self): + return 'NewObj()' + def __str__(self): + return 'NewObj()' + class DerefField: + def __init__(self, ptr, fieldname): + self.ptr = ptr + self.fieldname = fieldname + def __repr__(self): + return 'DerefField(%r, %r)' % (self.ptr, self.fieldname) + def __str__(self): + return '%s->%s' % (self.ptr, self.fieldname) + + stmt = srcloc.get_stmt() + print(' %s ' % stmt) + + srcconstraint = self.loc_to_constraint[srcloc] + + if isinstance(stmt, gcc.GimpleAssign): + print(' %r %r %r' % (stmt.lhs, stmt.rhs, stmt.exprcode)) + if stmt.exprcode == gcc.IntegerCst: + rhs = self.eval(stmt.rhs[0]) + elif stmt.exprcode == gcc.VarDecl: + rhs = DummyExpr(stmt.rhs[0].name) # FIXME + elif stmt.exprcode == gcc.PlusExpr: + rhs = DummyExpr(str(stmt)) # FIXME + elif stmt.exprcode == gcc.Constructor: + rhs = DummyExpr(str(stmt)) # FIXME + else: + raise UnhandledAssignment() + lhs = self.eval(stmt.lhs) + # Remove old value from srcconstraint: + return srcconstraint.delete(lhs) & Predicate(lhs, '==', rhs) + + elif isinstance(stmt, gcc.GimpleCall): + print('%r %r %r' % (stmt.lhs, stmt.fn, stmt.args)) + if isinstance(stmt.fn, gcc.AddrExpr): + if isinstance(stmt.fn.operand, gcc.FunctionDecl): + print('stmt.fn.operand.name: %r' % stmt.fn.operand.name) + fnname = stmt.fn.operand.name + def make_result(message, op, value): + note = Note(message) + if stmt.lhs: + return note & Predicate(self.eval(stmt.lhs), op, value) + else: + return note + + def make_success(op, value): + return make_result('%s() succeeded' % fnname, op, value) + + def make_failure(op, 
value): + return make_result('%s() failed' % fnname, op, value) + + if fnname == 'PyArg_ParseTuple': + success = make_success('==', 1) + # FIXME: also update the args ^^^ + failure = make_failure('==', 0) + return srcconstraint & (success | failure) + elif fnname == 'PyList_New': + + newobj = NewObj() # FIXME + success = make_success('!=', 0) + """ + success = (Note('%s() succeeded' % fnname) & + Predicate(self.eval(stmt.lhs), '!=', 0) + #Predicate(self.eval(stmt.lhs), '==', newobj) & + #Predicate(newobj, '!=', 0) #& + #Predicate(DerefField(newobj, 'ob_refcnt'), '==', 1) & # FIXME + #Predicate(DerefField(newobj, 'ob_type'), '==', 'PyList_Type') + ) + """ + failure = make_failure('==', 0) + return srcconstraint & (success | failure) + elif fnname == 'PyList_Append': + # etc + success = make_success('==', 0) + failure = make_failure('==', -1) + return srcconstraint & (success | failure) + elif fnname == 'PyLong_FromLong': + newobj = NewObj() # FIXME + success = make_success('!=', 0) + """ + success = (Note('%s() succeeded' % fnname) & + Predicate(self.eval(stmt.lhs), '!=', 0) + #Predicate(self.eval(stmt.lhs), '==', newobj) & + #Predicate(newobj, '!=', 0) #& + #Predicate(DerefField(newobj, 'ob_refcnt'), '==', 1) & # FIXME + #Predicate(DerefField(newobj, 'ob_type'), '==', 'PyLong_Type') + ) + """ + failure = make_failure('==', 0) + return srcconstraint & (success | failure) + + elif fnname == 'random': + # FIXME: only listing this one for completeness + return srcconstraint & Top() # FIXME: make lhs not be uninitialized + else: + # Unknown function: + + # FIXME: + raise UnknownFunction() + + return Top() # FIXME: make lhs not be uninitialized + raise CantHandlePointerToFunctionYet() + elif isinstance(stmt, gcc.GimpleCond): + print(' %r %r %r %r %r' % (stmt.lhs, stmt.rhs, stmt.exprcode, stmt.true_label, stmt.false_label)) + print('edge: %r' % edge) + + if stmt.exprcode == gcc.EqExpr: + op = '==' if edge.true_value else '!=' + elif stmt.exprcode == gcc.LtExpr: + op = '<' 
if edge.true_value else '>=' + elif stmt.exprcode == gcc.LeExpr: + op = '<=' if edge.true_value else '>' + else: + raise UnhandledConditional() # FIXME + + cond = Predicate(self.eval(stmt.lhs), op, self.eval(stmt.rhs)) + return srcconstraint & cond + elif isinstance(stmt, gcc.GimpleLabel): + return srcconstraint & Top() + else: + raise UnhandledStatementType() + raise ShouldntGetHere() + +class HtmlLog: + def __init__(self, out, solver): + self.out = out + out.write('\n') + + # Write headings: + out.write('') + out.write('') + for loc in solver.locations: + out.write('' + % (loc.bb.index, loc.idx)) + out.write('\n') + out.write('') + out.write('') + for loc in solver.locations: + out.write('' % loc.get_stmt()) + out.write('\n') + out.write('') + out.write('') + for loc in solver.locations: + out.write('' % loc.get_stmt()) + out.write('\n') + +class Solver: + def __init__(self, fun): + self.fun = fun + self.locations = get_locations(fun) + self.solutions = [] + + def solve(self): + # calculate least fixed point + with open('constraints.html', 'w') as out: + html = HtmlLog(out, self) + while True: + idx = len(self.solutions) + if self.solutions: + oldsol = self.solutions[-1] + else: + oldsol = None + newsol = Solution(self.fun) + if oldsol: + # FIXME: optimize using a worklist: + for loc in self.locations: + newval = oldsol.loc_to_constraint[loc] + for prevloc, edge in loc.prev_locs(): + print(' edge from: %s' % prevloc) + print(' to: %s' % loc) + value = oldsol.get_constraint_for_edge(prevloc, loc, edge) + print(' str(value): %s' % value) + print('repr(value): %r' % value) + newval = newval | value + newval = newval.simplify(True) + print(' new value: %s' % newval) + + newsol.loc_to_constraint[loc] = newval + # TODO: update based on transfer functions + self.solutions.append(newsol) + print(newsol.loc_to_constraint) + newsol.as_html_tr(out, idx, oldsol) + if oldsol == newsol: + # We've reached a fixed point + break + + if len(self.solutions) > 20: + # bail out: 
termination isn't working for some reason + raise BailOut() + + +class ConstraintPass(gcc.GimplePass): + def __init__(self): + gcc.GimplePass.__init__(self, 'constraint-pass-gimple') + + def execute(self, fun): + print(fun) + + if 0: + # Dump location information + for loc in get_locations(fun): + print(loc) + for prevloc in loc.prev_locs(): + print(' prev: %s' % prevloc) + for nextloc in loc.next_locs(): + print(' next: %s' % nextloc) + + solver = Solver(fun) + solver.solve() + #with open('solution.html', 'w') as out: + # constraint.dump_as_html(out) + +def main(): + gimple_ps = ConstraintPass() + + """ + c1 = IsUnitialized('count') + print('c1') + print(c1) + print(repr(c1)) + + c2 = Or([And([Predicate('D1', '!=', 0), + Predicate('count', '>=', -0x80000000), + Predicate('count', '<', 0x80000000), + Note('PyArg_ParseTuple() succeeded')]), + And([Predicate('D1', '==', 0), + IsUnitialized('count'), + Note('PyArg_ParseTuple() failed')]) + ]) + print('c2') + print(c2) + print(repr(c2)) + + c3 = ((Predicate('D1', '!=', 0) & + Predicate('count', '>=', -0x80000000) & + Predicate('count', '<', 0x80000000) & + Note('PyArg_ParseTuple() succeeded')) | + (Predicate('D1', '==', 0) & + IsUnitialized('count') & + Note('PyArg_ParseTuple() failed'))) + print('c3') + print(c3) + print(repr(c3)) + """ + + if 1: + # non-SSA version: + gimple_ps.register_before('*warn_function_return') + else: + # SSA version: + gimple_ps.register_after('ssa') diff --git a/run-test-suite.py b/run-test-suite.py index e4281cbf..0c5c9756 100644 --- a/run-test-suite.py +++ b/run-test-suite.py @@ -19,7 +19,7 @@ # subdirectory containing a "script.py" is regarded as a test case. 
# # A test consists of: -# input.c/cc: C/C++ source code to be compiled +# input*.c/cc: C/C++ source code to be compiled # script.py: a Python script to be run by GCC during said compilation # stdout.txt: (optional) the expected stdout from GCC (empty if not present) # stderr.txt: (optional) as per stdout.txt @@ -161,6 +161,11 @@ def _cleanup(self, text): # to narrow/wide implementations ("UCS2"/"UCS4") line = re.sub('PyUnicodeUCS4_AsUTF8String', 'PyUnicode_AsUTF8String', line) + # Avoid hardcoding timings from unittest's output: + line = re.sub(r'Ran ([0-9]+ tests?) in ([0-9]+\.[0-9]+s)', + r'Ran \1 in #s', + line) + result += line + '\n' return result @@ -182,7 +187,7 @@ def diff(self, label): from difflib import unified_diff result = '' for line in unified_diff(self.expdata.splitlines(), - self.actual.splitlines(), + self._cleanup(self.actual).splitlines(), fromfile='Expected %s' % label, tofile='Actual %s' % label, lineterm=""): @@ -199,15 +204,20 @@ def _extra_info(self): return self.stream.diff(self.label) -def get_source_file(testdir): - # Locate the source file within the test directory, - # trying various different suffixes (by programming language) +def get_source_files(testdir): + """ + Locate source files within the test directory, + of the form "input*.c", "input*.cc" etc + trying various different suffixes by programming language + """ + inputfiles = [] suffixes = ['.c', '.cc', '.java', '.f', '.f90'] for suffix in suffixes: - inputfile = os.path.join(testdir, 'input%s' % suffix) - if os.path.exists(inputfile): - return inputfile - raise RuntimeError('Source file not found') + from glob import glob + inputfiles += glob(os.path.join(testdir, 'input*%s' % suffix)) + if not inputfiles: + raise RuntimeError('Source file not found') + return inputfiles config_h = 'autogenerated-config.h' def parse_autogenerated_config_h(): @@ -232,7 +242,7 @@ def __init__(self, reason): def run_test(testdir): # Compile each 'input.c', using 'script.py' # Assume success and 
empty stdout; compare against expected stderr, or empty if file not present - inputfile = get_source_file(testdir) + inputfiles = get_source_files(testdir) outfile = os.path.join(testdir, 'output.o') script_py = os.path.join(testdir, 'script.py') out = TestStream(os.path.join(testdir, 'stdout.txt')) @@ -258,16 +268,26 @@ def run_test(testdir): # Generate the command-line for invoking gcc: args = [os.environ.get('CC', 'gcc')] - args += ['-c'] # (don't run the linker) + if len(inputfiles) == 1: + args += ['-c'] # (don't run the linker) + else: + args += ['-fPIC', '-shared'] + # Force LTO when there's more than one source file: + args += ['-flto', '-flto-partition=none'] args += ['-o', outfile] args += ['-fplugin=%s' % os.path.abspath('python.so'), '-fplugin-arg-python-script=%s' % script_py] # Special-case: add the python include dir (for this runtime) if the C code # uses Python.h: - with open(inputfile, 'r') as f: - code = f.read() - if '#include ' in code: + def uses_python_headers(): + for inputfile in inputfiles: + with open(inputfile, 'r') as f: + code = f.read() + if '#include ' in code: + return True + + if uses_python_headers(): args += ['-I' + get_python_inc()] # If there's a getopts.py, run it to get additional test-specific @@ -284,8 +304,12 @@ def run_test(testdir): raise CommandError() args += opts_out.split() - # and the source file goes at the end: - args += [inputfile] + # and the source files go at the end: + args += inputfiles + + if options.show: + # Show the gcc invocation: + print(' '.join(args)) # Invoke the compiler: p = Popen(args, env=env, stdout=PIPE, stderr=PIPE) @@ -297,6 +321,11 @@ def run_test(testdir): #print 'err: %r' % err.actual exitcode_actual = p.wait() + if options.show: + # then the user wants to see the gcc invocation directly + sys.stdout.write(out.actual) + sys.stderr.write(err.actual) + # Expected exit code # By default, we expect success if the expected stderr is empty, and # and failure if it's non-empty. 
@@ -330,6 +359,9 @@ def run_test(testdir): type="string", dest="excluded_dirs", help="exclude tests in DIR and below", metavar="DIR") +parser.add_option("-s", "--show", + action="store_true", dest="show", default=False, + help="Show stdout, stderr and the command line for each test") (options, args) = parser.parse_args() # print (options, args) diff --git a/slow-tests/sm/integration/lemon/getopts.py b/slow-tests/sm/integration/lemon/getopts.py new file mode 100644 index 00000000..2e1b1f00 --- /dev/null +++ b/slow-tests/sm/integration/lemon/getopts.py @@ -0,0 +1 @@ +print("-D=_U_") diff --git a/slow-tests/sm/integration/lemon/input.c b/slow-tests/sm/integration/lemon/input.c new file mode 100644 index 00000000..edb14fe1 --- /dev/null +++ b/slow-tests/sm/integration/lemon/input.c @@ -0,0 +1,4920 @@ +/* +** Copyright (c) 1991, 1994, 1997, 1998 D. Richard Hipp +** +** This file contains all sources (including headers) to the LEMON +** LALR(1) parser generator. The sources have been combined into a +** single file to make it easy to include LEMON as part of another +** program. +** +** This program is free software; you can redistribute it and/or +** modify it under the terms of the GNU General Public +** License as published by the Free Software Foundation; either +** version 2 of the License, or (at your option) any later version. +** +** This program is distributed in the hope that it will be useful, +** but WITHOUT ANY WARRANTY; without even the implied warranty of +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +** General Public License for more details. +** +** You should have received a copy of the GNU General Public +** License along with this library; if not, write to the +** Free Software Foundation, Inc., 59 Temple Place - Suite 330, +** Boston, MA 02111-1307, USA. 
+** +** Author contact information: +** drh@acm.org +** http://www.hwaci.com/drh/ +** +** Updated to sqlite lemon version 1.59 +** $Id: lemon.c 40806 2012-02-02 11:54:33Z jmayer $ +*/ +#ifdef HAVE_CONFIG_H +# include "config.h" +#endif + +#include +#include +#include +#include +#include +#include + +/* + * Wrapper around "isupper()", "islower()", etc. to cast the argument to + * "unsigned char", so that they at least handle non-ASCII 8-bit characters + * (and don't provoke a pile of warnings from GCC). + */ +#define safe_isupper(c) isupper((unsigned char)(c)) +#define safe_islower(c) islower((unsigned char)(c)) +#define safe_isalpha(c) isalpha((unsigned char)(c)) +#define safe_isalnum(c) isalnum((unsigned char)(c)) +#define safe_isspace(c) isspace((unsigned char)(c)) + +/* + * XXX - on modern UN*Xes, this is declared in , but that's + * not available on Windows; what header declares it on Windows? + */ + +#ifndef __WIN32__ +# if defined(_WIN32) || defined(WIN32) +# define __WIN32__ +# endif +#endif + +#ifdef __WIN32__ +extern int access(); +#else +#include +#endif + +#define PRIVATE static +/* #define PRIVATE */ + +#ifdef TEST +#define MAXRHS 5 /* Set low to exercise exception code */ +#else +#define MAXRHS 1000 +#endif + +static char *msort(char *, char **, int (*)(const void *, const void *)); + +/********** From the file "struct.h" *************************************/ +/* +** Principal data structures for the LEMON parser generator. 
+*/ + +typedef enum {LEMON_FALSE=0, LEMON_TRUE} Boolean; + +/* Symbols (terminals and nonterminals) of the grammar are stored +** in the following: */ +struct symbol { + char *name; /* Name of the symbol */ + int index; /* Index number for this symbol */ + enum { + TERMINAL, + NONTERMINAL, + MULTITERMINAL + } type; /* Symbols are all either TERMINALS or NTs */ + struct rule *rule; /* Linked list of rules of this (if an NT) */ + struct symbol *fallback; /* fallback token in case this token doesn't parse */ + int prec; /* Precedence if defined (-1 otherwise) */ + enum e_assoc { + LEFT, + RIGHT, + NONE, + UNK + } assoc; /* Associativity if precedence is defined */ + char *firstset; /* First-set for all rules of this symbol */ + Boolean lambda; /* True if NT and can generate an empty string */ + int useCnt; /* Number of times used */ + char *destructor; /* Code which executes whenever this symbol is + ** popped from the stack during error processing */ + int destLineno; /* Line number for start of destructor */ + char *datatype; /* The data type of information held by this + ** object. Only used if type==NONTERMINAL */ + int dtnum; /* The data type number. In the parser, the value + ** stack is a union. The .yy%d element of this + ** union is the correct data type for this object */ + /* The following fields are used by MULTITERMINALs only */ + int nsubsym; /* Number of constituent symbols in the MULTI */ + struct symbol **subsym; /* Array of constituent symbols */ +}; + +/* Each production rule in the grammar is stored in the following +** structure. 
*/ +struct rule { + struct symbol *lhs; /* Left-hand side of the rule */ + char *lhsalias; /* Alias for the LHS (NULL if none) */ + int lhsStart; /* True if left-hand side is the start symbol */ + int ruleline; /* Line number for the rule */ + int nrhs; /* Number of RHS symbols */ + struct symbol **rhs; /* The RHS symbols */ + char **rhsalias; /* An alias for each RHS symbol (NULL if none) */ + int line; /* Line number at which code begins */ + char *code; /* The code executed when this rule is reduced */ + struct symbol *precsym; /* Precedence symbol for this rule */ + int index; /* An index number for this rule */ + Boolean canReduce; /* True if this rule is ever reduced */ + struct rule *nextlhs; /* Next rule with the same LHS */ + struct rule *next; /* Next rule in the global list */ +}; + +/* A configuration is a production rule of the grammar together with +** a mark (dot) showing how much of that rule has been processed so far. +** Configurations also contain a follow-set which is a list of terminal +** symbols which are allowed to immediately follow the end of the rule. 
+** Every configuration is recorded as an instance of the following: */ +struct config { + struct rule *rp; /* The rule upon which the configuration is based */ + int dot; /* The parse point */ + char *fws; /* Follow-set for this configuration only */ + struct plink *fplp; /* Follow-set forward propagation links */ + struct plink *bplp; /* Follow-set backwards propagation links */ + struct state *stp; /* Pointer to state which contains this */ + enum { + COMPLETE, /* The status is used during followset and */ + INCOMPLETE /* shift computations */ + } status; + struct config *next; /* Next configuration in the state */ + struct config *bp; /* The next basis configuration */ +}; + +/* Every shift or reduce operation is stored as one of the following */ +struct action { + struct symbol *sp; /* The look-ahead symbol */ + enum e_action { + SHIFT, + ACCEPT, + REDUCE, + ERROR, + SSCONFLICT, /* A shift/shift conflict */ + SRCONFLICT, /* Was a reduce, but part of a conflict */ + RRCONFLICT, /* Was a reduce, but part of a conflict */ + SH_RESOLVED, /* Was a shift. Precedence resolved conflict */ + RD_RESOLVED, /* Was reduce. Precedence resolved conflict */ + NOT_USED /* Deleted by compression */ + } type; + union { + struct state *stp; /* The new state, if a shift */ + struct rule *rp; /* The rule, if a reduce */ + } x; + struct action *next; /* Next action for this state */ + struct action *collide; /* Next action with the same hash */ +}; + +/* Each state of the generated parser's finite state machine +** is encoded as an instance of the following structure. 
*/ +struct state { + struct config *bp; /* The basis configurations for this state */ + struct config *cfp; /* All configurations in this set */ + int statenum; /* Sequential number for this state */ + struct action *ap; /* Array of actions for this state */ + int nTknAct, nNtAct; /* Number of actions on terminals and nonterminals */ + int iTknOfst, iNtOfst; /* yy_action[] offset for terminals and nonterms */ + int iDflt; /* Default action */ +}; +#define NO_OFFSET (-2147483647) + +/* A followset propagation link indicates that the contents of one +** configuration followset should be propagated to another whenever +** the first changes. */ +struct plink { + struct config *cfp; /* The configuration to which linked */ + struct plink *next; /* The next propagate link */ +}; + +/* The state vector for the entire parser generator is recorded as +** follows. (LEMON uses no global variables and makes little use of +** static variables. Fields in the following structure can be thought +** of as begin global variables in the program.) 
*/ +struct lemon { + struct state **sorted; /* Table of states sorted by state number */ + struct rule *rule; /* List of all rules */ + int nstate; /* Number of states */ + int nrule; /* Number of rules */ + int nsymbol; /* Number of terminal and nonterminal symbols */ + int nterminal; /* Number of terminal symbols */ + struct symbol **symbols; /* Sorted array of pointers to symbols */ + int errorcnt; /* Number of errors */ + struct symbol *errsym; /* The error symbol */ + struct symbol *wildcard; /* Token that matches anything */ + char *name; /* Name of the generated parser */ + char *arg; /* Declaration of the 3th argument to parser */ + char *tokentype; /* Type of terminal symbols in the parser stack */ + char *vartype; /* The default type of non-terminal symbols */ + char *start; /* Name of the start symbol for the grammar */ + char *stacksize; /* Size of the parser stack */ + char *include; /* Code to put at the start of the C file */ + char *error; /* Code to execute when an error is seen */ + char *overflow; /* Code to execute on a stack overflow */ + char *failure; /* Code to execute on parser failure */ + char *accept; /* Code to execute when the parser excepts */ + char *extracode; /* Code appended to the generated file */ + char *tokendest; /* Code to execute to destroy token data */ + char *vardest; /* Code for the default non-terminal destructor */ + char *filename; /* Name of the input file */ + char *basename; /* Basename of inputer file (no directory or path */ + char *outname; /* Name of the current output file */ + char *outdirname; /* Name of the output directory, specified by user */ + char *templatename; /* Name of template file to use, specified by user */ + char *tokenprefix; /* A prefix added to token names in the .h file */ + int nconflict; /* Number of parsing conflicts */ + int tablesize; /* Size of the parse tables */ + int basisflag; /* Print only basis configurations */ + int has_fallback; /* True if any %fallback is seen in the 
grammar */ + char *argv0; /* Name of the program */ +}; + +void memory_error(void); +#define MemoryCheck(X) if((X)==0){ \ + memory_error(); \ +} + +/********* From the file "assert.h" ************************************/ +static struct action *Action_new(void); +static struct action *Action_sort(struct action *); + + +/********** From the file "build.h" ************************************/ +void FindRulePrecedences(struct lemon *); +void FindFirstSets(struct lemon *); +void FindStates(struct lemon *); +void FindLinks(struct lemon *); +void FindFollowSets(struct lemon *); +void FindActions(struct lemon *); + +/********* From the file "configlist.h" *********************************/ +void Configlist_init(void); +struct config *Configlist_add(struct rule *, int); +struct config *Configlist_addbasis(struct rule *, int); +void Configlist_closure(struct lemon *); +void Configlist_sort(void); +void Configlist_sortbasis(void); +struct config *Configlist_return(void); +struct config *Configlist_basis(void); +void Configlist_eat(struct config *); +void Configlist_reset(void); + +/********* From the file "error.h" ***************************************/ +#if __GNUC__ >= 2 +void ErrorMsg( const char *, int, const char *, ... ) + __attribute__((format (printf, 3, 4))); +#else +void ErrorMsg( const char *, int, const char *, ... 
); +#endif + +/****** From the file "option.h" ******************************************/ +struct s_options { + enum { OPT_FLAG=1, OPT_INT, OPT_DBL, OPT_STR, + OPT_FFLAG, OPT_FINT, OPT_FDBL, OPT_FSTR} type; + const char *label; + char *arg; + const char *message; +}; +int optinit(char**,struct s_options*,FILE*); +int optnargs(void); +char *get_optarg(int); +void get_opterr(int); +void optprint(void); + +/******** From the file "parse.h" *****************************************/ +void Parse(struct lemon *lemp); + +/********* From the file "plink.h" ***************************************/ +struct plink *Plink_new(void); +void Plink_add(struct plink **, struct config *); +void Plink_copy(struct plink **, struct plink *); +void Plink_delete(struct plink *); + +/********** From the file "report.h" *************************************/ +void Reprint(struct lemon *); +void ReportOutput(struct lemon *); +void ReportTable(struct lemon *, int); +void ReportHeader(struct lemon *); +void CompressTables(struct lemon *); +void ResortStates(struct lemon *); + +/********** From the file "set.h" ****************************************/ +void SetSize(int N); /* All sets will be of size N */ +char *SetNew(void); /* A new set for element 0..N */ +void SetFree(char*); /* Deallocate a set */ + +int SetAdd(char*,int); /* Add element to a set */ +int SetUnion(char *A,char *B); /* A <- A U B, thru element N */ + +#define SetFind(X,Y) (X[Y]) /* True if Y is in set X */ + +/**************** From the file "table.h" *********************************/ +/* +** All code in this file has been automatically generated +** from a specification in the file +** "table.q" +** by the associative array code building program "aagen". +** Do not edit this file! Instead, edit the specification +** file, then rerun aagen. +*/ +/* +** Code for processing tables in the LEMON parser generator. 
+*/ + +/* Routines for handling a strings */ + +char *Strsafe(const char *); + +void Strsafe_init(void); +int Strsafe_insert(char *); +char *Strsafe_find(const char *); + +/* Routines for handling symbols of the grammar */ + +struct symbol *Symbol_new(const char *x); +int Symbolcmpp(const void *, const void *); +void Symbol_init(void); +int Symbol_insert(struct symbol *, char *); +struct symbol *Symbol_find(const char *); +struct symbol *Symbol_Nth(int); +int Symbol_count(void); +struct symbol **Symbol_arrayof(void); + +/* Routines to manage the state table */ + +int Configcmp(const void *, const void *); +struct state *State_new(void); +void State_init(void); +int State_insert(struct state *, struct config *); +struct state *State_find(struct config *); +struct state **State_arrayof(void); + +/* Routines used for efficiency in Configlist_add */ + +void Configtable_init(void); +int Configtable_insert(struct config *); +struct config *Configtable_find(struct config *); +void Configtable_clear(int(*)(struct config *)); +/****************** From the file "action.c" *******************************/ +/* +** Routines processing parser actions in the LEMON parser generator. +*/ + +/* Allocate a new parser action */ +static struct action *Action_new(void){ + static struct action *freelist = 0; + struct action *new; + + if( freelist==0 ){ + int i; + int amt = 100; + freelist = (struct action *)calloc(amt, sizeof(struct action)); + if( freelist==0 ){ + fprintf(stderr,"Unable to allocate memory for a new parser action."); + exit(1); + } + for(i=0; inext; + return new; +} + +/* Compare two actions for sorting purposes. 
Return negative, zero, or +** positive if the first action is less than, equal to, or greater than +** the first +*/ +static int actioncmp(const void *ap1_arg, const void *ap2_arg) +{ + const struct action *ap1 = ap1_arg, *ap2 = ap2_arg; + int rc; + rc = ap1->sp->index - ap2->sp->index; + if( rc==0 ){ + rc = (int)ap1->type - (int)ap2->type; + } + if( rc==0 && ap1->type==REDUCE ){ + rc = ap1->x.rp->index - ap2->x.rp->index; + } + return rc; +} + +/* Sort parser actions */ +struct action *Action_sort(struct action *ap) +{ + ap = (struct action *)msort((char *)ap,(char **)&ap->next,actioncmp); + return ap; +} + +static void Action_add(struct action **app, enum e_action type, struct symbol *sp, + void *arg) +{ + struct action *new; + new = Action_new(); + new->next = *app; + *app = new; + new->type = type; + new->sp = sp; + if( type==SHIFT ){ + new->x.stp = (struct state *)arg; + }else{ + new->x.rp = (struct rule *)arg; + } +} +/********************** New code to implement the "acttab" module ***********/ +/* +** This module implements routines use to construct the yy_action[] table. 
+*/ + +/* +** The state of the yy_action table under construction is an instance of +** the following structure +*/ +typedef struct acttab acttab; +struct acttab { + int nAction; /* Number of used slots in aAction[] */ + int nActionAlloc; /* Slots allocated for aAction[] */ + struct { + int lookahead; /* Value of the lookahead token */ + int action; /* Action to take on the given lookahead */ + } *aAction, /* The yy_action[] table under construction */ + *aLookahead; /* A single new transaction set */ + int mnLookahead; /* Minimum aLookahead[].lookahead */ + int mnAction; /* Action associated with mnLookahead */ + int mxLookahead; /* Maximum aLookahead[].lookahead */ + int nLookahead; /* Used slots in aLookahead[] */ + int nLookaheadAlloc; /* Slots allocated in aLookahead[] */ +}; + +/* Return the number of entries in the yy_action table */ +#define acttab_size(X) ((X)->nAction) + +/* The value for the N-th entry in yy_action */ +#define acttab_yyaction(X,N) ((X)->aAction[N].action) + +/* The value for the N-th entry in yy_lookahead */ +#define acttab_yylookahead(X,N) ((X)->aAction[N].lookahead) + +/* Allocate a new acttab structure */ +static acttab *acttab_alloc(void){ + acttab *p = malloc( sizeof(*p) ); + if( p==0 ){ + fprintf(stderr,"Unable to allocate memory for a new acttab."); + exit(1); + } + memset(p, 0, sizeof(*p)); + return p; +} + +/* Add a new action to the current transaction set +*/ +static void acttab_action(acttab *p, int lookahead, int action){ + if( p->nLookahead>=p->nLookaheadAlloc ){ + p->nLookaheadAlloc += 25; + p->aLookahead = realloc( p->aLookahead, + sizeof(p->aLookahead[0])*p->nLookaheadAlloc ); + if( p->aLookahead==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + } + if( p->nLookahead==0 ){ + p->mxLookahead = lookahead; + p->mnLookahead = lookahead; + p->mnAction = action; + }else{ + if( p->mxLookaheadmxLookahead = lookahead; + if( p->mnLookahead>lookahead ){ + p->mnLookahead = lookahead; + p->mnAction = action; + } + } + 
p->aLookahead[p->nLookahead].lookahead = lookahead; + p->aLookahead[p->nLookahead].action = action; + p->nLookahead++; +} + +/* +** Add the transaction set built up with prior calls to acttab_action() +** into the current action table. Then reset the transaction set back +** to an empty set in preparation for a new round of acttab_action() calls. +** +** Return the offset into the action table of the new transaction. +*/ +static int acttab_insert(acttab *p){ + int i, j, k, n; + assert( p->nLookahead>0 ); + + /* Make sure we have enough space to hold the expanded action table + ** in the worst case. The worst case occurs if the transaction set + ** must be appended to the current action table + */ + n = p->mxLookahead + 1; + if( p->nAction + n >= p->nActionAlloc ){ + int oldAlloc = p->nActionAlloc; + p->nActionAlloc = p->nAction + n + p->nActionAlloc + 20; + p->aAction = realloc( p->aAction, + sizeof(p->aAction[0])*p->nActionAlloc); + if( p->aAction==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=oldAlloc; inActionAlloc; i++){ + p->aAction[i].lookahead = -1; + p->aAction[i].action = -1; + } + } + + /* Scan the existing action table looking for an offset where we can + ** insert the current transaction set. Fall out of the loop when that + ** offset is found. In the worst case, we fall out of the loop when + ** i reaches p->nAction, which means we append the new transaction set. + ** + ** i is the index in p->aAction[] where p->mnLookahead is inserted. 
+ */ + for(i=0; inAction+p->mnLookahead; i++){ + if( p->aAction[i].lookahead<0 ){ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 ) break; + if( p->aAction[k].lookahead>=0 ) break; + } + if( jnLookahead ) continue; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) break; + } + if( j==p->nAction ){ + break; /* Fits in empty slots */ + } + }else if( p->aAction[i].lookahead==p->mnLookahead ){ + if( p->aAction[i].action!=p->mnAction ) continue; + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + if( k<0 || k>=p->nAction ) break; + if( p->aLookahead[j].lookahead!=p->aAction[k].lookahead ) break; + if( p->aLookahead[j].action!=p->aAction[k].action ) break; + } + if( jnLookahead ) continue; + n = 0; + for(j=0; jnAction; j++){ + if( p->aAction[j].lookahead<0 ) continue; + if( p->aAction[j].lookahead==j+p->mnLookahead-i ) n++; + } + if( n==p->nLookahead ){ + break; /* Same as a prior transaction set */ + } + } + } + /* Insert transaction set at index i. */ + for(j=0; jnLookahead; j++){ + k = p->aLookahead[j].lookahead - p->mnLookahead + i; + p->aAction[k] = p->aLookahead[j]; + if( k>=p->nAction ) p->nAction = k+1; + } + p->nLookahead = 0; + + /* Return the offset that is added to the lookahead in order to get the + ** index into yy_action of the action */ + return i - p->mnLookahead; +} + +/********************** From the file "build.c" *****************************/ +/* +** Routines to construction the finite state machine for the LEMON +** parser generator. +*/ + +/* Find a precedence symbol of every rule in the grammar. +** +** Those rules which have a precedence symbol coded in the input +** grammar using the "[symbol]" construct will already have the +** rp->precsym field filled. Other rules take as their precedence +** symbol the first RHS symbol with a defined precedence. 
If there +** are not RHS symbols with a defined precedence, the precedence +** symbol field is left blank. +*/ +void FindRulePrecedences(struct lemon *xp) +{ + struct rule *rp; + for(rp=xp->rule; rp; rp=rp->next){ + if( rp->precsym==0 ){ + int i, j; + for(i=0; inrhs && rp->precsym==0; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + if( sp->subsym[j]->prec>=0 ){ + rp->precsym = sp->subsym[j]; + break; + } + } + }else if( sp->prec>=0 ){ + rp->precsym = rp->rhs[i]; + } + } + } + } + return; +} + +/* Find all nonterminals which will generate the empty string. +** Then go back and compute the first sets of every nonterminal. +** The first set is the set of all terminal symbols which can begin +** a string generated by that nonterminal. +*/ +void FindFirstSets(struct lemon *lemp) +{ + int i, j; + struct rule *rp; + int progress; + + for(i=0; insymbol; i++){ + lemp->symbols[i]->lambda = LEMON_FALSE; + } + for(i=lemp->nterminal; insymbol; i++){ + lemp->symbols[i]->firstset = SetNew(); + } + + /* First compute all lambdas */ + do{ + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->lhs->lambda ) continue; + for(i=0; inrhs; i++){ + struct symbol *sp = rp->rhs[i]; + if( sp->type!=TERMINAL || sp->lambda==LEMON_FALSE ) break; + } + if( i==rp->nrhs ){ + rp->lhs->lambda = LEMON_TRUE; + progress = 1; + } + } + }while( progress ); + + /* Now compute all first sets */ + do{ + struct symbol *s1, *s2; + progress = 0; + for(rp=lemp->rule; rp; rp=rp->next){ + s1 = rp->lhs; + for(i=0; inrhs; i++){ + s2 = rp->rhs[i]; + if( s2->type==TERMINAL ){ + progress += SetAdd(s1->firstset,s2->index); + break; + }else if( s2->type==MULTITERMINAL ){ + for(j=0; jnsubsym; j++){ + progress += SetAdd(s1->firstset,s2->subsym[j]->index); + } + break; + }else if( s1==s2 ){ + if( s1->lambda==LEMON_FALSE ) break; + }else{ + progress += SetUnion(s1->firstset,s2->firstset); + if( s2->lambda==LEMON_FALSE ) break; + } + } + } + }while( progress ); 
+ return; +} + +/* Compute all LR(0) states for the grammar. Links +** are added to between some states so that the LR(1) follow sets +** can be computed later. +*/ +PRIVATE struct state *getstate(struct lemon *); /* forward reference */ +void FindStates(struct lemon *lemp) +{ + struct symbol *sp; + struct rule *rp; + + Configlist_init(); + + /* Find the start symbol */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ){ + ErrorMsg(lemp->filename,0, +"The specified start symbol \"%s\" is not \ +in a nonterminal of the grammar. \"%s\" will be used as the start \ +symbol instead.",lemp->start,lemp->rule->lhs->name); + lemp->errorcnt++; + sp = lemp->rule->lhs; + } + }else{ + sp = lemp->rule->lhs; + } + + /* Make sure the start symbol doesn't occur on the right-hand side of + ** any rule. Report an error if it does. (YACC would generate a new + ** start symbol in this case.) */ + for(rp=lemp->rule; rp; rp=rp->next){ + int i; + for(i=0; inrhs; i++){ + if( rp->rhs[i]==sp ){ /* FIX ME: Deal with multiterminals */ + ErrorMsg(lemp->filename,0, +"The start symbol \"%s\" occurs on the \ +right-hand side of a rule. This will result in a parser which \ +does not work properly.",sp->name); + lemp->errorcnt++; + } + } + } + + /* The basis configuration set for the first state + ** is all rules which have the start symbol as their + ** left-hand side */ + for(rp=sp->rule; rp; rp=rp->nextlhs){ + struct config *newcfp; + rp->lhsStart = 1; + newcfp = Configlist_addbasis(rp,0); + SetAdd(newcfp->fws,0); + } + + /* Compute the first state. All other states will be + ** computed automatically during the computation of the first one. + ** The returned pointer to the first state is not used. */ + (void)getstate(lemp); + return; +} + +/* Return a pointer to a state which is described by the configuration +** list which has been built from calls to Configlist_add. 
+*/ +PRIVATE void buildshifts(struct lemon *, struct state *); /* Forwd ref */ +PRIVATE struct state *getstate(struct lemon *lemp) +{ + struct config *cfp, *bp; + struct state *stp; + + /* Extract the sorted basis of the new state. The basis was constructed + ** by prior calls to "Configlist_addbasis()". */ + Configlist_sortbasis(); + bp = Configlist_basis(); + + /* Get a state with the same basis */ + stp = State_find(bp); + if( stp ){ + /* A state with the same basis already exists! Copy all the follow-set + ** propagation links from the state under construction into the + ** preexisting state, then return a pointer to the preexisting state */ + struct config *x, *y; + for(x=bp, y=stp->bp; x && y; x=x->bp, y=y->bp){ + Plink_copy(&y->bplp,x->bplp); + Plink_delete(x->fplp); + x->fplp = x->bplp = 0; + } + cfp = Configlist_return(); + Configlist_eat(cfp); + }else{ + /* This really is a new state. Construct all the details */ + Configlist_closure(lemp); /* Compute the configuration closure */ + Configlist_sort(); /* Sort the configuration closure */ + cfp = Configlist_return(); /* Get a pointer to the config list */ + stp = State_new(); /* A new state structure */ + MemoryCheck(stp); + stp->bp = bp; /* Remember the configuration basis */ + stp->cfp = cfp; /* Remember the configuration closure */ + stp->statenum = lemp->nstate++; /* Every state gets a sequence number */ + stp->ap = 0; /* No actions, yet. */ + State_insert(stp,stp->bp); /* Add to the state table */ + buildshifts(lemp,stp); /* Recursively compute successor states */ + } + return stp; +} + +/* +** Return true if two symbols are the same. +*/ +static int same_symbol(struct symbol *a,struct symbol *b) +{ + int i; + if( a==b ) return 1; + if( a->type!=MULTITERMINAL ) return 0; + if( b->type!=MULTITERMINAL ) return 0; + if( a->nsubsym!=b->nsubsym ) return 0; + for(i=0; insubsym; i++){ + if( a->subsym[i]!=b->subsym[i] ) return 0; + } + return 1; +} + +/* Construct all successor states to the given state. 
A "successor" +** state is any state which can be reached by a shift action. +*/ +PRIVATE void buildshifts( + struct lemon *lemp, + struct state *stp) /* The state from which successors are computed */ +{ + struct config *cfp; /* For looping thru the config closure of "stp" */ + struct config *bcfp; /* For the inner loop on config closure of "stp" */ + struct config *new; /* */ + struct symbol *sp; /* Symbol following the dot in configuration "cfp" */ + struct symbol *bsp; /* Symbol following the dot in configuration "bcfp" */ + struct state *newstp; /* A pointer to a successor state */ + + /* Each configuration becomes complete after it contibutes to a successor + ** state. Initially, all configurations are incomplete */ + for(cfp=stp->cfp; cfp; cfp=cfp->next) cfp->status = INCOMPLETE; + + /* Loop through all configurations of the state "stp" */ + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; /* Already used by inner loop */ + if( cfp->dot>=cfp->rp->nrhs ) continue; /* Can't shift this config */ + Configlist_reset(); /* Reset the new config set */ + sp = cfp->rp->rhs[cfp->dot]; /* Symbol after the dot */ + + /* For every configuration in the state "stp" which has the symbol "sp" + ** following its dot, add the same configuration to the basis set under + ** construction but with the dot shifted one symbol to the right. 
*/ + for(bcfp=cfp; bcfp; bcfp=bcfp->next){ + if( bcfp->status==COMPLETE ) continue; /* Already used */ + if( bcfp->dot>=bcfp->rp->nrhs ) continue; /* Can't shift this one */ + bsp = bcfp->rp->rhs[bcfp->dot]; /* Get symbol after dot */ + if( !same_symbol(bsp,sp) ) continue; /* Must be same as for "cfp" */ + bcfp->status = COMPLETE; /* Mark this config as used */ + new = Configlist_addbasis(bcfp->rp,bcfp->dot+1); + Plink_add(&new->bplp,bcfp); + } + + /* Get a pointer to the state described by the basis configuration set + ** constructed in the preceding loop */ + newstp = getstate(lemp); + + /* The state "newstp" is reached from the state "stp" by a shift action + ** on the symbol "sp" */ + if( sp->type==MULTITERMINAL ){ + int i; + for(i=0; insubsym; i++){ + Action_add(&stp->ap,SHIFT,sp->subsym[i],(char*)newstp); + } + }else{ + Action_add(&stp->ap,SHIFT,sp,(char *)newstp); + } + } +} + +/* +** Construct the propagation links +*/ +void FindLinks(struct lemon *lemp) +{ + int i; + struct config *cfp, *other; + struct state *stp; + struct plink *plp; + + /* Housekeeping detail: + ** Add to every propagate link a pointer back to the state to + ** which the link is attached. */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + cfp->stp = stp; + } + } + + /* Convert all backlinks into forward links. Only the forward + ** links are used in the follow-set computation. */ + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ + for(plp=cfp->bplp; plp; plp=plp->next){ + other = plp->cfp; + Plink_add(&other->fplp,cfp); + } + } + } +} + +/* Compute all followsets. +** +** A followset is the set of all symbols which can come immediately +** after a configuration. 
+*/ +void FindFollowSets(struct lemon *lemp) +{ + int i; + struct config *cfp; + struct plink *plp; + int progress; + int change; + + for(i=0; instate; i++){ + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + cfp->status = INCOMPLETE; + } + } + + do{ + progress = 0; + for(i=0; instate; i++){ + for(cfp=lemp->sorted[i]->cfp; cfp; cfp=cfp->next){ + if( cfp->status==COMPLETE ) continue; + for(plp=cfp->fplp; plp; plp=plp->next){ + change = SetUnion(plp->cfp->fws,cfp->fws); + if( change ){ + plp->cfp->status = INCOMPLETE; + progress = 1; + } + } + cfp->status = COMPLETE; + } + } + }while( progress ); +} + +static int resolve_conflict(struct action *, struct action *,struct symbol *errsym); + +/* Compute the reduce actions, and resolve conflicts. +*/ +void FindActions(struct lemon *lemp) +{ + int i,j; + struct config *cfp; + struct state *stp; + struct symbol *sp; + struct rule *rp; + + /* Add all of the reduce actions + ** A reduce action is added for each element of the followset of + ** a configuration which has its dot at the extreme right. + */ + for(i=0; instate; i++){ /* Loop over all states */ + stp = lemp->sorted[i]; + for(cfp=stp->cfp; cfp; cfp=cfp->next){ /* Loop over all configurations */ + if( cfp->rp->nrhs==cfp->dot ){ /* Is dot at extreme right? */ + for(j=0; jnterminal; j++){ + if( SetFind(cfp->fws,j) ){ + /* Add a reduce action to the state "stp" which will reduce by the + ** rule "cfp->rp" if the lookahead symbol is "lemp->symbols[j]" */ + Action_add(&stp->ap,REDUCE,lemp->symbols[j],(char *)cfp->rp); + } + } + } + } + } + + /* Add the accepting token */ + if( lemp->start ){ + sp = Symbol_find(lemp->start); + if( sp==0 ) sp = lemp->rule->lhs; + }else{ + sp = lemp->rule->lhs; + } + /* Add to the first state (which is always the starting state of the + ** finite state machine) an action to ACCEPT if the lookahead is the + ** start nonterminal. 
*/ + Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); + + /* Resolve conflicts */ + for(i=0; instate; i++){ + struct action *ap, *nap; + stp = lemp->sorted[i]; + /* assert( stp->ap ); */ + stp->ap = Action_sort(stp->ap); + for(ap=stp->ap; ap && ap->next; ap=ap->next){ + for(nap=ap->next; nap && nap->sp==ap->sp; nap=nap->next){ + /* The two actions "ap" and "nap" have the same lookahead. + ** Figure out which one should be used */ + lemp->nconflict += resolve_conflict(ap,nap,lemp->errsym); + } + } + } + + /* Report an error for each rule that can never be reduced. */ + for(rp=lemp->rule; rp; rp=rp->next) rp->canReduce = LEMON_FALSE; + for(i=0; instate; i++){ + struct action *ap; + for(ap=lemp->sorted[i]->ap; ap; ap=ap->next){ + if( ap->type==REDUCE ) ap->x.rp->canReduce = LEMON_TRUE; + } + } + for(rp=lemp->rule; rp; rp=rp->next){ + if( rp->canReduce ) continue; + ErrorMsg(lemp->filename,rp->ruleline,"This rule can not be reduced.\n"); + lemp->errorcnt++; + } +} + +/* Resolve a conflict between the two given actions. If the +** conflict can't be resolved, return non-zero. +** +** NO LONGER TRUE: +** To resolve a conflict, first look to see if either action +** is on an error rule. In that case, take the action which +** is not associated with the error rule. If neither or both +** actions are associated with an error rule, then try to +** use precedence to resolve the conflict. +** +** If either action is a SHIFT, then it must be apx. This +** function won't work if apx->type==REDUCE and apy->type==SHIFT. 
+*/ +static int resolve_conflict( + struct action *apx, + struct action *apy, + struct symbol *errsym _U_) +{ + struct symbol *spx, *spy; + int errcnt = 0; + assert( apx->sp==apy->sp ); /* Otherwise there would be no conflict */ + if( apx->type==SHIFT && apy->type==SHIFT ){ + apy->type = SSCONFLICT; + errcnt++; + } + if( apx->type==SHIFT && apy->type==REDUCE ){ + spx = apx->sp; + spy = apy->x.rp->precsym; + if( spy==0 || spx->prec<0 || spy->prec<0 ){ + /* Not enough precedence information. */ + apy->type = SRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ /* Lower precedence wins */ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = SH_RESOLVED; + }else if( spx->prec==spy->prec && spx->assoc==RIGHT ){ /* Use operator */ + apy->type = RD_RESOLVED; /* associativity */ + }else if( spx->prec==spy->prec && spx->assoc==LEFT ){ /* to break tie */ + apx->type = SH_RESOLVED; + }else{ + assert( spx->prec==spy->prec && spx->assoc==NONE ); + apy->type = SRCONFLICT; + errcnt++; + } + }else if( apx->type==REDUCE && apy->type==REDUCE ){ + spx = apx->x.rp->precsym; + spy = apy->x.rp->precsym; + if( spx==0 || spy==0 || spx->prec<0 || + spy->prec<0 || spx->prec==spy->prec ){ + apy->type = RRCONFLICT; + errcnt++; + }else if( spx->prec>spy->prec ){ + apy->type = RD_RESOLVED; + }else if( spx->precprec ){ + apx->type = RD_RESOLVED; + } + }else{ + assert( + apx->type==SH_RESOLVED || + apx->type==RD_RESOLVED || + apx->type==SSCONFLICT || + apx->type==SRCONFLICT || + apx->type==RRCONFLICT || + apy->type==SH_RESOLVED || + apy->type==RD_RESOLVED || + apy->type==SSCONFLICT || + apy->type==SRCONFLICT || + apy->type==RRCONFLICT + ); + /* The REDUCE/SHIFT case cannot happen because SHIFTs come before + ** REDUCEs on the list. If we reach this point it must be because + ** the parser conflict had already been resolved. 
*/ + } + return errcnt; +} +/********************* From the file "configlist.c" *************************/ +/* +** Routines to processing a configuration list and building a state +** in the LEMON parser generator. +*/ + +static struct config *freelist = 0; /* List of free configurations */ +static struct config *current = 0; /* Top of list of configurations */ +static struct config **currentend = 0; /* Last on list of configs */ +static struct config *basis = 0; /* Top of list of basis configs */ +static struct config **basisend = 0; /* End of list of basis configs */ + +/* Return a pointer to a new configuration */ +PRIVATE struct config *newconfig(void){ + struct config *new; + if( freelist==0 ){ + int i; + int amt = 3; + freelist = (struct config *)calloc( amt, sizeof(struct config) ); + if( freelist==0 ){ + fprintf(stderr,"Unable to allocate memory for a new configuration."); + exit(1); + } + for(i=0; inext; + return new; +} + +/* The configuration "old" is no longer used */ +PRIVATE void deleteconfig(struct config *old) +{ + old->next = freelist; + freelist = old; +} + +/* Initialized the configuration list builder */ +void Configlist_init(void){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_init(); + return; +} + +/* Initialized the configuration list builder */ +void Configlist_reset(void){ + current = 0; + currentend = ¤t; + basis = 0; + basisend = &basis; + Configtable_clear(0); + return; +} + +/* Add another configuration to the configuration list */ +struct config *Configlist_add( + struct rule *rp, /* The rule */ + int dot) /* Index into the RHS of the rule where the dot goes */ +{ + struct config *cfp, model; + + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = 
&cfp->next; + Configtable_insert(cfp); + } + return cfp; +} + +/* Add a basis configuration to the configuration list */ +struct config *Configlist_addbasis(struct rule *rp, int dot) +{ + struct config *cfp, model; + + assert( basisend!=0 ); + assert( currentend!=0 ); + model.rp = rp; + model.dot = dot; + cfp = Configtable_find(&model); + if( cfp==0 ){ + cfp = newconfig(); + cfp->rp = rp; + cfp->dot = dot; + cfp->fws = SetNew(); + cfp->stp = 0; + cfp->fplp = cfp->bplp = 0; + cfp->next = 0; + cfp->bp = 0; + *currentend = cfp; + currentend = &cfp->next; + *basisend = cfp; + basisend = &cfp->bp; + Configtable_insert(cfp); + } + return cfp; +} + +/* Compute the closure of the configuration list */ +void Configlist_closure(struct lemon *lemp) +{ + struct config *cfp, *newcfp; + struct rule *rp, *newrp; + struct symbol *sp, *xsp; + int i, dot; + + assert( currentend!=0 ); + for(cfp=current; cfp; cfp=cfp->next){ + rp = cfp->rp; + dot = cfp->dot; + if( dot>=rp->nrhs ) continue; + sp = rp->rhs[dot]; + if( sp->type==NONTERMINAL ){ + if( sp->rule==0 && sp!=lemp->errsym ){ + ErrorMsg(lemp->filename,rp->line,"Nonterminal \"%s\" has no rules.", + sp->name); + lemp->errorcnt++; + } + for(newrp=sp->rule; newrp; newrp=newrp->nextlhs){ + newcfp = Configlist_add(newrp,0); + for(i=dot+1; inrhs; i++){ + xsp = rp->rhs[i]; + if( xsp->type==TERMINAL ){ + SetAdd(newcfp->fws,xsp->index); + break; + }else if( xsp->type==MULTITERMINAL ){ + int k; + for(k=0; knsubsym; k++){ + SetAdd(newcfp->fws, xsp->subsym[k]->index); + } + break; + }else{ + SetUnion(newcfp->fws,xsp->firstset); + if( xsp->lambda==LEMON_FALSE ) break; + } + } + if( i==rp->nrhs ) Plink_add(&cfp->fplp,newcfp); + } + } + } + return; +} + +/* Sort the configuration list */ +void Configlist_sort(void){ + current = (struct config *)msort((char *)current,(char **)&(current->next),Configcmp); + currentend = 0; + return; +} + +/* Sort the basis configuration list */ +void Configlist_sortbasis(void){ + basis = (struct config 
*)msort((char *)current,(char **)&(current->bp),Configcmp); + basisend = 0; + return; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_return(void){ + struct config *old; + old = current; + current = 0; + currentend = 0; + return old; +} + +/* Return a pointer to the head of the configuration list and +** reset the list */ +struct config *Configlist_basis(void){ + struct config *old; + old = basis; + basis = 0; + basisend = 0; + return old; +} + +/* Free all elements of the given configuration list */ +void Configlist_eat(struct config *cfp) +{ + struct config *nextcfp; + for(; cfp; cfp=nextcfp){ + nextcfp = cfp->next; + assert( cfp->fplp==0 ); + assert( cfp->bplp==0 ); + if( cfp->fws ) SetFree(cfp->fws); + deleteconfig(cfp); + } + return; +} +/***************** From the file "error.c" *********************************/ +/* +** Code for printing error message. +*/ + +/* Find a good place to break "msg" so that its length is at least "min" +** but no more than "max". Make the point as close to max as possible. +*/ +static int findbreak(char *msg, int min, int max) +{ + int i,spot; + char c; + for(i=spot=min; i<=max; i++){ + c = msg[i]; + if( c=='\t' ) msg[i] = ' '; + if( c=='\n' ){ msg[i] = ' '; spot = i; break; } + if( c==0 ){ spot = i; break; } + if( c=='-' && i0 ){ + sprintf(prefix,"%.*s:%d: ",PREFIXLIMIT-10,filename,lineno); + }else{ + sprintf(prefix,"%.*s: ",PREFIXLIMIT-10,filename); + } + prefixsize = (int) strlen(prefix); + availablewidth = LINEWIDTH - prefixsize; + + /* Generate the error message */ + va_start(ap, format); + vsprintf(errmsg,format,ap); + va_end(ap); + errmsgsize = (int) strlen(errmsg); + /* Remove trailing '\n's from the error message. 
*/ + while( errmsgsize>0 && errmsg[errmsgsize-1]=='\n' ){ + errmsg[--errmsgsize] = 0; + } + + /* Print the error message */ + base = 0; + while( errmsg[base]!=0 ){ + end = restart = findbreak(&errmsg[base],0,availablewidth); + restart += base; + while( errmsg[restart]==' ' ) restart++; + fprintf(stdout,"%s%.*s\n",prefix,end,&errmsg[base]); + base = restart; + } +} +/**************** From the file "main.c" ************************************/ +/* +** Main program file for the LEMON parser generator. +*/ + +/* Report an out-of-memory condition and abort. This function +** is used mostly by the "MemoryCheck" macro in struct.h +*/ +void memory_error(void){ + fprintf(stderr,"Out of memory. Aborting...\n"); + exit(1); +} + +/* Locates the basename in a string possibly containing paths, + * including forward-slash and backward-slash delimiters on Windows, + * and allocates a new string containing just the basename. + * Returns the pointer to that string. + */ +PRIVATE char* +make_basename(char* fullname) +{ + char *cp; + char *new_string; + + /* Find the last forward slash */ + cp = strrchr(fullname, '/'); + +#ifdef _WIN32 + /* On Windows, if no forward slash was found, look ofr + * backslash also */ + if (!cp) + cp = strrchr(fullname, '\\'); +#endif + + if (!cp) { + new_string = malloc( strlen(fullname) + 1 ); + strcpy(new_string, fullname); + } + else { + /* skip the slash */ + cp++; + new_string = malloc( strlen(cp) + 1 ); + strcpy(new_string, cp); + } + + return new_string; +} + +static int nDefine = 0; /* Number of -D options on the command line */ +static char **azDefine = 0; /* Name of the -D macros */ + +/* This routine is called with the argument to each -D command-line option. +** Add the macro defined to the azDefine array. 
+*/ +static void handle_D_option(char *z){ + char **paz; + nDefine++; + azDefine = realloc(azDefine, sizeof(azDefine[0])*nDefine); + if( azDefine==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + paz = &azDefine[nDefine-1]; + *paz = malloc( strlen(z)+1 ); + if( *paz==0 ){ + fprintf(stderr,"out of memory\n"); + exit(1); + } + strcpy(*paz, z); + for(z=*paz; *z && *z!='='; z++){} + *z = 0; +} + + +/* The main program. Parse the command line and do it... */ +int main(int argc _U_, char **argv) +{ + static int version = 0; + static int rpflag = 0; + static int basisflag = 0; + static int compress = 0; + static int quiet = 0; + static int statistics = 0; + static int mhflag = 0; + static char *outdirname = NULL; + static char *templatename = NULL; + static struct s_options options[] = { + {OPT_FLAG, "b", (char*)&basisflag, "Print only the basis in report."}, + {OPT_FLAG, "c", (char*)&compress, "Don't compress the action table."}, + {OPT_STR, "d", (char*)&outdirname, "Output directory name."}, + {OPT_FSTR, "D", (char*)handle_D_option, "Define an %ifdef macro."}, + {OPT_FLAG, "g", (char*)&rpflag, "Print grammar without actions."}, + {OPT_FLAG, "m", (char*)&mhflag, "Output a makeheaders compatible file"}, + {OPT_FLAG, "q", (char*)&quiet, "(Quiet) Don't print the report file."}, + {OPT_FLAG, "s", (char*)&statistics, + "Print parser stats to standard output."}, + {OPT_STR, "t", (char*)&templatename, "Template file to use."}, + {OPT_FLAG, "x", (char*)&version, "Print the version number."}, + {OPT_FLAG,0,0,0} + }; + int i; + struct lemon lem; + + optinit(argv,options,stderr); + if( version ){ + printf("Lemon version 1.0\n" + "Copyright 1991-1997 by D. 
Richard Hipp\n" + "Freely distributable under the GNU Public License.\n" + ); + exit(0); + } + if( optnargs()!=1 ){ + fprintf(stderr,"Exactly one filename argument is required.\n"); + exit(1); + } + memset(&lem, 0, sizeof(lem)); + lem.errorcnt = 0; + + /* Initialize the machine */ + Strsafe_init(); + Symbol_init(); + State_init(); + lem.argv0 = argv[0]; + lem.filename = get_optarg(0); + lem.basisflag = basisflag; + Symbol_new("$"); + lem.errsym = Symbol_new("error"); + /* + ** Resetting useCnt in errsym seems to disable some error checking we + ** need to validate the filter syntax. So we remove this resetting for now. + ** + ** lem.errsym->useCnt = 0; + */ + lem.outdirname = outdirname; + lem.templatename = templatename; + lem.basename = make_basename(lem.filename); + + /* Parse the input file */ + Parse(&lem); + if( lem.errorcnt ) exit(lem.errorcnt); + if( lem.nrule==0 ){ + fprintf(stderr,"Empty grammar.\n"); + exit(1); + } + + /* Count and index the symbols of the grammar */ + lem.nsymbol = Symbol_count(); + Symbol_new("{default}"); + lem.symbols = Symbol_arrayof(); + for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; + qsort(lem.symbols,lem.nsymbol+1,sizeof(struct symbol*), + Symbolcmpp); + for(i=0; i<=lem.nsymbol; i++) lem.symbols[i]->index = i; + for(i=1; safe_isupper(lem.symbols[i]->name[0]); i++); + lem.nterminal = i; + + /* Generate a reprint of the grammar, if requested on the command line */ + if( rpflag ){ + Reprint(&lem); + }else{ + /* Initialize the size for all follow and first sets */ + SetSize(lem.nterminal+1); + + /* Find the precedence for every production rule (that has one) */ + FindRulePrecedences(&lem); + + /* Compute the lambda-nonterminals and the first-sets for every + ** nonterminal */ + FindFirstSets(&lem); + + /* Compute all LR(0) states. 
Also record follow-set propagation + ** links so that the follow-set can be computed later */ + lem.nstate = 0; + FindStates(&lem); + lem.sorted = State_arrayof(); + + /* Tie up loose ends on the propagation links */ + FindLinks(&lem); + + /* Compute the follow set of every reducible configuration */ + FindFollowSets(&lem); + + /* Compute the action tables */ + FindActions(&lem); + + /* Compress the action tables */ + if( compress==0 ) CompressTables(&lem); + + /* Reorder and renumber the states so that states with fewer choices + ** occur at the end. */ + ResortStates(&lem); + + /* Generate a report of the parser generated. (the "y.output" file) */ + if( !quiet ) ReportOutput(&lem); + + /* Generate the source code for the parser */ + ReportTable(&lem, mhflag); + + /* Produce a header file for use by the scanner. (This step is + ** omitted if the "-m" option is used because makeheaders will + ** generate the file for us.) */ + if( !mhflag ) ReportHeader(&lem); + } + if( statistics ){ + printf("Parser statistics: %d terminals, %d nonterminals, %d rules\n", + lem.nterminal, lem.nsymbol - lem.nterminal, lem.nrule); + printf(" %d states, %d parser table entries, %d conflicts\n", + lem.nstate, lem.tablesize, lem.nconflict); + } + if( lem.nconflict ){ + fprintf(stderr,"%d parsing conflicts.\n",lem.nconflict); + } + exit(lem.errorcnt + lem.nconflict); + return (lem.errorcnt + lem.nconflict); +} +/******************** From the file "msort.c" *******************************/ +/* +** A generic merge-sort program. +** +** USAGE: +** Let "ptr" be a pointer to some structure which is at the head of +** a null-terminated list. Then to sort the list call: +** +** ptr = msort(ptr,&(ptr->next),cmpfnc); +** +** In the above, "cmpfnc" is a pointer to a function which compares +** two instances of the structure and returns an integer, as in +** strcmp. The second argument is a pointer to the pointer to the +** second element of the linked list. 
This address is used to compute +** the offset to the "next" field within the structure. The offset to +** the "next" field must be constant for all structures in the list. +** +** The function returns a new pointer which is the head of the list +** after sorting. +** +** ALGORITHM: +** Merge-sort. +*/ + +/* +** Return a pointer to the next structure in the linked list. +*/ +#define NEXT(A) (*(char**)(((char *)A)+offset)) + +/* +** Inputs: +** a: A sorted, null-terminated linked list. (May be null). +** b: A sorted, null-terminated linked list. (May be null). +** cmp: A pointer to the comparison function. +** offset: Offset in the structure to the "next" field. +** +** Return Value: +** A pointer to the head of a sorted list containing the elements +** of both a and b. +** +** Side effects: +** The "next" pointers for elements in the lists a and b are +** changed. +*/ +static char *merge(char *a, char *b, int (*cmp)(const void *, const void *), + int offset) +{ + char *ptr, *head; + + if( a==0 ){ + head = b; + }else if( b==0 ){ + head = a; + }else{ + if( (*cmp)(a,b)<0 ){ + ptr = a; + a = NEXT(a); + }else{ + ptr = b; + b = NEXT(b); + } + head = ptr; + while( a && b ){ + if( (*cmp)(a,b)<0 ){ + NEXT(ptr) = a; + ptr = a; + a = NEXT(a); + }else{ + NEXT(ptr) = b; + ptr = b; + b = NEXT(b); + } + } + if( a ) NEXT(ptr) = a; + else NEXT(ptr) = b; + } + return head; +} + +/* +** Inputs: +** list: Pointer to a singly-linked list of structures. +** next: Pointer to pointer to the second element of the list. +** cmp: A comparison function. +** +** Return Value: +** A pointer to the head of a sorted list containing the elements +** orginally in list. +** +** Side effects: +** The "next" pointers for elements in list are changed. 
+*/ +#define LISTSIZE 30 +char *msort(char *list, char **next, int (*cmp)(const void *, const void *)) +{ + int offset; + char *ep; + char *set[LISTSIZE]; + int i; + offset = (int) ((char *)next - (char *)list); + for(i=0; istate = WAITING_FOR_DECL_KEYWORD; + }else if( safe_islower(x[0]) ){ + psp->lhs = Symbol_new(x); + psp->nrhs = 0; + psp->lhsalias = 0; + psp->state = WAITING_FOR_ARROW; + }else if( x[0]=='{' ){ + if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"There is no prior rule opon which to attach the code \ +fragment which begins on this line."); + psp->errorcnt++; + }else if( psp->prevrule->code!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"Code fragment beginning on this line is not the first \ +to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->line = psp->tokenlineno; + psp->prevrule->code = &x[1]; + } + }else if( x[0]=='[' ){ + psp->state = PRECEDENCE_MARK_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Token \"%s\" should be either \"%%\" or a nonterminal name.", + x); + psp->errorcnt++; + } + break; + case PRECEDENCE_MARK_1: + if( !safe_isupper(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "The precedence symbol must be a terminal."); + psp->errorcnt++; + }else if( psp->prevrule==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "There is no prior rule to assign precedence \"[%s]\".",x); + psp->errorcnt++; + }else if( psp->prevrule->precsym!=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, +"Precedence mark on this line is not the first \ +to follow the previous rule."); + psp->errorcnt++; + }else{ + psp->prevrule->precsym = Symbol_new(x); + } + psp->state = PRECEDENCE_MARK_2; + break; + case PRECEDENCE_MARK_2: + if( x[0]!=']' ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"]\" on precedence mark."); + psp->errorcnt++; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + break; + case WAITING_FOR_ARROW: + if( x[0]==':' && x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + 
}else if( x[0]=='(' ){ + psp->state = LHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Expected to see a \":\" following the LHS symbol \"%s\".", + psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_1: + if( safe_isalpha(x[0]) ){ + psp->lhsalias = x; + psp->state = LHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the LHS \"%s\"\n", + x,psp->lhs->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = LHS_ALIAS_3; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case LHS_ALIAS_3: + if( x[0]==':' && x[1]==':' && x[2]=='=' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \"->\" following: \"%s(%s)\".", + psp->lhs->name,psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case IN_RHS: + if( x[0]=='.' 
){ + struct rule *rp; + rp = (struct rule *)calloc( sizeof(struct rule) + + sizeof(struct symbol*)*psp->nrhs + sizeof(char*)*psp->nrhs, 1); + if( rp==0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't allocate enough memory for this rule."); + psp->errorcnt++; + psp->prevrule = 0; + }else{ + int i; + rp->ruleline = psp->tokenlineno; + rp->rhs = (struct symbol**)&rp[1]; + rp->rhsalias = (char**)&(rp->rhs[psp->nrhs]); + for(i=0; inrhs; i++){ + rp->rhs[i] = psp->rhs[i]; + rp->rhsalias[i] = psp->alias[i]; + } + rp->lhs = psp->lhs; + rp->lhsalias = psp->lhsalias; + rp->nrhs = psp->nrhs; + rp->code = 0; + rp->precsym = 0; + rp->index = psp->gp->nrule++; + rp->nextlhs = rp->lhs->rule; + rp->lhs->rule = rp; + rp->next = 0; + if( psp->firstrule==0 ){ + psp->firstrule = psp->lastrule = rp; + }else{ + psp->lastrule->next = rp; + psp->lastrule = rp; + } + psp->prevrule = rp; + } + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( safe_isalpha(x[0]) ){ + if( psp->nrhs>=MAXRHS ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Too many symbols on RHS of rule beginning at \"%s\".", + x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + }else{ + psp->rhs[psp->nrhs] = Symbol_new(x); + psp->alias[psp->nrhs] = 0; + psp->nrhs++; + } + }else if( (x[0]=='|' || x[0]=='/') && psp->nrhs>0 ){ + struct symbol *msp = psp->rhs[psp->nrhs-1]; + if( msp->type!=MULTITERMINAL ){ + struct symbol *origsp = msp; + msp = calloc(1,sizeof(*msp)); + memset(msp, 0, sizeof(*msp)); + msp->type = MULTITERMINAL; + msp->nsubsym = 1; + msp->subsym = calloc(1,sizeof(struct symbol*)); + msp->subsym[0] = origsp; + msp->name = origsp->name; + psp->rhs[psp->nrhs-1] = msp; + } + msp->nsubsym++; + msp->subsym = realloc(msp->subsym, sizeof(struct symbol*)*msp->nsubsym); + msp->subsym[msp->nsubsym-1] = Symbol_new(&x[1]); + if( safe_islower(x[1]) || safe_islower(msp->subsym[0]->name[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Cannot form a compound containing a non-terminal"); + psp->errorcnt++; 
+ } + }else if( x[0]=='(' && psp->nrhs>0 ){ + psp->state = RHS_ALIAS_1; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal character on RHS of rule: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_1: + if( safe_isalpha(x[0]) ){ + psp->alias[psp->nrhs-1] = x; + psp->state = RHS_ALIAS_2; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "\"%s\" is not a valid alias for the RHS symbol \"%s\"\n", + x,psp->rhs[psp->nrhs-1]->name); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case RHS_ALIAS_2: + if( x[0]==')' ){ + psp->state = IN_RHS; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Missing \")\" following LHS alias name \"%s\".",psp->lhsalias); + psp->errorcnt++; + psp->state = RESYNC_AFTER_RULE_ERROR; + } + break; + case WAITING_FOR_DECL_KEYWORD: + if( safe_isalpha(x[0]) ){ + psp->declkeyword = x; + psp->declargslot = 0; + psp->decllinenoslot = 0; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + if( strcmp(x,"name")==0 ){ + psp->declargslot = &(psp->gp->name); + psp->insertLineMacro = 0; + }else if( strcmp(x,"include")==0 ){ + psp->declargslot = &(psp->gp->include); + }else if( strcmp(x,"code")==0 ){ + psp->declargslot = &(psp->gp->extracode); + }else if( strcmp(x,"token_destructor")==0 ){ + psp->declargslot = &psp->gp->tokendest; + }else if( strcmp(x,"default_destructor")==0 ){ + psp->declargslot = &psp->gp->vardest; + }else if( strcmp(x,"token_prefix")==0 ){ + psp->declargslot = &psp->gp->tokenprefix; + psp->insertLineMacro = 0; + }else if( strcmp(x,"syntax_error")==0 ){ + psp->declargslot = &(psp->gp->error); + }else if( strcmp(x,"parse_accept")==0 ){ + psp->declargslot = &(psp->gp->accept); + }else if( strcmp(x,"parse_failure")==0 ){ + psp->declargslot = &(psp->gp->failure); + }else if( strcmp(x,"stack_overflow")==0 ){ + psp->declargslot = &(psp->gp->overflow); + }else if( strcmp(x,"extra_argument")==0 ){ + psp->declargslot = &(psp->gp->arg); + 
psp->insertLineMacro = 0; + }else if( strcmp(x,"token_type")==0 ){ + psp->declargslot = &(psp->gp->tokentype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"default_type")==0 ){ + psp->declargslot = &(psp->gp->vartype); + psp->insertLineMacro = 0; + }else if( strcmp(x,"stack_size")==0 ){ + psp->declargslot = &(psp->gp->stacksize); + psp->insertLineMacro = 0; + }else if( strcmp(x,"start_symbol")==0 ){ + psp->declargslot = &(psp->gp->start); + psp->insertLineMacro = 0; + }else if( strcmp(x,"left")==0 ){ + psp->preccounter++; + psp->declassoc = LEFT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"right")==0 ){ + psp->preccounter++; + psp->declassoc = RIGHT; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"nonassoc")==0 ){ + psp->preccounter++; + psp->declassoc = NONE; + psp->state = WAITING_FOR_PRECEDENCE_SYMBOL; + }else if( strcmp(x,"destructor")==0 ){ + psp->state = WAITING_FOR_DESTRUCTOR_SYMBOL; + }else if( strcmp(x,"type")==0 ){ + psp->state = WAITING_FOR_DATATYPE_SYMBOL; + }else if( strcmp(x,"fallback")==0 ){ + psp->fallback = 0; + psp->state = WAITING_FOR_FALLBACK_ID; + }else if( strcmp(x,"wildcard")==0 ){ + psp->state = WAITING_FOR_WILDCARD_ID; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Unknown declaration keyword: \"%%%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal declaration keyword: \"%s\".",x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_DESTRUCTOR_SYMBOL: + if( !safe_isalpha(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%destructor keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_new(x); + psp->declargslot = &sp->destructor; + psp->decllinenoslot = &sp->destLineno; + psp->insertLineMacro = 1; + psp->state = WAITING_FOR_DECL_ARG; + } + break; + case 
WAITING_FOR_DATATYPE_SYMBOL: + if( !safe_isalpha(x[0]) ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol name missing after %%destructor keyword"); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + }else{ + struct symbol *sp = Symbol_new(x); + psp->declargslot = &sp->datatype; + psp->insertLineMacro = 0; + psp->state = WAITING_FOR_DECL_ARG; + } + break; + case WAITING_FOR_PRECEDENCE_SYMBOL: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( safe_isupper(x[0]) ){ + struct symbol *sp; + sp = Symbol_new(x); + if( sp->prec>=0 ){ + ErrorMsg(psp->filename,psp->tokenlineno, + "Symbol \"%s\" has already be given a precedence.",x); + psp->errorcnt++; + }else{ + sp->prec = psp->preccounter; + sp->assoc = psp->declassoc; + } + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Can't assign a precedence to \"%s\".",x); + psp->errorcnt++; + } + break; + case WAITING_FOR_DECL_ARG: + if( (x[0]=='{' || x[0]=='\"' || safe_isalnum(x[0])) ){ + char *zOld, *zNew, *zBuf, *z; + int nOld, n, nLine, nNew, nBack; + int addLineMacro; + char zLine[50]; + zNew = x; + if( zNew[0]=='"' || zNew[0]=='{' ) zNew++; + nNew = (int) strlen(zNew); + if( *psp->declargslot ){ + zOld = *psp->declargslot; + }else{ + zOld = ""; + } + nOld = (int) strlen(zOld); + n = nOld + nNew + 20; + addLineMacro = psp->insertLineMacro && + (psp->decllinenoslot==0 || psp->decllinenoslot[0]!=0); + if( addLineMacro ){ + for(z=psp->filename, nBack=0; *z; z++){ + if( *z=='\\' ) nBack++; + } + sprintf(zLine, "#line %d ", psp->tokenlineno); + nLine = (int) strlen(zLine); + n += nLine + (int) strlen(psp->filename) + nBack; + } + *psp->declargslot = zBuf = realloc(*psp->declargslot, n); + zBuf += nOld; + if( addLineMacro ){ + if( nOld && zBuf[-1]!='\n' ){ + *(zBuf++) = '\n'; + } + memcpy(zBuf, zLine, nLine); + zBuf += nLine; + *(zBuf++) = '"'; + for(z=psp->filename; *z; z++){ + if( *z=='\\' ){ + *(zBuf++) = '\\'; + } + *(zBuf++) = *z; + } + *(zBuf++) = '"'; + *(zBuf++) = '\n'; + } + if( 
psp->decllinenoslot && psp->decllinenoslot[0]==0 ){ + psp->decllinenoslot[0] = psp->tokenlineno; + } + memcpy(zBuf, zNew, nNew); + zBuf += nNew; + *zBuf = 0; + psp->state = WAITING_FOR_DECL_OR_RULE; + }else{ + ErrorMsg(psp->filename,psp->tokenlineno, + "Illegal argument to %%%s: %s",psp->declkeyword,x); + psp->errorcnt++; + psp->state = RESYNC_AFTER_DECL_ERROR; + } + break; + case WAITING_FOR_FALLBACK_ID: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !safe_isupper(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%fallback argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->fallback==0 ){ + psp->fallback = sp; + }else if( sp->fallback ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "More than one fallback assigned to token %s", x); + psp->errorcnt++; + }else{ + sp->fallback = psp->fallback; + psp->gp->has_fallback = 1; + } + } + break; + case WAITING_FOR_WILDCARD_ID: + if( x[0]=='.' ){ + psp->state = WAITING_FOR_DECL_OR_RULE; + }else if( !isupper(x[0]) ){ + ErrorMsg(psp->filename, psp->tokenlineno, + "%%wildcard argument \"%s\" should be a token", x); + psp->errorcnt++; + }else{ + struct symbol *sp = Symbol_new(x); + if( psp->gp->wildcard==0 ){ + psp->gp->wildcard = sp; + }else{ + ErrorMsg(psp->filename, psp->tokenlineno, + "Extra wildcard to token: %s", x); + psp->errorcnt++; + } + } + break; + + case RESYNC_AFTER_RULE_ERROR: +/* if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; +** break; */ + case RESYNC_AFTER_DECL_ERROR: + if( x[0]=='.' ) psp->state = WAITING_FOR_DECL_OR_RULE; + if( x[0]=='%' ) psp->state = WAITING_FOR_DECL_KEYWORD; + break; + } +} + +/* Run the preprocessor over the input file text. The global variables +** azDefine[0] through azDefine[nDefine-1] contains the names of all defined +** macros. This routine looks for "%ifdef" and "%ifndef" and "%endif" and +** comments them out. Text in between is also commented out as appropriate. 
+*/ +static void preprocess_input(char *z){ + int i, j, k, n; + int exclude = 0; + int start = 0; + int lineno = 1; + int start_lineno = 1; + for(i=0; z[i]; i++){ + if( z[i]=='\n' ) lineno++; + if( z[i]!='%' || (i>0 && z[i-1]!='\n') ) continue; + if( strncmp(&z[i],"%endif",6)==0 && safe_isspace(z[i+6]) ){ + if( exclude ){ + exclude--; + if( exclude==0 ){ + for(j=start; jfilename; + ps.errorcnt = 0; + ps.state = INITIALIZE; + ps.prevrule = NULL; + ps.preccounter = 0; + ps.lastrule = NULL; + ps.firstrule = NULL; + ps.lhs = NULL; + ps.nrhs = 0; + ps.lhsalias = NULL; + ps.declkeyword = NULL; + ps.declargslot = NULL; + ps.declassoc = UNK; + ps.fallback = NULL; + + /* Begin by reading the input file */ + fp = fopen(ps.filename,"rb"); + if( fp==0 ){ + ErrorMsg(ps.filename,0,"Can't open this file for reading."); + gp->errorcnt++; + return; + } + fseek(fp,0,2); + filesize = ftell(fp); + rewind(fp); + /* XXX - what if filesize is bigger than the maximum size_t value? */ + filebuf = (char *)malloc( filesize+1 ); + if( filebuf==0 ){ + ErrorMsg(ps.filename,0,"Can't allocate %ld of memory to hold this file.", + filesize+1); + fclose(fp); + gp->errorcnt++; + return; + } + if( fread(filebuf,1,filesize,fp)!=(size_t)filesize ){ + ErrorMsg(ps.filename,0,"Can't read in all %ld bytes of this file.", + filesize); + free(filebuf); + fclose(fp); + gp->errorcnt++; + return; + } + fclose(fp); + filebuf[filesize] = 0; + + /* Make an initial pass through the file to handle %ifdef and %ifndef */ + preprocess_input(filebuf); + + /* Now scan the text of the input file */ + lineno = 1; + for(cp=filebuf; (c= *cp)!=0; ){ + if( c=='\n' ) lineno++; /* Keep track of the line number */ + if( safe_isspace(c) ){ cp++; continue; } /* Skip all white space */ + if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments */ + cp+=2; + while( (c= *cp)!=0 && c!='\n' ) cp++; + continue; + } + if( c=='/' && cp[1]=='*' ){ /* Skip C style comments */ + cp+=2; + while( (c= *cp)!=0 && (c!='/' || cp[-1]!='*') ){ + if( 
c=='\n' ) lineno++; + cp++; + } + if( c ) cp++; + continue; + } + ps.tokenstart = cp; /* Mark the beginning of the token */ + ps.tokenlineno = lineno; /* Linenumber on which token begins */ + if( c=='\"' ){ /* String literals */ + cp++; + while( (c= *cp)!=0 && c!='\"' ){ + if( c=='\n' ) lineno++; + cp++; + } + if( c==0 ){ + ErrorMsg(ps.filename,startline, +"String starting on this line is not terminated before the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( c=='{' ){ /* A block of C code */ + int level; + cp++; + for(level=1; (c= *cp)!=0 && (level>1 || c!='}'); cp++){ + if( c=='\n' ) lineno++; + else if( c=='{' ) level++; + else if( c=='}' ) level--; + else if( c=='/' && cp[1]=='*' ){ /* Skip comments */ + char prevc; + cp = &cp[2]; + prevc = 0; + while( (c= *cp)!=0 && (c!='/' || prevc!='*') ){ + if( c=='\n' ) lineno++; + prevc = c; + cp++; + } + }else if( c=='/' && cp[1]=='/' ){ /* Skip C++ style comments too */ + cp = &cp[2]; + while( (c= *cp)!=0 && c!='\n' ) cp++; + if( c ) lineno++; + }else if( c=='\'' || c=='\"' ){ /* String a character literals */ + char startchar, prevc; + startchar = c; + prevc = 0; + for(cp++; (c= *cp)!=0 && (c!=startchar || prevc=='\\'); cp++){ + if( c=='\n' ) lineno++; + if( prevc=='\\' ) prevc = 0; + else prevc = c; + } + } + } + if( c==0 ){ + ErrorMsg(ps.filename,ps.tokenlineno, +"C code starting on this line is not terminated before the end of the file."); + ps.errorcnt++; + nextcp = cp; + }else{ + nextcp = cp+1; + } + }else if( safe_isalnum(c) ){ /* Identifiers */ + while( (c= *cp)!=0 && (safe_isalnum(c) || c=='_') ) cp++; + nextcp = cp; + }else if( c==':' && cp[1]==':' && cp[2]=='=' ){ /* The operator "::=" */ + cp += 3; + nextcp = cp; + }else if( (c=='/' || c=='|') && safe_isalpha(cp[1]) ){ + cp += 2; + while( (c = *cp)!=0 && (safe_isalnum(c) || c=='_') ) cp++; + nextcp = cp; + }else{ /* All other (one character) operators */ + cp++; + nextcp = cp; + } + c = *cp; + *cp = 0; /* Null 
terminate the token */ + parseonetoken(&ps); /* Parse the token */ + *cp = c; /* Restore the buffer */ + cp = nextcp; + } + free(filebuf); /* Release the buffer after parsing */ + gp->rule = ps.firstrule; + gp->errorcnt = ps.errorcnt; +} +/*************************** From the file "plink.c" *********************/ +/* +** Routines processing configuration follow-set propagation links +** in the LEMON parser generator. +*/ +static struct plink *plink_freelist = 0; + +/* Allocate a new plink */ +struct plink *Plink_new(void){ + struct plink *new; + + if( plink_freelist==0 ){ + int i; + int amt = 100; + plink_freelist = (struct plink *)calloc( amt, sizeof(struct plink) ); + if( plink_freelist==0 ){ + fprintf(stderr, + "Unable to allocate memory for a new follow-set propagation link.\n"); + exit(1); + } + for(i=0; inext; + return new; +} + +/* Add a plink to a plink list */ +void Plink_add(struct plink **plpp, struct config *cfp) +{ + struct plink *new; + new = Plink_new(); + new->next = *plpp; + *plpp = new; + new->cfp = cfp; +} + +/* Transfer every plink on the list "from" to the list "to" */ +void Plink_copy(struct plink **to, struct plink *from) +{ + struct plink *nextpl; + while( from ){ + nextpl = from->next; + from->next = *to; + *to = from; + from = nextpl; + } +} + +/* Delete every plink on the list */ +void Plink_delete(struct plink *plp) +{ + struct plink *nextpl; + + while( plp ){ + nextpl = plp->next; + plp->next = plink_freelist; + plink_freelist = plp; + plp = nextpl; + } +} +/*********************** From the file "report.c" **************************/ +/* +** Procedures for generating reports and tables in the LEMON parser generator. +*/ + +/* Generate a filename with the given suffix. Space to hold the +** name comes from malloc() and must be freed by the calling +** function. 
+*/ +PRIVATE char *file_makename(char *pattern, const char *suffix) +{ + char *name; + char *cp; + + name = malloc( strlen(pattern) + strlen(suffix) + 5 ); + if( name==0 ){ + fprintf(stderr,"Can't allocate space for a filename.\n"); + exit(1); + } + strcpy(name,pattern); + cp = strrchr(name,'.'); + if( cp ) *cp = 0; + strcat(name,suffix); + return name; +} + +/* Generate a filename with the given suffix. Uses only +** the basename of the input file, not the entire path. This +** is useful for creating output files when using outdirname. +** Space to hold this name comes from malloc() and must be +** freed by the calling function. +*/ +PRIVATE char *file_makename_using_basename(struct lemon *lemp, const char *suffix) +{ + return file_makename(lemp->basename, suffix); +} + +/* Open a file with a name based on the name of the input file, +** but with a different (specified) suffix, and return a pointer +** to the stream. Prepend outdirname for both reads and writes, because +** the only time we read is when checking for an already-produced +** header file, which should exist in the output directory, not the +** input directory. If we ever need to file_open(,,"r") on the input +** side, we should add another arg to file_open() indicating which +** directory, ("input, "output", or "other") we should deal with. 
+*/ +PRIVATE FILE *file_open(struct lemon *lemp, const char *suffix, const char *mode) +{ + FILE *fp; + char *name; + + if( lemp->outname ) free(lemp->outname); + name = file_makename_using_basename(lemp, suffix); + + if ( lemp->outdirname != NULL ) { + lemp->outname = malloc( strlen(lemp->outdirname) + strlen(name) + 2); + if ( lemp->outname == 0 ) { + fprintf(stderr, "Can't allocate space for dir/filename"); + exit(1); + } + strcpy(lemp->outname, lemp->outdirname); +#ifdef __WIN32__ + strcat(lemp->outname, "\\"); +#else + strcat(lemp->outname, "/"); +#endif + strcat(lemp->outname, name); + free(name); + } + else { + lemp->outname = name; + } + + fp = fopen(lemp->outname,mode); + if( fp==0 && *mode=='w' ){ + fprintf(stderr,"Can't open file \"%s\".\n",lemp->outname); + lemp->errorcnt++; + return 0; + } + return fp; +} + +/* Duplicate the input file without comments and without actions +** on rules */ +void Reprint(struct lemon *lemp) +{ + struct rule *rp; + struct symbol *sp; + int i, j, maxlen, len, ncolumns, skip; + printf("// Reprint of input file \"%s\".\n// Symbols:\n",lemp->filename); + maxlen = 10; + for(i=0; insymbol; i++){ + sp = lemp->symbols[i]; + len = (int) strlen(sp->name); + if( len>maxlen ) maxlen = len; + } + ncolumns = 76/(maxlen+5); + if( ncolumns<1 ) ncolumns = 1; + skip = (lemp->nsymbol + ncolumns - 1)/ncolumns; + for(i=0; insymbol; j+=skip){ + sp = lemp->symbols[j]; + assert( sp->index==j ); + printf(" %3d %-*.*s",j,maxlen,maxlen,sp->name); + } + printf("\n"); + } + for(rp=lemp->rule; rp; rp=rp->next){ + printf("%s",rp->lhs->name); + /* if( rp->lhsalias ) printf("(%s)",rp->lhsalias); */ + printf(" ::="); + for(i=0; inrhs; i++){ + sp = rp->rhs[i]; + printf(" %s", sp->name); + if( sp->type==MULTITERMINAL ){ + for(j=1; jnsubsym; j++){ + printf("|%s", sp->subsym[j]->name); + } + } + /* if( rp->rhsalias[i] ) printf("(%s)",rp->rhsalias[i]); */ + } + printf("."); + if( rp->precsym ) printf(" [%s]",rp->precsym->name); + /* if( rp->code ) printf("\n 
%s",rp->code); */ + printf("\n"); + } +} + +PRIVATE void ConfigPrint(FILE *fp, struct config *cfp) +{ + struct rule *rp; + struct symbol *sp; + int i, j; + rp = cfp->rp; + fprintf(fp,"%s ::=",rp->lhs->name); + for(i=0; i<=rp->nrhs; i++){ + if( i==cfp->dot ) fprintf(fp," *"); + if( i==rp->nrhs ) break; + sp = rp->rhs[i]; + fprintf(fp," %s", sp->name); + if( sp->type==MULTITERMINAL ){ + for(j=1; jnsubsym; j++){ + fprintf(fp,"|%s",sp->subsym[j]->name); + } + } + } +} + +/* #define TEST */ +#if 0 +/* Print a set */ +PRIVATE void SetPrint(FILE *out, char *set, struct lemon *lemp) +{ + int i; + char *spacer; + spacer = ""; + fprintf(out,"%12s[",""); + for(i=0; interminal; i++){ + if( SetFind(set,i) ){ + fprintf(out,"%s%s",spacer,lemp->symbols[i]->name); + spacer = " "; + } + } + fprintf(out,"]\n"); +} + +/* Print a plink chain */ +PRIVATE void PlinkPrint(FILE *out, struct plink *plp, char *tag) +{ + while( plp ){ + fprintf(out,"%12s%s (state %2d) ","",tag,plp->cfp->stp->statenum); + ConfigPrint(out,plp->cfp); + fprintf(out,"\n"); + plp = plp->next; + } +} +#endif + +/* Print an action to the given file descriptor. Return FALSE if +** nothing was actually printed. 
+*/ +PRIVATE int PrintAction(struct action *ap, FILE *fp, int indent){ + int result = 1; + switch( ap->type ){ + case SHIFT: + fprintf(fp,"%*s shift %d",indent,ap->sp->name,ap->x.stp->statenum); + break; + case REDUCE: + fprintf(fp,"%*s reduce %d",indent,ap->sp->name,ap->x.rp->index); + break; + case ACCEPT: + fprintf(fp,"%*s accept",indent,ap->sp->name); + break; + case ERROR: + fprintf(fp,"%*s error",indent,ap->sp->name); + break; + case SRCONFLICT: + case RRCONFLICT: + fprintf(fp,"%*s reduce %-3d ** Parsing conflict **", + indent,ap->sp->name,ap->x.rp->index); + break; + case SSCONFLICT: + fprintf(fp,"%*s shift %d ** Parsing conflict **", + indent,ap->sp->name,ap->x.stp->statenum); + break; + case SH_RESOLVED: + case RD_RESOLVED: + case NOT_USED: + result = 0; + break; + } + return result; +} + +/* Generate the "y.output" log file */ +void ReportOutput(struct lemon *lemp) +{ + int i; + struct state *stp; + struct config *cfp; + struct action *ap; + FILE *fp; + + fp = file_open(lemp,".out","wb"); + if( fp==0 ) return; + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + fprintf(fp,"State %d:\n",stp->statenum); + if( lemp->basisflag ) cfp=stp->bp; + else cfp=stp->cfp; + while( cfp ){ + char buf[20]; + if( cfp->dot==cfp->rp->nrhs ){ + sprintf(buf,"(%d)",cfp->rp->index); + fprintf(fp," %5s ",buf); + }else{ + fprintf(fp," "); + } + ConfigPrint(fp,cfp); + fprintf(fp,"\n"); +#if 0 + SetPrint(fp,cfp->fws,lemp); + PlinkPrint(fp,cfp->fplp,"To "); + PlinkPrint(fp,cfp->bplp,"From"); +#endif + if( lemp->basisflag ) cfp=cfp->bp; + else cfp=cfp->next; + } + fprintf(fp,"\n"); + for(ap=stp->ap; ap; ap=ap->next){ + if( PrintAction(ap,fp,30) ) fprintf(fp,"\n"); + } + fprintf(fp,"\n"); + } + fprintf(fp, "----------------------------------------------------\n"); + fprintf(fp, "Symbols:\n"); + for(i=0; insymbol; i++){ + int j; + struct symbol *sp; + + sp = lemp->symbols[i]; + fprintf(fp, " %3d: %s", i, sp->name); + if( sp->type==NONTERMINAL ){ + fprintf(fp, ":"); + if( sp->lambda ){ 
+ fprintf(fp, " "); + } + for(j=0; jnterminal; j++){ + if( sp->firstset && SetFind(sp->firstset, j) ){ + fprintf(fp, " %s", lemp->symbols[j]->name); + } + } + } + fprintf(fp, "\n"); + } + fclose(fp); + return; +} + +/* Search for the file "name" which is in the same directory as +** the exacutable */ +PRIVATE char *pathsearch(char *argv0, char *name, int modemask) +{ + char *pathlist; + char *path,*cp; + char c; + +#ifdef __WIN32__ + cp = strrchr(argv0,'\\'); +#else + cp = strrchr(argv0,'/'); +#endif + if( cp ){ + c = *cp; + *cp = 0; + path = (char *)malloc( strlen(argv0) + strlen(name) + 2 ); + if( path ) sprintf(path,"%s/%s",argv0,name); + *cp = c; + }else{ + pathlist = getenv("PATH"); + if( pathlist==0 ) pathlist = ".:/bin:/usr/bin"; + path = (char *)malloc( strlen(pathlist)+strlen(name)+2 ); + if( path!=0 ){ + while( *pathlist ){ + cp = strchr(pathlist,':'); + if( cp==0 ) cp = &pathlist[strlen(pathlist)]; + c = *cp; + *cp = 0; + sprintf(path,"%s/%s",pathlist,name); + *cp = c; + if( c==0 ) pathlist = ""; + else pathlist = &cp[1]; + if( access(path,modemask)==0 ) break; + } + } + } + return path; +} + +/* Given an action, compute the integer value for that action +** which is to be put in the action table of the generated machine. +** Return negative if no action should be generated. +*/ +PRIVATE int compute_action(struct lemon *lemp, struct action *ap) +{ + int act; + switch( ap->type ){ + case SHIFT: act = ap->x.stp->statenum; break; + case REDUCE: act = ap->x.rp->index + lemp->nstate; break; + case ERROR: act = lemp->nstate + lemp->nrule; break; + case ACCEPT: act = lemp->nstate + lemp->nrule + 1; break; + default: act = -1; break; + } + return act; +} + +#define LINESIZE 1000 +/* The next cluster of routines are for reading the template file +** and writing the results to the generated parser */ +/* The first function transfers data from "in" to "out" until +** a line is seen which begins with "%%". The line number is +** tracked. 
+** +** if name!=0, then any word that begin with "Parse" is changed to +** begin with *name instead. +*/ +PRIVATE void tplt_xfer(const char *name, FILE *in, FILE *out, int *lineno) +{ + int i, iStart; + char line[LINESIZE]; + while( fgets(line,LINESIZE,in) && (line[0]!='%' || line[1]!='%') ){ + (*lineno)++; + iStart = 0; + if( name ){ + for(i=0; line[i] && iiStart ) fprintf(out,"%.*s",i-iStart,&line[iStart]); + fprintf(out,"%s",name); + i += 4; + iStart = i+1; + } + } + } + fprintf(out,"%s",&line[iStart]); + } +} + +/* The next function finds the template file and opens it, returning +** a pointer to the opened file. */ +PRIVATE FILE *tplt_open(struct lemon *lemp) +{ + static char templatename[] = "lempar.c"; + FILE *in; + char *tpltname = NULL; + char *cp; + + if (lemp->templatename) { + tpltname = strdup(lemp->templatename); + } else { + char* buf; + + cp = strrchr(lemp->filename,'.'); + buf = malloc(1000); + if( cp ){ + sprintf(buf,"%.*s.lt",(int)(cp - lemp->filename),lemp->filename); + }else{ + sprintf(buf,"%s.lt",lemp->filename); + } + if( access(buf,004)==0 ){ + tpltname = strdup(buf); + }else if( access(templatename,004)==0 ){ + tpltname = strdup(templatename); + }else{ + tpltname = pathsearch(lemp->argv0,templatename,0); + } + free(buf); + } + if( tpltname==0 ){ + fprintf(stderr,"Can't find the parser driver template file \"%s\".\n", + templatename); + lemp->errorcnt++; + free(tpltname); + return 0; + } + in = fopen(tpltname,"rb"); + free(tpltname); + + if( in==0 ){ + fprintf(stderr,"Can't open the template file \"%s\".\n",templatename); + lemp->errorcnt++; + return 0; + } + return in; +} + +/* Print a #line directive line to the output file. 
*/ +PRIVATE void tplt_linedir(FILE *out, int lineno, char *filename) +{ + fprintf(out,"#line %d \"",lineno); + while( *filename ){ + if( *filename == '\\' ) putc('\\',out); + putc(*filename,out); + filename++; + } + fprintf(out,"\"\n"); +} + +/* Print a string to the file and keep the linenumber up to date */ +PRIVATE void tplt_print(FILE *out, struct lemon *lemp, char *str, + int *lineno) +{ + if( str==0 ) return; + (*lineno)++; + while( *str ){ + if( *str=='\n' ) (*lineno)++; + putc(*str,out); + str++; + } + if( str[-1]!='\n' ){ + putc('\n',out); + (*lineno)++; + } + tplt_linedir(out,*lineno+2,lemp->outname); + (*lineno)+=2; + return; +} + +/* +** The following routine emits code for the destructor for the +** symbol sp +*/ +PRIVATE void emit_destructor_code(FILE *out, struct symbol *sp, struct lemon *lemp, + int *lineno) +{ + char *cp = 0; + + int linecnt = 0; + if( sp->type==TERMINAL ){ + cp = lemp->tokendest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else if( sp->destructor ){ + cp = sp->destructor; + fprintf(out,"{\n"); (*lineno)++; + tplt_linedir(out,sp->destLineno,lemp->outname); (*lineno)++; + }else if( lemp->vardest ){ + cp = lemp->vardest; + if( cp==0 ) return; + fprintf(out,"{\n"); (*lineno)++; + }else{ + assert( 0 ); /* Cannot happen */ + } + for(; *cp; cp++){ + if( *cp=='$' && cp[1]=='$' ){ + fprintf(out,"(yypminor->yy%d)",sp->dtnum); + cp++; + continue; + } + if( *cp=='\n' ) linecnt++; + fputc(*cp,out); + } + (*lineno) += 3 + linecnt; + fprintf(out,"\n"); + tplt_linedir(out,*lineno,lemp->outname); + fprintf(out,"}\n"); + return; +} + +/* +** Return TRUE (non-zero) if the given symbol has a destructor. +*/ +PRIVATE int has_destructor(struct symbol *sp, struct lemon *lemp) +{ + int ret; + if( sp->type==TERMINAL ){ + ret = lemp->tokendest!=0; + }else{ + ret = lemp->vardest!=0 || sp->destructor!=0; + } + return ret; +} + +/* +** Append text to a dynamically allocated string. If zText is 0 then +** reset the string to be empty again. 
Always return the complete text +** of the string (which is overwritten with each call). +** +** n bytes of zText are stored. If n==0 then all of zText up to the first +** \000 terminator is stored. zText can contain up to two instances of +** %d. The values of p1 and p2 are written into the first and second +** %d. +** +** If n==-1, then the previous character is overwritten. +*/ +PRIVATE char *append_str(const char *zText, int n, int p1, int p2){ + static char *z = 0; + static int alloced = 0; + static int used = 0; + int c; + char zInt[40]; + + if( zText==0 ){ + used = 0; + return z; + } + if( n<=0 ){ + if( n<0 ){ + used += n; + assert( used>=0 ); + } + n = (int) strlen(zText); + } + if( n+(int)sizeof(zInt)*2+used >= alloced ){ + alloced = n + sizeof(zInt)*2 + used + 200; + z = realloc(z, alloced); + } + if( z==0 ) return ""; + while( n-- > 0 ){ + c = *(zText++); + if( c=='%' && n>0 && zText[0]=='d' ){ + sprintf(zInt, "%d", p1); + p1 = p2; + strcpy(&z[used], zInt); + used += (int) strlen(&z[used]); + zText++; + n--; + }else{ + z[used++] = c; + } + } + z[used] = 0; + return z; +} + +/* +** zCode is a string that is the action associated with a rule. Expand +** the symbols in this string so that the refer to elements of the parser +** stack. 
+*/ +PRIVATE void translate_code(struct lemon *lemp, struct rule *rp){ + char *cp, *xp; + int i; + char lhsused = 0; /* True if the LHS element has been used */ + char used[MAXRHS]; /* True for each RHS element which is used */ + + for(i=0; inrhs; i++) used[i] = 0; + lhsused = 0; + + if (!rp->code) { + rp->code = "\n"; + rp->line = rp->ruleline; + } + + append_str(0,0,0,0); + for(cp=rp->code; *cp; cp++){ + if( safe_isalpha(*cp) && (cp==rp->code || (!safe_isalnum(cp[-1]) && cp[-1]!='_')) ){ + char saved; + for(xp= &cp[1]; safe_isalnum(*xp) || *xp=='_'; xp++); + saved = *xp; + *xp = 0; + if( rp->lhsalias && strcmp(cp,rp->lhsalias)==0 ){ + append_str("yygotominor.yy%d",0,rp->lhs->dtnum,0); + cp = xp; + lhsused = 1; + }else{ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] && strcmp(cp,rp->rhsalias[i])==0 ){ + if( cp!=rp->code && cp[-1]=='@' ){ + /* If the argument is of the form @X then substituted + ** the token number of X, not the value of X */ + append_str("yymsp[%d].major",-1,i-rp->nrhs+1,0); + }else{ + struct symbol *sp = rp->rhs[i]; + int dtnum; + if( sp->type==MULTITERMINAL ){ + dtnum = sp->subsym[0]->dtnum; + }else{ + dtnum = sp->dtnum; + } + append_str("yymsp[%d].minor.yy%d",0,i-rp->nrhs+1, dtnum); + } + cp = xp; + used[i] = 1; + break; + } + } + } + *xp = saved; + } + append_str(cp, 1, 0, 0); + } /* End loop */ + + /* Check to make sure the LHS has been used */ + if( rp->lhsalias && !lhsused ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label \"%s\" for \"%s(%s)\" is never used.", + rp->lhsalias,rp->lhs->name,rp->lhsalias); + lemp->errorcnt++; + } + + /* Generate destructor code for RHS symbols which are not used in the + ** reduce code */ + for(i=0; inrhs; i++){ + if( rp->rhsalias[i] && !used[i] ){ + ErrorMsg(lemp->filename,rp->ruleline, + "Label %s for \"%s(%s)\" is never used.", + rp->rhsalias[i],rp->rhs[i]->name,rp->rhsalias[i]); + lemp->errorcnt++; + }else if( rp->rhsalias[i]==0 ){ + if( has_destructor(rp->rhs[i],lemp) ){ + append_str(" 
yy_destructor(%d,&yymsp[%d].minor);\n", 0, + rp->rhs[i]->index,i-rp->nrhs+1); + }else{ + /* No destructor defined for this term */ + } + } + } + if( rp->code ){ + cp = append_str(0,0,0,0); + rp->code = Strsafe(cp?cp:""); + } +} + +/* +** Generate code which executes when the rule "rp" is reduced. Write +** the code to "out". Make sure lineno stays up-to-date. +*/ +PRIVATE void emit_code(FILE *out, struct rule *rp, struct lemon *lemp, + int *lineno) +{ + char *cp; + int linecnt = 0; + + /* Generate code to do the reduce action */ + if( rp->code ){ + tplt_linedir(out,rp->line,lemp->filename); + fprintf(out,"{%s",rp->code); + for(cp=rp->code; *cp; cp++){ + if( *cp=='\n' ) linecnt++; + } /* End loop */ + (*lineno) += 3 + linecnt; + fprintf(out,"}\n"); + tplt_linedir(out,*lineno,lemp->outname); + } /* End if( rp->code ) */ + + return; +} + +/* +** Print the definition of the union used for the parser's data stack. +** This union contains fields for every possible data type for tokens +** and nonterminals. In the process of computing and printing this +** union, also set the ".dtnum" field of every terminal and nonterminal +** symbol. +*/ +PRIVATE void print_stack_union( + FILE *out, /* The output stream */ + struct lemon *lemp, /* The main info structure for this parser */ + int *plineno, /* Pointer to the line number */ + int mhflag) /* True if generating makeheaders output */ +{ + int lineno; /* The line number of the output */ + char **types; /* A hash table of datatypes */ + int arraysize; /* Size of the "types" array */ + int maxdtlength; /* Maximum length of any ".datatype" field. 
*/ + char *stddt; /* Standardized name for a datatype */ + int i,j; /* Loop counters */ + int hash; /* For hashing the name of a type */ + const char *name; /* Name of the parser */ + + /* Allocate and initialize types[] and allocate stddt[] */ + arraysize = lemp->nsymbol * 2; + types = (char**)calloc( arraysize, sizeof(char*) ); + for(i=0; ivartype ){ + maxdtlength = (int) strlen(lemp->vartype); + } + for(i=0; insymbol; i++){ + int len; + struct symbol *sp = lemp->symbols[i]; + if( sp->datatype==0 ) continue; + len = (int) strlen(sp->datatype); + if( len>maxdtlength ) maxdtlength = len; + } + stddt = (char*)malloc( maxdtlength*2 + 1 ); + if( types==0 || stddt==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + + /* Build a hash table of datatypes. The ".dtnum" field of each symbol + ** is filled in with the hash index plus 1. A ".dtnum" value of 0 is + ** used for terminal symbols. If there is no %default_type defined then + ** 0 is also used as the .dtnum value for nonterminals which do not specify + ** a datatype using the %type directive. 
+ */ + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + char *cp; + if( sp==lemp->errsym ){ + sp->dtnum = arraysize+1; + continue; + } + if( sp->type!=NONTERMINAL || (sp->datatype==0 && lemp->vartype==0) ){ + sp->dtnum = 0; + continue; + } + cp = sp->datatype; + if( cp==0 ) cp = lemp->vartype; + j = 0; + while( safe_isspace(*cp) ) cp++; + while( *cp ) stddt[j++] = *cp++; + while( j>0 && safe_isspace(stddt[j-1]) ) j--; + stddt[j] = 0; + if( strcmp(stddt, lemp->tokentype)==0 ){ + sp->dtnum = 0; + continue; + } + hash = 0; + for(j=0; stddt[j]; j++){ + hash = hash*53 + stddt[j]; + } + hash = (hash & 0x7fffffff)%arraysize; + while( types[hash] ){ + if( strcmp(types[hash],stddt)==0 ){ + sp->dtnum = hash + 1; + break; + } + hash++; + if( hash>=arraysize ) hash = 0; + } + if( types[hash]==0 ){ + sp->dtnum = hash + 1; + types[hash] = (char*)malloc( strlen(stddt)+1 ); + if( types[hash]==0 ){ + fprintf(stderr,"Out of memory.\n"); + exit(1); + } + strcpy(types[hash],stddt); + } + } + + /* Print out the definition of YYTOKENTYPE and YYMINORTYPE */ + name = lemp->name ? lemp->name : "Parse"; + lineno = *plineno; + if( mhflag ){ fprintf(out,"#if INTERFACE\n"); lineno++; } + fprintf(out,"#define %sTOKENTYPE %s\n",name, + lemp->tokentype?lemp->tokentype:"void*"); lineno++; + if( mhflag ){ fprintf(out,"#endif\n"); lineno++; } + fprintf(out,"typedef union {\n"); lineno++; + fprintf(out," %sTOKENTYPE yy0;\n",name); lineno++; + for(i=0; ierrsym->useCnt ){ + fprintf(out," int yy%d;\n",lemp->errsym->dtnum); lineno++; + } + + free(stddt); + free(types); + fprintf(out,"} YYMINORTYPE;\n"); lineno++; + *plineno = lineno; +} + +/* +** Return the name of a C datatype able to represent values between +** lwr and upr, inclusive. 
/*
** Return the name of a C datatype able to represent values between
** lwr and upr, inclusive.  An unsigned type is chosen whenever lwr is
** not negative.
*/
static const char *minimum_size_type(int lwr, int upr){
  if( lwr<0 ){
    /* A negative lower bound requires a signed type */
    if( lwr>=-127 && upr<=127 ) return "signed char";
    if( lwr>=-32767 && upr<32767 ) return "short";
    return "int";
  }
  if( upr<=255 ) return "unsigned char";
  if( upr<65535 ) return "unsigned short int";
  return "unsigned int";
}

/*
** Like minimum_size_type(), but always answer with a signed type,
** even when lwr is not negative.
*/
static const char *minimum_signed_size_type(int lwr, int upr){
  const char *zType = "int";
  if( lwr>=-127 && upr<=127 ){
    zType = "signed char";
  }else if( lwr>=-32767 && upr<32767 ){
    zType = "short";
  }
  return zType;
}

/*
** Each state contains a set of token actions and a set of
** nonterminal actions.  Each of these sets makes an instance
** of the following structure.  An array of these structures is used
** to order the creation of entries in the yy_action[] table.
*/
struct axset {
  struct state *stp;   /* A pointer to a state */
  int isTkn;           /* True to use tokens.  False for non-terminals */
  int nAction;         /* Number of actions */
};

/*
** Compare two axset structures for sorting purposes.  The entry with
** the larger nAction sorts first.
*/
static int axset_compare(const void *a, const void *b){
  const struct axset *pLeft = a;
  const struct axset *pRight = b;
  return pRight->nAction - pLeft->nAction;
}

/*
** Write text on "out" that describes the rule "rp".
*/
+*/ +static void writeRuleText(FILE *out, struct rule *rp){ + int j; + fprintf(out,"%s ::=", rp->lhs->name); + for(j=0; jnrhs; j++){ + struct symbol *sp = rp->rhs[j]; + fprintf(out," %s", sp->name); + if( sp->type==MULTITERMINAL ){ + int k; + for(k=1; knsubsym; k++){ + fprintf(out,"|%s",sp->subsym[k]->name); + } + } + } +} + +/* Generate C source code for the parser */ +void ReportTable( + struct lemon *lemp, + int mhflag) /* Output in makeheaders format if true */ +{ + FILE *out, *in; + char line[LINESIZE]; + int lineno; + struct state *stp; + struct action *ap; + struct rule *rp; + struct acttab *pActtab; + int i, j, n; + const char *name; + int mnTknOfst, mxTknOfst; + int mnNtOfst, mxNtOfst; + struct axset *ax; + +#if 0 + in = tplt_open(lemp); + if( in==0 ) return; + out = file_open(lemp,".c","wb"); + if( out==0 ){ + fclose(in); + return; + } + lineno = 1; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the include code, if any */ + tplt_print(out,lemp,lemp->include,&lineno); + if( mhflag ){ + char *makename = file_makename_using_basename(lemp, ".h"); + fprintf(out,"#include \"%s\"\n", makename); lineno++; + free(makename); + } +#endif + tplt_xfer(lemp->name,in,out,&lineno); + /* Generate #defines for all tokens */ + if( mhflag ){ + const char *prefix; + fprintf(out,"#if INTERFACE\n"); lineno++; + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + lineno++; + } + fprintf(out,"#endif\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); +#if 0 + /* Generate the defines */ + fprintf(out,"#define YYCODETYPE %s\n", + minimum_signed_size_type(0, lemp->nsymbol+5)); lineno++; + fprintf(out,"#define YYNOCODE %d\n",lemp->nsymbol+1); lineno++; + fprintf(out,"#define YYACTIONTYPE %s\n", + minimum_signed_size_type(0, lemp->nstate+lemp->nrule+5)); lineno++; + if( lemp->wildcard ){ + fprintf(out,"#define YYWILDCARD %d\n", + 
lemp->wildcard->index); lineno++; + } + print_stack_union(out,lemp,&lineno,mhflag); + fprintf(out, "#ifndef YYSTACKDEPTH\n"); lineno++; + if( lemp->stacksize ){ + fprintf(out,"#define YYSTACKDEPTH %s\n",lemp->stacksize); lineno++; + }else{ + fprintf(out,"#define YYSTACKDEPTH 100\n"); lineno++; + } + fprintf(out, "#endif\n"); lineno++; + if( mhflag ){ + fprintf(out,"#if INTERFACE\n"); lineno++; + } + name = lemp->name ? lemp->name : "Parse"; + if( lemp->arg && lemp->arg[0] ){ + i = (int) strlen(lemp->arg); + while( i>=1 && safe_isspace(lemp->arg[i-1]) ) i--; + while( i>=1 && (safe_isalnum(lemp->arg[i-1]) || lemp->arg[i-1]=='_') ) i--; + fprintf(out,"#define %sARG_SDECL %s;\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_PDECL ,%s\n",name,lemp->arg); lineno++; + fprintf(out,"#define %sARG_FETCH %s = yypParser->%s\n", + name,lemp->arg,&lemp->arg[i]); lineno++; + fprintf(out,"#define %sARG_STORE yypParser->%s = %s\n", + name,&lemp->arg[i],&lemp->arg[i]); lineno++; + }else{ + fprintf(out,"#define %sARG_SDECL\n",name); lineno++; + fprintf(out,"#define %sARG_PDECL\n",name); lineno++; + fprintf(out,"#define %sARG_FETCH\n",name); lineno++; + fprintf(out,"#define %sARG_STORE\n",name); lineno++; + } + if( mhflag ){ + fprintf(out,"#endif\n"); lineno++; + } + fprintf(out,"#define YYNSTATE %d\n",lemp->nstate); lineno++; + fprintf(out,"#define YYNRULE %d\n",lemp->nrule); lineno++; + if( lemp->errsym->useCnt ){ + fprintf(out,"#define YYERRORSYMBOL %d\n",lemp->errsym->index); lineno++; + fprintf(out,"#define YYERRSYMDT yy%d\n",lemp->errsym->dtnum); lineno++; + } + if( lemp->has_fallback ){ + fprintf(out,"#define YYFALLBACK 1\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the action table and its associates: + ** + ** yy_action[] A single table containing all actions. + ** yy_lookahead[] A table containing the lookahead for each entry in + ** yy_action. Used to detect hash collisions. 
+ ** yy_shift_ofst[] For each state, the offset into yy_action for + ** shifting terminals. + ** yy_reduce_ofst[] For each state, the offset into yy_action for + ** shifting non-terminals after a reduce. + ** yy_default[] Default action for each state. + */ + + /* Compute the actions on all states and count them up */ + ax = calloc(lemp->nstate*2, sizeof(ax[0])); + if( ax==0 ){ + fprintf(stderr,"malloc failed\n"); + exit(1); + } + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + ax[i*2].stp = stp; + ax[i*2].isTkn = 1; + ax[i*2].nAction = stp->nTknAct; + ax[i*2+1].stp = stp; + ax[i*2+1].isTkn = 0; + ax[i*2+1].nAction = stp->nNtAct; + } + mxTknOfst = mnTknOfst = 0; + mxNtOfst = mnNtOfst = 0; + + /* Compute the action table. In order to try to keep the size of the + ** action table to a minimum, the heuristic of placing the largest action + ** sets first is used. + */ + qsort(ax, lemp->nstate*2, sizeof(ax[0]), axset_compare); + pActtab = acttab_alloc(); + for(i=0; instate*2 && ax[i].nAction>0; i++){ + stp = ax[i].stp; + if( ax[i].isTkn ){ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->index>=lemp->nterminal ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iTknOfst = acttab_insert(pActtab); + if( stp->iTknOfstiTknOfst; + if( stp->iTknOfst>mxTknOfst ) mxTknOfst = stp->iTknOfst; + }else{ + for(ap=stp->ap; ap; ap=ap->next){ + int action; + if( ap->sp->indexnterminal ) continue; + if( ap->sp->index==lemp->nsymbol ) continue; + action = compute_action(lemp, ap); + if( action<0 ) continue; + acttab_action(pActtab, ap->sp->index, action); + } + stp->iNtOfst = acttab_insert(pActtab); + if( stp->iNtOfstiNtOfst; + if( stp->iNtOfst>mxNtOfst ) mxNtOfst = stp->iNtOfst; + } + } + free(ax); + + /* Output the yy_action table */ + fprintf(out,"static const YYACTIONTYPE yy_action[] = {\n"); lineno++; + n = acttab_size(pActtab); + for(i=j=0; instate + lemp->nrule + 2; + if( j==0 ) 
fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", action); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_lookahead table */ + fprintf(out,"static const YYCODETYPE yy_lookahead[] = {\n"); lineno++; + for(i=j=0; insymbol; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", la); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_shift_ofst[] table */ + fprintf(out, "#define YY_SHIFT_USE_DFLT (%d)\n", mnTknOfst-1); lineno++; + n = lemp->nstate; + while( n>0 && lemp->sorted[n-1]->iTknOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_SHIFT_MAX %d\n", n-1); lineno++; + fprintf(out, "static const %s yy_shift_ofst[] = {\n", + minimum_size_type(mnTknOfst-1, mxTknOfst)); lineno++; + for(i=j=0; isorted[i]; + ofst = stp->iTknOfst; + if( ofst==NO_OFFSET ) ofst = mnTknOfst - 1; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the yy_reduce_ofst[] table */ + fprintf(out, "#define YY_REDUCE_USE_DFLT (%d)\n", mnNtOfst-1); lineno++; + n = lemp->nstate; + while( n>0 && lemp->sorted[n-1]->iNtOfst==NO_OFFSET ) n--; + fprintf(out, "#define YY_REDUCE_MAX %d\n", n-1); lineno++; + fprintf(out, "static const %s yy_reduce_ofst[] = {\n", + minimum_size_type(mnNtOfst-1, mxNtOfst)); lineno++; + for(i=j=0; isorted[i]; + ofst = stp->iNtOfst; + if( ofst==NO_OFFSET ) ofst = mnNtOfst - 1; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", ofst); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + + /* Output the default action table */ + fprintf(out, "static const YYACTIONTYPE yy_default[] = {\n"); lineno++; + n = lemp->nstate; + for(i=j=0; 
isorted[i]; + if( j==0 ) fprintf(out," /* %5d */ ", i); + fprintf(out, " %4d,", stp->iDflt); + if( j==9 || i==n-1 ){ + fprintf(out, "\n"); lineno++; + j = 0; + }else{ + j++; + } + } + fprintf(out, "};\n"); lineno++; + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of fallback tokens. + */ + if( lemp->has_fallback ){ + for(i=0; interminal; i++){ + struct symbol *p = lemp->symbols[i]; + if( p->fallback==0 ){ + fprintf(out, " 0, /* %10s => nothing */\n", p->name); + }else{ + fprintf(out, " %3d, /* %10s => %s */\n", p->fallback->index, + p->name, p->fallback->name); + } + lineno++; + } + } + tplt_xfer(lemp->name, in, out, &lineno); + + /* Generate a table containing the symbolic name of every symbol + */ + for(i=0; insymbol; i++){ + sprintf(line,"\"%s\",",lemp->symbols[i]->name); + fprintf(out," %-15s",line); + if( (i&3)==3 ){ fprintf(out,"\n"); lineno++; } + } + if( (i&3)!=0 ){ fprintf(out,"\n"); lineno++; } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate a table containing a text string that describes every + ** rule in the rule set of the grammar. This information is used + ** when tracing REDUCE actions. + */ + for(i=0, rp=lemp->rule; rp; rp=rp->next, i++){ + assert( rp->index==i ); + fprintf(out," /* %3d */ \"", i); + writeRuleText(out, rp); + fprintf(out,"\",\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes every time a symbol is popped from + ** the stack while processing errors or while destroying the parser. 
+ ** (In other words, generate the %destructor actions) + */ + if( lemp->tokendest ){ + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type!=TERMINAL ) continue; + if( once ){ + fprintf(out, " /* TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", + sp->index, sp->name); lineno++; + } + for(i=0; insymbol && lemp->symbols[i]->type!=TERMINAL; i++); + if( insymbol ){ + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + } + fprintf(out," break;\n"); lineno++; + } + if( lemp->vardest ){ + struct symbol *dflt_sp = 0; + int once = 1; + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || + sp->index<=0 || sp->destructor!=0 ) continue; + if( once ){ + fprintf(out, " /* Default NON-TERMINAL Destructor */\n"); lineno++; + once = 0; + } + fprintf(out," case %d: /* %s */\n", + sp->index, sp->name); lineno++; + dflt_sp = sp; + } + if( dflt_sp!=0 ){ + emit_destructor_code(out,dflt_sp,lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + } + for(i=0; insymbol; i++){ + struct symbol *sp = lemp->symbols[i]; + if( sp==0 || sp->type==TERMINAL || sp->destructor==0 ) continue; + fprintf(out," case %d: /* %s */\n", + sp->index, sp->name); lineno++; + + /* Combine duplicate destructors into a single case */ + for(j=i+1; jnsymbol; j++){ + struct symbol *sp2 = lemp->symbols[j]; + if( sp2 && sp2->type!=TERMINAL && sp2->destructor + && sp2->dtnum==sp->dtnum + && strcmp(sp->destructor,sp2->destructor)==0 ){ + fprintf(out," case %d: /* %s */\n", + sp2->index, sp2->name); lineno++; + sp2->destructor = 0; + } + } + + emit_destructor_code(out,lemp->symbols[i],lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes whenever the parser stack overflows */ + tplt_print(out,lemp,lemp->overflow,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate the table of 
rule information + ** + ** Note: This code depends on the fact that rules are number + ** sequentually beginning with 0. + */ + for(rp=lemp->rule; rp; rp=rp->next){ + fprintf(out," { %d, %d },\n",rp->lhs->index,rp->nrhs); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which execution during each REDUCE action */ + for(rp=lemp->rule; rp; rp=rp->next){ + translate_code(lemp, rp); + } + for(rp=lemp->rule; rp; rp=rp->next){ + struct rule *rp2; + if( rp->code==0 ) continue; + fprintf(out," case %d: /* ", rp->index); + writeRuleText(out, rp); + fprintf(out, " */\n"); lineno++; + for(rp2=rp->next; rp2; rp2=rp2->next){ + if( rp2->code==rp->code ){ + fprintf(out," case %d: /* ", rp2->index); + writeRuleText(out, rp2); + fprintf(out," */\n"); lineno++; + rp2->code = 0; + } + } + emit_code(out,rp,lemp,&lineno); + fprintf(out," break;\n"); lineno++; + } + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes if a parse fails */ + tplt_print(out,lemp,lemp->failure,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when a syntax error occurs */ + tplt_print(out,lemp,lemp->error,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Generate code which executes when the parser accepts its input */ + tplt_print(out,lemp,lemp->accept,&lineno); + tplt_xfer(lemp->name,in,out,&lineno); + + /* Append any addition code the user desires */ + tplt_print(out,lemp,lemp->extracode,&lineno); + + fclose(in); + fclose(out); + return; +#endif +} + +/* Generate a header file for the parser */ +void ReportHeader(struct lemon *lemp) +{ + FILE *out, *in; + const char *prefix; + char line[LINESIZE]; + char pattern[LINESIZE]; + int i; + + if( lemp->tokenprefix ) prefix = lemp->tokenprefix; + else prefix = ""; + in = file_open(lemp,".h","rb"); + if( in ){ + for(i=1; interminal && fgets(line,LINESIZE,in); i++){ + sprintf(pattern,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + if( strcmp(line,pattern) ) 
break; + } + fclose(in); + if( i==lemp->nterminal ){ + /* No change in the file. Don't rewrite it. */ + return; + } + } + out = file_open(lemp,".h","wb"); + if( out ){ + for(i=1; interminal; i++){ + fprintf(out,"#define %s%-30s %2d\n",prefix,lemp->symbols[i]->name,i); + } + fclose(out); + } + return; +} + +/* Reduce the size of the action tables, if possible, by making use +** of defaults. +** +** In this version, we take the most frequent REDUCE action and make +** it the default. Except, there is no default if the wildcard token +** is a possible look-ahead. +*/ +void CompressTables(struct lemon *lemp) +{ + struct state *stp; + struct action *ap, *ap2; + struct rule *rp, *rp2, *rbest; + int nbest, n; + int i; + int usesWildcard; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + nbest = 0; + rbest = 0; + usesWildcard = 0; + + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==SHIFT && ap->sp==lemp->wildcard ){ + usesWildcard = 1; + } + if( ap->type!=REDUCE ) continue; + rp = ap->x.rp; + if( rp->lhsStart ) continue; + if( rp==rbest ) continue; + n = 1; + for(ap2=ap->next; ap2; ap2=ap2->next){ + if( ap2->type!=REDUCE ) continue; + rp2 = ap2->x.rp; + if( rp2==rbest ) continue; + if( rp2==rp ) n++; + } + if( n>nbest ){ + nbest = n; + rbest = rp; + } + } + + /* Do not make a default if the number of rules to default + ** is not at least 1 or if the wildcard token is a possible + ** lookahead. + */ + if( nbest<1 || usesWildcard ) continue; + + + /* Combine matching REDUCE actions into a single default */ + for(ap=stp->ap; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) break; + } + assert( ap ); + ap->sp = Symbol_new("{default}"); + for(ap=ap->next; ap; ap=ap->next){ + if( ap->type==REDUCE && ap->x.rp==rbest ) ap->type = NOT_USED; + } + stp->ap = Action_sort(stp->ap); + } +} + + +/* +** Compare two states for sorting purposes. The smaller state is the +** one with the most non-terminal actions. 
If they have the same number +** of non-terminal actions, then the smaller is the one with the most +** token actions. +*/ +static int stateResortCompare(const void *a, const void *b){ + const struct state *pA = *(struct state *const *)a; + const struct state *pB = *(struct state *const *)b; + int n; + + n = pB->nNtAct - pA->nNtAct; + if( n==0 ){ + n = pB->nTknAct - pA->nTknAct; + } + return n; +} + + +/* +** Renumber and resort states so that states with fewer choices +** occur at the end. Except, keep state 0 as the first state. +*/ +void ResortStates(struct lemon *lemp) +{ + int i; + struct state *stp; + struct action *ap; + + for(i=0; instate; i++){ + stp = lemp->sorted[i]; + stp->nTknAct = stp->nNtAct = 0; + stp->iDflt = lemp->nstate + lemp->nrule; + stp->iTknOfst = NO_OFFSET; + stp->iNtOfst = NO_OFFSET; + for(ap=stp->ap; ap; ap=ap->next){ + if( compute_action(lemp,ap)>=0 ){ + if( ap->sp->indexnterminal ){ + stp->nTknAct++; + }else if( ap->sp->indexnsymbol ){ + stp->nNtAct++; + }else{ + stp->iDflt = compute_action(lemp, ap); + } + } + } + } + qsort(&lemp->sorted[1], lemp->nstate-1, sizeof(lemp->sorted[0]), + stateResortCompare); + for(i=0; instate; i++){ + lemp->sorted[i]->statenum = i; + } +} + + +/***************** From the file "set.c" ************************************/ +/* +** Set manipulation routines for the LEMON parser generator. +*/ + +static int size = 0; + +/* Set the set size */ +void SetSize(int n) +{ + size = n+1; +} + +/* Allocate a new set */ +char *SetNew(void){ + char *s; + s = (char*)calloc( size, 1); + if( s==0 ){ + memory_error(); + } + return s; +} + +/* Deallocate a set */ +void SetFree(char *s) +{ + free(s); +} + +/* Add a new element to the set. Return TRUE if the element was added +** and FALSE if it was already there. 
*/ +int SetAdd(char *s, int e) +{ + int rv; + assert( e>=0 && esize = 1024; + x1a->count = 0; + x1a->tbl = (x1node*)malloc( + (sizeof(x1node) + sizeof(x1node*))*1024 ); + if( x1a->tbl==0 ){ + free(x1a); + x1a = 0; + }else{ + int i; + x1a->ht = (x1node**)&(x1a->tbl[1024]); + for(i=0; i<1024; i++) x1a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Strsafe_insert(char *data) +{ + x1node *np; + int h; + int ph; + + if( x1a==0 ) return 0; + ph = strhash(data); + h = ph & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x1a->count>=x1a->size ){ + /* Need to make the hash table bigger */ + int i,size; + struct s_x1 array; + array.size = size = x1a->size*2; + array.count = x1a->count; + array.tbl = (x1node*)malloc( + (sizeof(x1node) + sizeof(x1node*))*size ); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x1node**)&(array.tbl[size]); + for(i=0; icount; i++){ + x1node *oldnp, *newnp; + oldnp = &(x1a->tbl[i]); + h = strhash(oldnp->data) & (size-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x1a->tbl); + *x1a = array; + } + /* Insert the new data */ + h = ph & (x1a->size-1); + np = &(x1a->tbl[x1a->count++]); + np->data = data; + if( x1a->ht[h] ) x1a->ht[h]->from = &(np->next); + np->next = x1a->ht[h]; + x1a->ht[h] = np; + np->from = &(x1a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. 
*/ +char *Strsafe_find(const char *key) +{ + int h; + x1node *np; + + if( x1a==0 ) return 0; + h = strhash(key) & (x1a->size-1); + np = x1a->ht[h]; + while( np ){ + if( strcmp(np->data,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return a pointer to the (terminal or nonterminal) symbol "x". +** Create a new symbol if this is the first time "x" has been seen. +*/ +struct symbol *Symbol_new(const char *x) +{ + struct symbol *sp; + + sp = Symbol_find(x); + if( sp==0 ){ + sp = (struct symbol *)calloc(1, sizeof(struct symbol) ); + MemoryCheck(sp); + sp->name = Strsafe(x); + sp->type = safe_isupper(*x) ? TERMINAL : NONTERMINAL; + sp->rule = 0; + sp->fallback = 0; + sp->prec = -1; + sp->assoc = UNK; + sp->firstset = 0; + sp->lambda = LEMON_FALSE; + sp->destructor = 0; + sp->destLineno = 0; + sp->datatype = 0; + sp->useCnt = 0; + Symbol_insert(sp,sp->name); + } + sp->useCnt++; + return sp; +} + +/* Compare two symbols for working purposes +** +** Symbols that begin with upper case letters (terminals or tokens) +** must sort before symbols that begin with lower case letters +** (non-terminals). Other than that, the order does not matter. +** +** We find experimentally that leaving the symbols in their original +** order (the order they appeared in the grammar file) gives the +** smallest parser tables in SQLite. +*/ +int Symbolcmpp(const void *a_arg, const void *b_arg){ + struct symbol *const *a = (struct symbol *const *) a_arg; + struct symbol *const *b = (struct symbol *const *) b_arg; + int i1 = (**a).index + 10000000*((**a).name[0]>'Z'); + int i2 = (**b).index + 10000000*((**b).name[0]>'Z'); + return i1-i2; +} + +/* There is one instance of the following structure for each +** associative array of type "x2". +*/ +struct s_x2 { + int size; /* The number of available slots. 
*/ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x2node *tbl; /* The data stored here */ + struct s_x2node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x2". +*/ +typedef struct s_x2node { + struct symbol *data; /* The data */ + char *key; /* The key */ + struct s_x2node *next; /* Next entry with the same hash */ + struct s_x2node **from; /* Previous link */ +} x2node; + +/* There is only one instance of the array, which is the following */ +static struct s_x2 *x2a; + +/* Allocate a new associative array */ +void Symbol_init(void){ + if( x2a ) return; + x2a = (struct s_x2*)malloc( sizeof(struct s_x2) ); + if( x2a ){ + x2a->size = 128; + x2a->count = 0; + x2a->tbl = (x2node*)malloc( + (sizeof(x2node) + sizeof(x2node*))*128 ); + if( x2a->tbl==0 ){ + free(x2a); + x2a = 0; + }else{ + int i; + x2a->ht = (x2node**)&(x2a->tbl[128]); + for(i=0; i<128; i++) x2a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int Symbol_insert(struct symbol *data, char *key) +{ + x2node *np; + int h; + int ph; + + if( x2a==0 ) return 0; + ph = strhash(key); + h = ph & (x2a->size-1); + np = x2a->ht[h]; + while( np ){ + if( strcmp(np->key,key)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. 
*/ + return 0; + } + np = np->next; + } + if( x2a->count>=x2a->size ){ + /* Need to make the hash table bigger */ + int i,size; + struct s_x2 array; + array.size = size = x2a->size*2; + array.count = x2a->count; + array.tbl = (x2node*)malloc( + (sizeof(x2node) + sizeof(x2node*))*size ); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x2node**)&(array.tbl[size]); + for(i=0; icount; i++){ + x2node *oldnp, *newnp; + oldnp = &(x2a->tbl[i]); + h = strhash(oldnp->key) & (size-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->key = oldnp->key; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x2a->tbl); + *x2a = array; + } + /* Insert the new data */ + h = ph & (x2a->size-1); + np = &(x2a->tbl[x2a->count++]); + np->key = key; + np->data = data; + if( x2a->ht[h] ) x2a->ht[h]->from = &(np->next); + np->next = x2a->ht[h]; + x2a->ht[h] = np; + np->from = &(x2a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct symbol *Symbol_find(const char *key) +{ + int h; + x2node *np; + + if( x2a==0 ) return 0; + h = strhash(key) & (x2a->size-1); + np = x2a->ht[h]; + while( np ){ + if( strcmp(np->key,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return the n-th data. Return NULL if n is out of range. */ +struct symbol *Symbol_Nth(int n) +{ + struct symbol *data; + if( x2a && n>0 && n<=x2a->count ){ + data = x2a->tbl[n-1].data; + }else{ + data = 0; + } + return data; +} + +/* Return the size of the array */ +int Symbol_count(void) +{ + return x2a ? x2a->count : 0; +} + +/* Return an array of pointers to all data in the table. +** The array is obtained from malloc. Return NULL if memory allocation +** problems, or if the array is empty. 
*/ +struct symbol **Symbol_arrayof(void) +{ + struct symbol **array; + int i,size; + if( x2a==0 ) return 0; + size = x2a->count; + array = (struct symbol **)calloc(size, sizeof(struct symbol *)); + if( array ){ + for(i=0; itbl[i].data; + } + return array; +} + +/* Compare two configurations */ +int Configcmp(const void *a_arg, const void *b_arg) +{ + const struct config *a = a_arg, *b = b_arg; + int x; + x = a->rp->index - b->rp->index; + if( x==0 ) x = a->dot - b->dot; + return x; +} + +/* Compare two states */ +PRIVATE int statecmp(struct config *a, struct config *b) +{ + int rc; + for(rc=0; rc==0 && a && b; a=a->bp, b=b->bp){ + rc = a->rp->index - b->rp->index; + if( rc==0 ) rc = a->dot - b->dot; + } + if( rc==0 ){ + if( a ) rc = 1; + if( b ) rc = -1; + } + return rc; +} + +/* Hash a state */ +PRIVATE int statehash(struct config *a) +{ + int h=0; + while( a ){ + h = h*571 + a->rp->index*37 + a->dot; + a = a->bp; + } + return h; +} + +/* Allocate a new state structure */ +struct state *State_new(void) +{ + struct state *new; + new = (struct state *)calloc(1, sizeof(struct state) ); + MemoryCheck(new); + return new; +} + +/* There is one instance of the following structure for each +** associative array of type "x3". +*/ +struct s_x3 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x3node *tbl; /* The data stored here */ + struct s_x3node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x3". 
+*/ +typedef struct s_x3node { + struct state *data; /* The data */ + struct config *key; /* The key */ + struct s_x3node *next; /* Next entry with the same hash */ + struct s_x3node **from; /* Previous link */ +} x3node; + +/* There is only one instance of the array, which is the following */ +static struct s_x3 *x3a; + +/* Allocate a new associative array */ +void State_init(void){ + if( x3a ) return; + x3a = (struct s_x3*)malloc( sizeof(struct s_x3) ); + if( x3a ){ + x3a->size = 128; + x3a->count = 0; + x3a->tbl = (x3node*)malloc( + (sizeof(x3node) + sizeof(x3node*))*128 ); + if( x3a->tbl==0 ){ + free(x3a); + x3a = 0; + }else{ + int i; + x3a->ht = (x3node**)&(x3a->tbl[128]); + for(i=0; i<128; i++) x3a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. +** Prior data with the same key is NOT overwritten */ +int State_insert(struct state *data, struct config *key) +{ + x3node *np; + int h; + int ph; + + if( x3a==0 ) return 0; + ph = statehash(key); + h = ph & (x3a->size-1); + np = x3a->ht[h]; + while( np ){ + if( statecmp(np->key,key)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. 
*/ + return 0; + } + np = np->next; + } + if( x3a->count>=x3a->size ){ + /* Need to make the hash table bigger */ + int i,size; + struct s_x3 array; + array.size = size = x3a->size*2; + array.count = x3a->count; + array.tbl = (x3node*)malloc( + (sizeof(x3node) + sizeof(x3node*))*size ); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x3node**)&(array.tbl[size]); + for(i=0; icount; i++){ + x3node *oldnp, *newnp; + oldnp = &(x3a->tbl[i]); + h = statehash(oldnp->key) & (size-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->key = oldnp->key; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x3a->tbl); + *x3a = array; + } + /* Insert the new data */ + h = ph & (x3a->size-1); + np = &(x3a->tbl[x3a->count++]); + np->key = key; + np->data = data; + if( x3a->ht[h] ) x3a->ht[h]->from = &(np->next); + np->next = x3a->ht[h]; + x3a->ht[h] = np; + np->from = &(x3a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct state *State_find(struct config *key) +{ + int h; + x3node *np; + + if( x3a==0 ) return 0; + h = statehash(key) & (x3a->size-1); + np = x3a->ht[h]; + while( np ){ + if( statecmp(np->key,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Return an array of pointers to all data in the table. +** The array is obtained from malloc. Return NULL if memory allocation +** problems, or if the array is empty. 
*/ +struct state **State_arrayof(void) +{ + struct state **array; + int i,size; + if( x3a==0 ) return 0; + size = x3a->count; + array = (struct state **)malloc( sizeof(struct state *)*size ); + if( array ){ + for(i=0; itbl[i].data; + } + return array; +} + +/* Hash a configuration */ +PRIVATE int confighash(struct config *a) +{ + int h=0; + h = h*571 + a->rp->index*37 + a->dot; + return h; +} + +/* There is one instance of the following structure for each +** associative array of type "x4". +*/ +struct s_x4 { + int size; /* The number of available slots. */ + /* Must be a power of 2 greater than or */ + /* equal to 1 */ + int count; /* Number of currently slots filled */ + struct s_x4node *tbl; /* The data stored here */ + struct s_x4node **ht; /* Hash table for lookups */ +}; + +/* There is one instance of this structure for every data element +** in an associative array of type "x4". +*/ +typedef struct s_x4node { + struct config *data; /* The data */ + struct s_x4node *next; /* Next entry with the same hash */ + struct s_x4node **from; /* Previous link */ +} x4node; + +/* There is only one instance of the array, which is the following */ +static struct s_x4 *x4a; + +/* Allocate a new associative array */ +void Configtable_init(void){ + if( x4a ) return; + x4a = (struct s_x4*)malloc( sizeof(struct s_x4) ); + if( x4a ){ + x4a->size = 64; + x4a->count = 0; + x4a->tbl = (x4node*)malloc( + (sizeof(x4node) + sizeof(x4node*))*64 ); + if( x4a->tbl==0 ){ + free(x4a); + x4a = 0; + }else{ + int i; + x4a->ht = (x4node**)&(x4a->tbl[64]); + for(i=0; i<64; i++) x4a->ht[i] = 0; + } + } +} +/* Insert a new record into the array. Return TRUE if successful. 
+** Prior data with the same key is NOT overwritten */ +int Configtable_insert(struct config *data) +{ + x4node *np; + int h; + int ph; + + if( x4a==0 ) return 0; + ph = confighash(data); + h = ph & (x4a->size-1); + np = x4a->ht[h]; + while( np ){ + if( Configcmp(np->data,data)==0 ){ + /* An existing entry with the same key is found. */ + /* Fail because overwrite is not allows. */ + return 0; + } + np = np->next; + } + if( x4a->count>=x4a->size ){ + /* Need to make the hash table bigger */ + int i,size; + struct s_x4 array; + array.size = size = x4a->size*2; + array.count = x4a->count; + array.tbl = (x4node*)malloc( + (sizeof(x4node) + sizeof(x4node*))*size ); + if( array.tbl==0 ) return 0; /* Fail due to malloc failure */ + array.ht = (x4node**)&(array.tbl[size]); + for(i=0; icount; i++){ + x4node *oldnp, *newnp; + oldnp = &(x4a->tbl[i]); + h = confighash(oldnp->data) & (size-1); + newnp = &(array.tbl[i]); + if( array.ht[h] ) array.ht[h]->from = &(newnp->next); + newnp->next = array.ht[h]; + newnp->data = oldnp->data; + newnp->from = &(array.ht[h]); + array.ht[h] = newnp; + } + free(x4a->tbl); + *x4a = array; + } + /* Insert the new data */ + h = ph & (x4a->size-1); + np = &(x4a->tbl[x4a->count++]); + np->data = data; + if( x4a->ht[h] ) x4a->ht[h]->from = &(np->next); + np->next = x4a->ht[h]; + x4a->ht[h] = np; + np->from = &(x4a->ht[h]); + return 1; +} + +/* Return a pointer to data assigned to the given key. Return NULL +** if no such key. */ +struct config *Configtable_find(struct config *key) +{ + int h; + x4node *np; + + if( x4a==0 ) return 0; + h = confighash(key) & (x4a->size-1); + np = x4a->ht[h]; + while( np ){ + if( Configcmp(np->data,key)==0 ) break; + np = np->next; + } + return np ? np->data : 0; +} + +/* Remove all data from the table. Pass each data to the function "f" +** as it is removed. ("f" may be null to avoid this step.) 
*/ +void Configtable_clear(int(*f)(struct config *)) +{ + int i; + if( x4a==0 || x4a->count==0 ) return; + if( f ) for(i=0; icount; i++) (*f)(x4a->tbl[i].data); + for(i=0; isize; i++) x4a->ht[i] = 0; + x4a->count = 0; + return; +} diff --git a/slow-tests/sm/integration/lemon/script.py b/slow-tests/sm/integration/lemon/script.py new file mode 100644 index 00000000..f5439a23 --- /dev/null +++ b/slow-tests/sm/integration/lemon/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/slow-tests/sm/integration/lemon/stdout.txt b/slow-tests/sm/integration/lemon/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/sm/__init__.py b/sm/__init__.py new file mode 100644 index 00000000..f704a641 --- /dev/null +++ b/sm/__init__.py @@ -0,0 +1,86 @@ +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +import gcc + +from sm.options import Options +from sm.solver import Context, solve +from sm.utils import Timer + +class IpaSmPass(gcc.IpaPass): + def __init__(self, checkers, options, selftest): + gcc.IpaPass.__init__(self, 'sm-ipa-pass') + self.checkers = checkers + self.options = options + self.selftest = selftest + + def execute(self): + if self.options.during_lto: + # LTO pass: + # Only run the analysis during the link, within lto1, not for each + # cc1 invocation: + if not gcc.is_lto(): + return + + if 0: + from gccutils import callgraph_to_dot, invoke_dot + dot = callgraph_to_dot() + invoke_dot(dot, name='callgraph') + + # Interprocedural implementation, using the supergraph of all calls: + from gccutils.graph.supergraph import Supergraph + sg = Supergraph(split_phi_nodes=True, add_fake_entry_node=True) + if self.options.show_supergraph: + dot = sg.to_dot('supergraph') + from gccutils import invoke_dot + # print(dot) + invoke_dot(dot, name='supergraph') + + for checker in self.checkers: + for sm in checker.sms: + ctxt = Context(checker, sm, sg, self.options) + + def run(): + with Timer(ctxt, 'running %s' % sm.name): + solve(ctxt, 'solution', self.selftest) + + if self.options.enable_profile: + # Profiled version: + import cProfile + prof_filename = '%s.%s.sm-profile' % (gcc.get_dump_base_name(), + sm.name) + try: + cProfile.runctx('run()', + globals(), locals(), + filename=prof_filename) + finally: + import pstats + prof = pstats.Stats(prof_filename) + prof.sort_stats('cumulative').print_stats(20) + else: + # Unprofiled version: + run() + +def main(checkers, options=None, selftest=None): + if not 
options: + options = Options() + + # Run as an interprocedural pass (over SSA gimple), potentially + # during lto1: + ps = IpaSmPass(checkers, options, selftest) + ps.register_before('whole-program') + diff --git a/sm/checker.py b/sm/checker.py new file mode 100644 index 00000000..93058113 --- /dev/null +++ b/sm/checker.py @@ -0,0 +1,853 @@ +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from ast import parse, increment_lineno +import sys + +import gcc + +from gccutils import get_nonnull_arguments +from gccutils.graph.supergraph import ReturnNode + +from sm.error import gccexpr_to_str, Error +from sm.utils import simplify + +def indent(text): + return '\n'.join([' %s' % line + for line in text.splitlines()]) + +class Checker: + # Top-level object representing a .sm file + def __init__(self, sms): + self.sms = sms # list of Sm + + def __repr__(self): + return 'Checker(%r)' % self.sms + + def __str__(self): + return '\n'.join(str(sm) for sm in self.sms) + + def to_dot(self, name): + from sm.dot import checker_to_dot + return checker_to_dot(self, name) + + def accept(self, visitor): + visitor.visit(self) + for sm in self.sms: + sm.accept(visitor) + +class Sm: + def __init__(self, name, clauses): + self.name = name + self.clauses = clauses + + def __str__(self): + result = 'sm %s {\n' % self.name + for clause in self.clauses: + result += indent(str(clause)) + '\n\n' + result += '}\n' + return result + + def __repr__(self): + return ('Sm(name=%r, clauses=%r)' + % (self.name, self.clauses)) + + def iter_statenames(self): + statenames = set() + for sc in self.clauses: + if isinstance(sc, StateClause): + for statename in sc.statelist: + if statename.endswith('*'): + continue + if statename not in statenames: + statenames.add(statename) + yield statename + + def accept(self, visitor): + visitor.visit(self) + for sc in self.clauses: + sc.accept(visitor) + +class Clause: + # top-level item within an sm + def accept(self, visitor): + visitor.visit(self) + +class Decl(Clause): + # a matchable thing + def __init__(self, has_state, name): + self.has_state = has_state + self.name = name + + def __str__(self): + return ('%sdecl %s %s;\n' + % ('stateful ' if self.has_state else '', + self.kind, + self.name)) + + def __hash__(self): + return hash(self.name) + + @classmethod + def make(cls, has_state, declkind, name): + if declkind == 'any_pointer': + return 
AnyPointer(has_state, name) + elif declkind == 'any_variable': + return AnyVariable(has_state, name) + elif declkind == 'any_expr': + return AnyExpr(has_state, name) + raise UnknownDeclkind(declkind) + + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, self.name) + + def __eq__(self, other): + if self.__class__ == other.__class__: + if self.name == other.name: + return True + + def matched_by(self, gccexpr): + print(self) + raise NotImplementedError() + +class AnyPointer(Decl): + kind = 'any_pointer' + def matched_by(self, gccexpr): + return isinstance(gccexpr.type, gcc.PointerType) + +class AnyVariable(Decl): + kind = 'any_variable' + def matched_by(self, gccexpr): + return isinstance(gccexpr, (gcc.VarDecl, gcc.ParmDecl)) + +class AnyExpr(Decl): + kind = 'any_expr' + def matched_by(self, gccexpr): + return True + +class NamedPattern(Clause): + """ + The definition of a named pattern + """ + def __init__(self, name, pattern): + self.name = name + self.pattern = pattern + + def __str__(self): + return 'pat %s %s;' % (self.name, self.pattern) + +class PythonFragment(Clause): + """ + A fragment of Python, with a line number offset so that tracebacks + give the correct location within the original .sm file + """ + def __init__(self, src, lineoffset): + self.src = src + from sm.parser import DEBUG_LINE_NUMBERING + if DEBUG_LINE_NUMBERING: + print('setting self.lineoffset = %r' % lineoffset) + print(' repr(src): %r' % src) + self.lineoffset = lineoffset + self._code = None + + def compile(self, filename): + if not filename: + filename = '' + + expr, num_stripped_lines = self._get_source() + from sm.parser import DEBUG_LINE_NUMBERING + if DEBUG_LINE_NUMBERING: + print('num_stripped_lines: %i' % num_stripped_lines) + print('self.lineoffset: %i' % self.lineoffset) + try: + astroot = parse(expr, filename) + except SyntaxError, err: + err.lineno += self.lineoffset + num_stripped_lines + raise err + increment_lineno(astroot, self.lineoffset + 
num_stripped_lines) + self._code = compile(astroot, filename, 'exec') + + def __str__(self): + return '{{%s}}' % self.src + def __repr__(self): + return '%s(%r)' % (self.__class__.__name__, self.src, ) + def __eq__(self, other): + if self.__class__ == other.__class__: + if self.src == other.src: + if self.lineoffset == other.lineoffset: + return True + + def _get_source(self): + # Get at python code + lines = self.src.splitlines() + # Strip leading fully-whitespace lines: + num_stripped_lines = 0 + while lines and (lines[0] == '' or lines[0].isspace()): + lines = lines[1:] + num_stripped_lines += 1 + def try_to_fix_indent(): + # Locate any source-wide indentation based on indentation of first non-whitespace line: + indent = len(lines[0]) - len(lines[0].lstrip()) + outdented_lines = [] + for line in lines: + prefix = line[:indent] + if not (prefix == '' or prefix.isspace()): + # indentation error + return lines + outdented_lines.append(line[indent:]) + return outdented_lines + + lines = try_to_fix_indent() + return '\n'.join(lines), num_stripped_lines + + def get_code(self): + assert self._code + return self._code + +class StateClause(Clause): + def __init__(self, statelist, patternrulelist): + self.statelist = statelist + self.patternrulelist = patternrulelist + + def __str__(self): + result = ('%s:\n' + % (', '.join([str(state) + for state in self.statelist]))) + prs = '\n| '.join([str(pr) + for pr in self.patternrulelist]) + result += indent(prs) + return result + + def __repr__(self): + return 'StateClause(statelist=%r, patternrulelist=%r)' % (self.statelist, self.patternrulelist) + + def accept(self, visitor): + visitor.visit(self) + for pr in self.patternrulelist: + pr.accept(visitor) + +class PatternRule: + def __init__(self, pattern, outcomes): + self.pattern = pattern + self.outcomes = outcomes + + def __str__(self): + result = '%s => ' % self.pattern + result += ', '.join([str(outcome) + for outcome in self.outcomes]) + result += ';' + return result + + 
class Match:
    """
    A match of a pattern

    pattern: the Pattern that matched
    node: the graph node at which it matched
    _dict: the bindings established by the match, mapping from the sm-level
    declaration to the (simplified) gcc expression it was matched against
    """
    def __init__(self, pattern, node):
        self.pattern = pattern
        self.node = node
        self._dict = {}

    def __eq__(self, other):
        # Note that the node is deliberately not part of the comparison
        if isinstance(other, Match):
            return bool(self.pattern == other.pattern
                        and self._dict == other._dict)
        return False

    def __ne__(self, other):
        # Python 2 does not derive "!=" from __eq__
        return not self.__eq__(other)

    def __hash__(self):
        # Consistent with __eq__: equal matches have equal patterns
        return hash(self.pattern)

    def __repr__(self):
        return 'Match(%r, %r)' % (self.pattern, self._dict)

    def description(self, ctxt):
        return self.pattern.description(self, ctxt)

    def match_term(self, ctxt, gccexpr, smexpr):
        """
        Determine whether gccexpr matches smexpr;
        if it does, add it to this Match's dictionary

        Returns True on a match, False otherwise.  Only named terms
        (where smexpr is a str) are recorded in the dictionary; other
        terms, such as constants, match without creating a binding.
        """
        gccexpr = ctxt.compare(gccexpr, smexpr)
        if not gccexpr:
            return False
        if isinstance(smexpr, str):
            decl = ctxt.lookup_decl(smexpr)
            self._dict[decl] = simplify(gccexpr)
        return True

    def describe(self, ctxt, smexpr):
        """
        Get a string for smexpr: for a named term, describe the gcc
        expression bound to it; otherwise describe smexpr itself.
        """
        ctxt.debug('Match.describe(self=%r, smexpr=%r)', self, smexpr)
        if isinstance(smexpr, str):
            smdecl = ctxt.lookup_decl(smexpr)
            vardecl = self._dict[smdecl]
            return gccexpr_to_str(ctxt, self.node, vardecl)
        else:
            return gccexpr_to_str(ctxt, self.node, smexpr)

    def describe_stateful_smexpr(self, ctxt):
        gccvar = self.get_stateful_gccvar(ctxt)
        return str(gccvar)

    def get_stateful_gccvar(self, ctxt):
        """
        Get the gcc expression bound to the context's stateful decl
        (raises KeyError if this match did not bind it)
        """
        return self._dict[ctxt._stateful_decl]

    def iter_binding(self):
        """
        Yield (decl, gccexpr) pairs for every binding in this match
        """
        # .items() rather than .iteritems() so that this works under both
        # Python 2 and Python 3
        for decl, gccexpr in self._dict.items():
            yield (decl, gccexpr)
class Assignment(Pattern):
    """
    Pattern for a simple assignment "{ lhs = rhs }": a gcc.GimpleAssign
    with exactly one right-hand-side operand
    """
    def __init__(self, lhs, rhs):
        self.lhs = lhs
        self.rhs = rhs

    def __repr__(self):
        return 'Assignment(lhs=%r, rhs=%r)' % (self.lhs, self.rhs)

    def __str__(self):
        return '{ %s = %s }' % (self.lhs, self.rhs)

    def __eq__(self, other):
        # True when equal; otherwise falls off the end (None)
        if self.__class__ == other.__class__ \
           and self.lhs == other.lhs \
           and self.rhs == other.rhs:
            return True

    def iter_matches(self, stmt, edge, ctxt):
        if not isinstance(stmt, gcc.GimpleAssign):
            return
        if not (len(stmt.rhs) == 1):
            return
        candidate = Match(self, edge.srcnode)
        # Both sides must match, the left-hand side being tried first:
        if not candidate.match_term(ctxt, stmt.lhs, self.lhs):
            return
        if not candidate.match_term(ctxt, stmt.rhs[0], self.rhs):
            return
        yield candidate

    def description(self, match, ctxt):
        lhs_text = match.describe(ctxt, self.lhs)
        rhs_text = match.describe(ctxt, self.rhs)
        return '%s assigned to %s' % (lhs_text, rhs_text)
class FunctionCall(Pattern):
    """
    Base class for patterns that match a direct call to a named function.

    fnname: the name of the function
    args: the sm-level terms to match against the leading arguments of
    the call; a call with more arguments than the pattern still matches
    """
    def __init__(self, fnname, args):
        self.fnname = fnname
        self.args = args

    def __str__(self):
        return '{ %s(...) }' % self.fnname

    def iter_matches(self, stmt, edge, ctxt):
        if not isinstance(stmt, gcc.GimpleCall):
            return

        # Only direct calls to a known function can be matched by name:
        if not isinstance(stmt.fn, gcc.AddrExpr):
            return

        if not isinstance(stmt.fn.operand, gcc.FunctionDecl):
            return

        if stmt.fn.operand.name != self.fnname:
            return

        # We have a matching function name.
        # A call supplying fewer arguments than the pattern names can't
        # match (and indexing stmt.args below would raise IndexError):
        if len(stmt.args) < len(self.args):
            return

        m = Match(self, edge.srcnode)
        for i, arg in enumerate(self.args):
            if not m.match_term(ctxt, stmt.args[i], arg):
                return
        yield m
class Comparison(Pattern):
    """
    Pattern for a conditional "{ lhs OP rhs }", matched against
    gcc.GimpleCond statements whose comparison code corresponds to OP
    """
    def __init__(self, lhs, op, rhs):
        self.lhs = lhs
        self.op = op
        self.rhs = rhs

    def __repr__(self):
        return 'Comparison(%r, %r, %r)' % (self.lhs, self.op, self.rhs)

    def __str__(self):
        return '{ %s %s %s }' % (self.lhs, self.op, self.rhs)

    def __eq__(self, other):
        # True when equal; otherwise falls off the end (None)
        if self.__class__ == other.__class__ \
           and self.lhs == other.lhs \
           and self.op == other.op \
           and self.rhs == other.rhs:
            return True

    def iter_matches(self, stmt, edge, ctxt):
        if not isinstance(stmt, gcc.GimpleCond):
            return

        # For now, specialcase: translate the operator text into the
        # tree code used by gcc (KeyError for any other operator)
        wanted_code = {'==' : gcc.EqExpr,
                       '!=' : gcc.NeExpr,
                       '<'  : gcc.LtExpr,
                       '<=' : gcc.LeExpr,
                       '>'  : gcc.GtExpr,
                       '>=' : gcc.GeExpr}[self.op]
        if not (stmt.exprcode == wanted_code):
            return

        candidate = Match(self, edge.srcnode)
        if not candidate.match_term(ctxt, stmt.lhs, self.lhs):
            return
        if not candidate.match_term(ctxt, stmt.rhs, self.rhs):
            return
        yield candidate

    def description(self, match, ctxt):
        lhs_text = match.describe(ctxt, self.lhs)
        rhs_text = match.describe(ctxt, self.rhs)
        return '%s compared against %s' % (lhs_text, rhs_text)
class ArrayLookup(Pattern):
    """
    Pattern for an array access "{ array[index] }" appearing anywhere
    within a statement
    """
    def __init__(self, array, index):
        self.array = array
        self.index = index

    def __repr__(self):
        return 'ArrayLookup(array=%r, index=%r)' % (self.array, self.index)

    def __str__(self):
        return '{ %s[%s] }' % (self.array, self.index)

    def __eq__(self, other):
        # True when equal; otherwise falls off the end (None)
        if self.__class__ == other.__class__ \
           and self.array == other.array \
           and self.index == other.index:
            return True

    def iter_matches(self, stmt, edge, ctxt):
        def is_array_ref(node, loc):
            # Callback for walk_tree: flag the first gcc.ArrayRef seen
            return True if isinstance(node, gcc.ArrayRef) else None

        arrayref = stmt.walk_tree(is_array_ref, stmt.loc)
        if not arrayref:
            return

        candidate = Match(self, edge.srcnode)
        # The array term is tried before the index term:
        if not candidate.match_term(ctxt, arrayref.array, self.array):
            return
        if not candidate.match_term(ctxt, arrayref.index, self.index):
            return
        yield candidate

    def description(self, match, ctxt):
        array_text = match.describe(ctxt, self.array)
        index_text = match.describe(ctxt, self.index)
        return '%s[%s]' % (array_text, index_text)
class NonnullArg(SpecialPattern):
    """
    Special pattern matching each argument of a direct function call that
    get_nonnull_arguments() reports for the called function's type.

    Each match binds the stateful decl to the argument, and also binds
    "argindex", "argnumber" (argindex + 1), "function" and "parameter".
    """
    def iter_matches(self, stmt, edge, ctxt):
        # (this is similar to FunctionCall.iter_matches)

        ctxt.debug('NonNullArg.iter_matches(%s, %s)', stmt, edge)

        if not isinstance(stmt, gcc.GimpleCall):
            return

        # As in FunctionCall.iter_matches, only handle direct calls: the
        # bindings below need the function declaration (stmt.fn.operand)
        if not isinstance(stmt.fn, gcc.AddrExpr):
            return

        fndecl = stmt.fn.operand
        if not isinstance(fndecl, gcc.FunctionDecl):
            return

        class FakeExpr:
            # Stand-in for a Decl: just enough (a name) for the binding
            # to be exposed under that name
            def __init__(self, name):
                self.name = name
            def __str__(self):
                return self.name
            def __repr__(self):
                return 'FakeExpr(%r)' % self.name

        for argindex in get_nonnull_arguments(stmt.fn.type.dereference):
            # Skip indices for which this call supplies no argument:
            if argindex >= len(stmt.args):
                continue

            m = Match(self, edge.srcnode)
            m._dict[ctxt._stateful_decl] = simplify(stmt.args[argindex])
            m._dict[FakeExpr('argindex')] = argindex
            m._dict[FakeExpr('argnumber')] = argindex + 1
            m._dict[FakeExpr('function')] = fndecl
            # 'parameter' is only available if we also have the function
            # definition, not just the declaration:
            params = fndecl.arguments
            if params and argindex < len(params):
                m._dict[FakeExpr('parameter')] = params[argindex]
            else:
                m._dict[FakeExpr('parameter')] = None
            yield m

    def description(self, match, ctxt):
        return '%s used as must-not-be-NULL argument' % match.get_stateful_gccvar(ctxt)
class BoundVariable:
    """
    A variable exposed to a Python script fragment

    Wraps a gcc expression: str() gives the description of the expression
    at the given supernode, and any attribute not found on the wrapper is
    looked up on the wrapped expression.
    """
    def __init__(self, ctxt, supernode, gccexpr):
        self.ctxt = ctxt
        self.supernode = supernode
        self.gccexpr = gccexpr

    def __str__(self):
        return gccexpr_to_str(self.ctxt, self.supernode, self.gccexpr)

    def __getattr__(self, name):
        # Only invoked when normal attribute lookup fails.  Fetch the
        # wrapped expression straight from the instance dict: going via
        # "self.gccexpr" would re-enter __getattr__ and recurse without
        # end on an instance whose gccexpr has not been set
        try:
            gccexpr = self.__dict__['gccexpr']
        except KeyError:
            raise AttributeError(name)
        return getattr(gccexpr, name)
'state': state} + ctxt.python_globals.update(globals_) + + # Bind the names for the matched Decls + # For example, when: + # state decl any_pointer ptr; + # has been matched by: + # void *q; + # then we bind the string "ptr" to the gcc.VarDecl for q + # (which has str() == 'q') + locals_ = {} + for decl, value in match.iter_binding(): + locals_[decl.name] = BoundVariable(ctxt, edge.srcnode, value) + ctxt.python_locals.update(locals_) + + if 0: + print(' globals_: %r' % globals_) + print(' locals_: %r' % locals_) + # Now run the code: + ctxt.debug('state before: %r', globals_['state']) + ctxt.log('evaluating python code') + result = eval(code, ctxt.python_globals, ctxt.python_locals) + ctxt.debug('state after: %r', globals_['state']) + + # Clear the binding: + for name in locals_: + del ctxt.python_locals[name] + + return Effect(globals_['state'], errors) diff --git a/sm/checkers/malloc_checker.sm b/sm/checkers/malloc_checker.sm new file mode 100644 index 00000000..f206d9c4 --- /dev/null +++ b/sm/checkers/malloc_checker.sm @@ -0,0 +1,69 @@ +/* + Work-in-progress checker for malloc/free usage + */ +sm malloc_checker { + stateful decl any_pointer ptr; + + ptr.*: + { ptr = malloc() } => ptr.unchecked; + + ptr.*: + { ptr = 0 } => ptr.null; + + ptr.*: + { ptr == 0 } => true=ptr.null, false=ptr.nonnull + | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + ; + + ptr.unchecked: + { *ptr } + => {{ + error('dereference of possibly-NULL pointer %s' % ptr, + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + }}, ptr.nonnull; + + ptr.null: + { *ptr } + => {{ + error('dereference of NULL pointer %s' % ptr, + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + }}, ptr.stop; + + ptr.start, ptr.unchecked, ptr.null, ptr.nonnull: + { free(ptr) } => ptr.free; + + ptr.free: + { free(ptr) } + => {{ + error('double-free of %s' % ptr, + # "CWE-415: Double Free" + cwe='CWE-415') + }}, ptr.stop + | { ptr } + => {{ + 
error('use-after-free of %s' % ptr, + # "CWE-416: Use After Free" + cwe='CWE-416') + }}, ptr.stop + ; + + ptr.unchecked: + $arg_must_not_be_null$ + => {{ + error('possibly-NULL pointer %s passed as argument %i to %s' + % (ptr, argnumber, function), + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + }}, ptr.nonnull; + + ptr.null: + $arg_must_not_be_null$ + => {{ + error('NULL pointer %s passed as argument %i to %s' + % (ptr, argnumber, function), + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + }}, ptr.stop; +} diff --git a/sm/checkers/points_to_stack.sm b/sm/checkers/points_to_stack.sm new file mode 100644 index 00000000..6b52d91a --- /dev/null +++ b/sm/checkers/points_to_stack.sm @@ -0,0 +1,60 @@ +/* + Copyright 2013 David Malcolm + Copyright 2013 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +/* + Checker for CWE-562: Return of Stack Variable Address + + This will only detect simple cases: it doesn't attempt to track e.g. + pointers within a return struct, or within a reference graph. + + Also, it potentially exhibits false positives for the case where + a pointer to a variable deeper in the stack is returned by a frame + higher up, where the usage of the area is within the lifetime of + the stack frame holding it. 
+ + */ +sm points_to_stack { + stateful decl any_pointer ptr; + decl any_variable var; + + {{ + def is_on_stack(var): + import gcc + + if isinstance(var.context, gcc.FunctionDecl): + if not var.static: + return True + }} + + ptr.*: + { ptr = &var } => + {{ + if is_on_stack(var): + set_state('ptr.points_to_stack', var_pointed_to=var) + }}; + + ptr.points_to_stack: + { return ptr } + => {{ + error('return of "%s" returns address within stack of local variable "%s"' + % (ptr, state.var_pointed_to), + # "CWE-562: Return of Stack Variable Address": + cwe='CWE-562') + }}; +} \ No newline at end of file diff --git a/sm/checkers/sizeof_allocation.sm b/sm/checkers/sizeof_allocation.sm new file mode 100644 index 00000000..48e472f8 --- /dev/null +++ b/sm/checkers/sizeof_allocation.sm @@ -0,0 +1,91 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+ + Checker to detect typos of the form: malloc(sizeof(WRONG_TYPE)) +*/ +sm sizeof_allocation { + stateful decl any_pointer ptr; + decl any_pointer other_ptr; + decl any_expr nmemb; + decl any_expr sz; + + {{ + def is_known_int(var): + # We can only run the checker on constant integer expressions + import gcc + return isinstance(var.gccexpr, gcc.IntegerCst) + + def check_size(ptr, allocated_size): + import gcc + type_pointed_to = ptr.type.dereference + if not isinstance(type_pointed_to, gcc.VoidType): + required_size = type_pointed_to.sizeof + if allocated_size < required_size: + error("allocation too small: pointer to %s (%i bytes)" + " initialized with allocation of %i bytes" + % (type_pointed_to, required_size, allocated_size), + cwe="CWE-131") # "Incorrect Calculation of Buffer Size" + + # Handle cases where the cast happens on another line: + set_state("ptr.sized", size=int(allocated_size)) + }} + + ptr.*: + /* Various allocators: */ + /* libc: */ + { ptr = malloc(sz) } + | { ptr = realloc(other_ptr, sz) } + /* libpython: */ + | { ptr = PyMem_Malloc(sz) } + | { ptr = PyMem_Realloc(other_ptr, sz) } + /* glib: */ + | { ptr = g_malloc(sz) } + | { ptr = g_malloc0(sz) } + | { ptr = g_realloc(other_ptr, sz) } + | { ptr = g_try_malloc(sz) } + | { ptr = g_try_malloc0(sz) } + | { ptr = g_try_realloc(other_ptr, sz) } + /* etc */ + => {{ + if is_known_int(sz): + check_size(ptr, sz) + }}; + + ptr.*: + /* libc: */ + { ptr = calloc(nmemb, sz) } + /* glib: */ + | { ptr = g_malloc_n(nmemb, sz) } + | { ptr = g_malloc0_n(nmemb, sz) } + | { ptr = g_realloc_n(other_pointer, nmemb, sz) } + | { ptr = g_try_malloc_n(nmemb, sz) } + | { ptr = g_try_malloc0_n(nmemb, sz) } + | { ptr = g_try_realloc_n(other_pointer, nmemb, sz) } + => {{ + if is_known_int(nmemb): + if is_known_int(sz): + check_size(ptr, int(nmemb) * int(sz)) + }}; + + ptr.sized: + { other_ptr = ptr } => + {{ + check_size(other_ptr, allocated_size=state.size) + }}; + +} diff --git a/sm/checkers/taint.sm 
sm taint {
  stateful decl any_expr x;

  decl any_expr y;
  decl any_expr arr;
  decl any_expr size;
  decl any_expr nmemb;
  decl any_expr stream;
  /* Remaining operands of the read() pattern below.
     NOTE(review): declared as any_expr to mirror the other wildcard
     operands; confirm that is the intended kind */
  decl any_expr fd;
  decl any_expr count;

  /* Patterns that detect upper-bound and lower-bound checks: */
  pat check_ub { x < y } | { x <= y };
  pat check_lb { x > y } | { x >= y };
  pat check_eq { x == y };
  pat check_ne { x != y };

  /* Sources of tainted data: */
  x.*:
      /* reading data from files: */
      {fread(x, size, nmemb, stream)} => x.tainted
    | {read(fd, x, count)} => x.tainted
    ;

  /*
    When a tainted value is compared, mark it as now having either
    an upper-bound, or a lower-bound:
  */
  x.tainted:
      check_ub => true=x.has_ub, false=x.has_lb
    | check_lb => true=x.has_lb, false=x.has_ub
    ;

  /*
    If a value has only one bound, check to see if it gains the other bound:
  */
  x.has_lb:
      check_ub => true=x.ok
    | check_lb => false=x.ok
    ;
  x.has_ub:
      check_ub => false=x.ok
    | check_lb => true=x.ok
    ;

  /* If a value is successfully compared against a specific value, it's OK */
  x.tainted, x.has_lb, x.has_ub:
      check_eq => true=x.ok
    | check_ne => false=x.ok
    ;

  /* Detect when a tainted value is used in a dangerous way: */
  x.tainted:
      { arr[x] } => {{
          error('use of tainted value %s in array lookup without bounds checking' % x,
                cwe='CWE-129') # "Improper Validation of Array Index"
      }};

  x.has_lb:
      { arr[x] } => {{
          error('use of tainted value %s in array lookup without upper-bound check' % x,
                cwe='CWE-129') # "Improper Validation of Array Index"
      }};

  /* Unsigned types have an implicit lower bound: */
  x.has_ub:
      { arr[x] } => {{
          if not x.type.unsigned:
              error('use of tainted value %s in array lookup without lower-bound check' % x,
                    cwe='CWE-129') # "Improper Validation of Array Index"
      }};

}
00000000..dd2aa104 --- /dev/null +++ b/sm/dataflow.py @@ -0,0 +1,146 @@ +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +# Dataflow solver: finds the maximum fixed point (MFP) of a set of dataflow +# equations on a graph. + +import gcc + +class AbstractValue: + """ + Roughly speaking, an AbstractValue instance is an item of dataflow + information at a given node; the class knows the initial value for + entry nodes, how to compute the effect of an edge on a value (flow + function), and how to merge together the information for multiple + in-edges at a node. The solver can then use this to analyze a graph, + obtaining an instance per-node. + + More formally, an AbstractValue should be a meet-semilattice with no + infinitely-descending chains. + + Translating the jargon somewhat: a "meet-semilattice" is a + partially-ordered set in which every non-empty finite subset has a + greatest lower bound (or "meet") within the set. Informally, we have + a set of descriptions of the program state at a particular node, and + we can use "meet" on them when control-flow merges to obtain the best + description of the intersection of said information: any value which is + lower than all of the in-values is a coarser description of the program + state than any of them, giving some description of the possible + resulting state. 
By picking the *greatest* such lower bound, meet() is + picking the finest approximation to program state that's still safe. + + The infinitely-descending chains rule informally means that repeated + calls to meet() should always eventually reach some fixed point after + a finite number of steps, so that the analysis is guaranteed to + terminate. It also implies that there is a bottom element, less than + all other values. + + For more information, see e.g. chapter 3 of + "Data Flow Analysis: Theory and Practice" (2009) + Uday Khedker, Amitabha Sanyal, Bageshri Karkare + """ + @classmethod + def make_entry_point(cls, ctxt, node): + raise NotImplementedError + + @classmethod + def get_edge_value(cls, ctxt, srcvalue, edge): + """ + Generate a (dstvalue, details) pair, where "details" can be of an + arbitrary type (per AbstractValue) and could be None + """ + raise NotImplementedError + + @classmethod + def meet(cls, ctxt, lhs, rhs): + raise NotImplementedError + +def fixed_point_solver(ctxt, graph, cls): + # Given an AbstractValue subclass "cls", find the fixed point, + # generating a dict from Node to cls instance + # Use "None" as the bottom element: unreachable + # otherwise, a cls instance + result = {} + for node in graph.nodes: + result[node] = None + + # FIXME: make this a priority queue, in the node's topological order? 
+ + # Set up worklist: + workset = set() + worklist = [] + for node in graph.get_entry_nodes(): + result[node] = cls.make_entry_point(ctxt, node) + assert result[node] is not None + worklist.append(node) + workset.add(node) + + numiters = 0 + while worklist: + node = worklist.pop() + workset.remove(node) + numiters += 1 + if ctxt.options.enable_timing: + if numiters % 1000 == 0: + ctxt.timing('iter %i: len(worklist): %i analyzing node: %s', + numiters, len(worklist), node) + else: + ctxt.log('iter %i: len(worklist): %i analyzing node: %s', + numiters, len(worklist), node) + with ctxt.indent(): + # Set the location so that if an unhandled + # exception occurs, it should at least identify the + # code that triggered it: + stmt = node.stmt + if stmt: + if stmt.loc: + gcc.set_location(stmt.loc) + + srcvalue = result[node] + ctxt.log('srcvalue: %s', srcvalue) + assert srcvalue is not None + + for edge in node.succs: + ctxt.log('analyzing out-edge: %s', edge) + + dstnode = edge.dstnode + oldvalue = result[dstnode] + + # Get value along outedge: + edgevalue, details = cls.get_edge_value(ctxt, srcvalue, edge) + ctxt.log(' edge value: %s', edgevalue) + ctxt.log(' oldvalue: %s', oldvalue) + + newvalue = cls.meet(ctxt, oldvalue, edgevalue) + + ctxt.log(' newvalue: %s', newvalue) + + if newvalue != oldvalue: + # strictly speaking, newvalue must be < oldvalue, but we + # rely on the AbstractValue to correctly implement that + ctxt.log(' value changed from: %s to %s', + oldvalue, + newvalue) + assert newvalue is not None + result[dstnode] = newvalue + if dstnode not in workset: + worklist.append(dstnode) + workset.add(dstnode) + + ctxt.timing('took %i iterations to reach fixed point', numiters) + return result + diff --git a/sm/dot.py b/sm/dot.py new file mode 100644 index 00000000..e32bf1d8 --- /dev/null +++ b/sm/dot.py @@ -0,0 +1,92 @@ +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. 
def make_dot_for_pr(statename, pr):
    """
    Generate the dot source for the edges that the given pattern rule
    contributes when applied in the state named "statename": one edge per
    outcome of the rule.

    Raises TypeError for an outcome of an unrecognized kind.
    """
    def make_edge(src, dst, label):
        return ' %s -> %s [label=<%s>];\n' % (src, dst, label)

    def make_label(condition, guardtext, actiontext):
        if guardtext:
            guardedtext = '%s %s' % (condition, guardtext)
        else:
            guardedtext = str(condition)
        if actiontext:
            return '%s: %s' % (guardedtext, actiontext)
        else:
            return guardedtext

    def edge_for_outcome(outcome, guardtext):
        # NOTE(review): the pattern text is embedded in the label as-is,
        # whereas the Python source goes through to_html (in
        # python_to_dot); patterns such as "{ x < y }" may need the same
        # treatment -- confirm
        if isinstance(outcome, TransitionTo):
            return make_edge(statename_to_dot(statename),
                             statename_to_dot(outcome.statename),
                             make_label(pattern_to_dot(pr.pattern),
                                        guardtext,
                                        ''))
        elif isinstance(outcome, BooleanOutcome):
            # Draw the guarded outcome, noting the guard on the label:
            return edge_for_outcome(outcome.outcome,
                                    'is %s' % outcome.guard)
        elif isinstance(outcome, PythonOutcome):
            # Python fragments are drawn as an edge from the state back
            # to itself
            return make_edge(statename_to_dot(statename),
                             statename_to_dot(statename),
                             make_label(pattern_to_dot(pr.pattern),
                                        guardtext,
                                        python_to_dot(outcome)))
        else:
            raise TypeError('unknown outcome: %r' % (outcome, ))

    return ''.join([edge_for_outcome(outcome, '')
                    for outcome in pr.outcomes])

import gcc

from gccutils.graph.stmtgraph import ExitNode, SplitPhiNode
from gccutils.graph.supergraph import CallNode, ReturnNode, \
    CallToStart, ExitToReturnSite

from sm.reporter import Report, Note
from sm.utils import simplify, stateset_to_str, get_retval_aliases

class PathAnnotations:
    """
    Important events along a path

    Walks the path backwards from the error, tracking which expression
    carries the state of interest and which states it may have.  The
    result is self._significant_for_node, a dict mapping each node on the
    path to an (expr, states) pair.
    """
    def __init__(self, ctxt, error, path):
        # Determine important events within the path
        # Walk backwards along it, tracking the expression of importance
        # and its state
        ctxt.debug('error.match: %s', error.match)
        expr = error.match.get_stateful_gccvar(ctxt)
        states = frozenset([error.state])
        ctxt.debug('expr, states: %s, %s', expr, states)
        self._significant_for_node = {path[-1].dstnode : (expr, states)}
        for edge in path[::-1]:
            ctxt.debug('edge: %s', edge)
            ctxt.debug(' edge.inneredge: %s', edge.inneredge)
            ctxt.debug(' edge.match: %s', edge.match)
            srcnode = edge.srcnode
            inneredge = edge.inneredge
            stmt = srcnode.stmt
            if isinstance(edge.inneredge, CallToStart):
                # Update the expr of interest based on param/arg mapping:
                for param, arg in zip(srcnode.stmt.fndecl.arguments,
                                      srcnode.stmt.args):
                    param = simplify(param)
                    arg = simplify(arg)
                    if expr == param:
                        expr = arg
            elif isinstance(edge.inneredge, ExitToReturnSite):
                # Was state propagated through the return value?
                if inneredge.calling_stmtnode.stmt.lhs:
                    exitsupernode = inneredge.srcnode
                    assert isinstance(exitsupernode.innernode, ExitNode)
                    retval = simplify(exitsupernode.innernode.returnval)
                    lhs = simplify(inneredge.calling_stmtnode.stmt.lhs)
                    if expr == lhs:
                        expr = retval

                # Did the params change state?
                callsite = inneredge.dstnode.callnode.innernode
                ctxt.debug('callsite: %s', callsite)
                for param, arg in zip(callsite.stmt.fndecl.arguments,
                                      callsite.stmt.args):
                    param = simplify(param)
                    arg = simplify(arg)
                    ctxt.debug('param: %s', param)
                    ctxt.debug('arg: %s', arg)
                    if expr == arg:
                        expr = param
            elif isinstance(stmt, gcc.GimpleAssign):
                lhs = simplify(stmt.lhs)
                if stmt.exprcode == gcc.VarDecl:
                    rhs = simplify(stmt.rhs[0])
                    if expr == lhs:
                        expr = rhs
                elif stmt.exprcode == gcc.ComponentRef:
                    compref = stmt.rhs[0]
                    if expr == lhs:
                        # Follow the field itself if it is being tracked at
                        # the source node, otherwise follow its container:
                        srcsupernode = srcnode.supergraphnode
                        if ctxt.get_aliases(srcsupernode, compref) in ctxt.states_for_node[srcsupernode]._dict:
                            expr = compref
                        else:
                            expr = compref.target
            elif isinstance(stmt, gcc.GimplePhi):
                assert isinstance(srcnode.stmtnode, SplitPhiNode)
                rhs = simplify(srcnode.stmtnode.rhs)
                ctxt.debug(' rhs: %r', rhs)
                lhs = simplify(stmt.lhs)
                ctxt.debug(' lhs: %r', lhs)
                if expr == lhs:
                    expr = rhs
            if edge.match:
                if edge.match.get_stateful_gccvar(ctxt) == expr:
                    # This is a match affecting the expr of interest:
                    states = srcnode.get_states_for_expr(ctxt, expr)

            ctxt.debug(' expr, states: %s, %s', expr, states)
            self._significant_for_node[srcnode] = (expr, states)

    def get_significant_expr_at(self, node):
        """Return the expression of interest recorded for `node`."""
        return self._significant_for_node[node][0]

    def get_significant_states_at(self, node):
        """Return the set of states recorded for `node`."""
        return self._significant_for_node[node][1]

class Error:
    # A stored error
    def __init__(self, srcnode, match, msg, state, cwe, sm_filename, sm_lineno):
        self.srcnode = srcnode
        self.match = match
        self.msg = msg
        self.state = state

        # cwe can be None, or a str of the form "CWE-[0-9]+"
        # e.g. "CWE-590" aka "Free of Memory not on the Heap"
        # see http://cwe.mitre.org/data/definitions/590.html
        self.cwe = cwe

        # Metadata about where in the sm script this error was emitted:
        self.sm_filename = sm_filename # so that you can import helper files
        self.sm_lineno = sm_lineno

    @property
    def gccloc(self):
        """
        The location of the error: that of self.srcnode, falling back to
        the end of the enclosing function if the node has none.
        """
        gccloc = self.srcnode.get_gcc_loc()
        if gccloc is None:
            gccloc = self.function.end
        return gccloc

    @property
    def function(self):
        return self.srcnode.function

    def __lt__(self, other):
        # Provide a sort order, so that they sort into source order

        # First sort by location:
        if self.gccloc < other.gccloc:
            return True
        elif self.gccloc > other.gccloc:
            return False

        # Failing that, sort by message:
        return self.msg < other.msg

    def __eq__(self, other):
        # Always return a real bool, and tolerate comparison against
        # objects that are not Error instances:
        return (isinstance(other, Error)
                and self.srcnode == other.srcnode
                and self.match == other.match
                and self.msg == other.msg
                and self.state == other.state)

    def __ne__(self, other):
        # Needed on Python 2, where "!=" is not derived from __eq__
        return not self.__eq__(other)

    def __hash__(self):
        return hash(self.srcnode) ^ hash(self.match) ^ hash(self.msg) ^ hash(self.state)

    def make_report(self, ctxt, solution):
        """
        Generate a Report instance (or None if it's impossible)

        The report holds one Note per interesting edge on the shortest
        path that `solution` finds to this error, followed by the error
        message itself if any other note was emitted.
        """
        notes = []
        stateful_gccvar = self.match.get_stateful_gccvar(ctxt)
        path = solution.get_shortest_path_to(self.srcnode,
                                             ctxt.get_aliases(self.srcnode, stateful_gccvar),
                                             self.state)
        ctxt.debug('path: %r', path)
        if path is None:
            # unreachable:
            ctxt.log('unreachable error')
            return None

        # Figure out the interesting events along the path:
        pa = PathAnnotations(ctxt, self, path)

        # Now generate a report, using the significant events:
        for edge in path:
            srcnode = edge.srcnode
            srcsupernode = edge.srcnode.innernode
            srcgccloc = srcsupernode.get_gcc_loc()
            srcexpr = pa.get_significant_expr_at(srcnode)
            srcstates = pa.get_significant_states_at(srcnode)

            dstnode = edge.dstnode
            dstsupernode = edge.dstnode.innernode
            dstgccloc = dstsupernode.get_gcc_loc()
            dstexpr = pa.get_significant_expr_at(dstnode)
            dststates = pa.get_significant_states_at(dstnode)

            with ctxt.indent():
                ctxt.debug('edge from:')
                with ctxt.indent():
                    ctxt.debug('srcnode: %s', srcsupernode)
                    ctxt.debug('pa._significant_for_node[srcnode]: %s',
                               pa._significant_for_node[srcnode])
                    ctxt.debug('srcstates: %s', srcstates)
                    ctxt.debug('srcloc: %s', srcgccloc)
                ctxt.debug('to:')
                with ctxt.indent():
                    ctxt.debug('dstnode: %s', dstsupernode)
                    ctxt.debug('pa._significant_for_node[dstnode]: %s',
                               pa._significant_for_node[dstnode])
                    ctxt.debug('dststates: %s', dststates)
                    ctxt.debug('dstloc: %s', dstgccloc)

            gccloc = srcgccloc
            desc = ''
            if isinstance(srcsupernode, CallNode):
                desc = ('call from %s() to %s()'
                        % (srcsupernode.function.decl.name,
                           dstsupernode.function.decl.name))
            elif isinstance(dstsupernode, ReturnNode):
                if gccloc is None:
                    gccloc = dstgccloc
                desc = ('return from %s() to %s()'
                        % (srcsupernode.function.decl.name,
                           dstsupernode.function.decl.name))

            # Without a location there is nowhere to attach a note:
            if not gccloc:
                continue

            if pa._significant_for_node[srcnode] != pa._significant_for_node[dstnode]:
                if edge.match:
                    # Describe state change:
                    if desc:
                        desc += ': '
                    desc += edge.match.description(ctxt)
                    notes.append(Note(gccloc, desc))
                    continue

                # We care about state changes, or state propagations,
                # but we don't care about propagations of the "start" state:
                newstates = dststates - srcstates
                ctxt.debug('newstates: %s', newstates)
                if newstates or ctxt.get_default_state() not in srcstates:
                    # Debugging information on state change:
                    if desc:
                        desc += ': '
                    desc += ('state of %s (%s) propagated to %s'
                             % (gccexpr_to_str(ctxt, srcsupernode, srcexpr),
                                ' or '.join(['"%s"' % state for state in srcstates]),
                                gccexpr_to_str(ctxt, dstsupernode, dstexpr)))
                    notes.append(Note(gccloc, desc))
            elif desc:
                # Nothing changed, but the call/return is still worth
                # mentioning:
                notes.append(Note(gccloc, desc))

        # repeat the message at the end of the path, if anything else has
        # been said:
        if notes:
            gccloc = path[-1].dstnode.innernode.get_gcc_loc()
            if gccloc:
                notes.append(Note(gccloc, self.msg))

        return Report(ctxt.sm, self, notes)


def get_user_expr(equivcls):
    """
    Pick the expression within `equivcls` that is best for use in a
    description.  Returns the string 'None' if equivcls is None, and
    (implicitly) None if it is empty.
    """
    # What's the best expr within the equivcls for use in a description?
    # named locals (not temporaries):
    if equivcls is None:
        return 'None'

    for expr in equivcls:
        if isinstance(expr, gcc.VarDecl) and expr.name:
            return expr

    # composites:
    for expr in equivcls:
        if isinstance(expr, gcc.ComponentRef):
            return expr

    # otherwise, pick the first:
    for expr in equivcls:
        return expr

def equivcls_to_user_str(ctxt, supernode, equivcls):
    """
    Return a user-facing string for the best expression in `equivcls`.
    """
    ctxt.debug('equivcls_to_user_str: node: %s gccexpr: %s',
               supernode, equivcls)
    expr = get_user_expr(equivcls)
    return gccexpr_to_str(ctxt, supernode, expr)

def gccexpr_to_str(ctxt, supernode, gccexpr):
    """
    Return a user-facing string for `gccexpr` at `supernode`, trying to
    find a better name than the raw one for unnamed temporaries.
    """
    ctxt.debug('gccexpr_to_str: node: %s gccexpr: %s', supernode, gccexpr)
    if isinstance(gccexpr, gcc.VarDecl):
        if gccexpr.name:
            return str(gccexpr)
        else:
            # We have a temporary variable.
            # Try to use a better name if the node "knows" where the
            # temporary came from:
            aliases = ctxt.get_aliases(supernode, gccexpr)
            for alias in aliases:
                if isinstance(alias, gcc.VarDecl):
                    if alias.name:
                        return str(alias)
                if isinstance(alias, gcc.ComponentRef):
                    return str(alias)

            # Is it the return value?
            retval_aliases = get_retval_aliases(ctxt, supernode)
            ctxt.debug('get_retval_aliases: %s', retval_aliases)
            if gccexpr in retval_aliases:
                return 'return value'

            # Couldn't find a better name. Identify it as a temporary,
            # and give the specific ID in parentheses, since this is useful
            # for debugging:
            return 'temporary (%s)' % gccexpr

    return str(gccexpr)

# diff --git a/sm/expgraph.py b/sm/expgraph.py
# new file mode 100644
# index 00000000..9e44f5d8
# --- /dev/null
# +++ b/sm/expgraph.py
# @@ -0,0 +1,319 @@
# Copyright 2012, 2013 David Malcolm
# Copyright 2012, 2013 Red Hat, Inc.
#
# This is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
# <http://www.gnu.org/licenses/>.

from gccutils.dot import Table, Tr, Td, Text, Br, Font
from gccutils.graph import Graph, Node, Edge

from sm.solver import StatesForNode
from sm.utils import stateset_to_str, equivcls_to_str

class ExplodedGraph(Graph):
    """
    An exploded Supergraph where (equivcls, state) information has been
    added to each node, so that there are multiple nodes representing each
    inner node, and the edges represent the valid state changes.

    Within each node, we have the subset of state for when:

       (equivcls == state)

    Hence for the node with states:

       {v1, v2} : {stateA, stateB}, {v3} : {stateC, stateD, stateE}

    there will be 5 StatewiseExplodedNodes, for:
       {v1, v2}: stateA
       {v1, v2}: stateB
       {v3}: stateC
       {v3}: stateD
       {v3}: stateE
    to ensure that we have an ExplodedNode for each possible equivcls:state
    pairing, whilst avoiding visiting all possible combinations.

    As a special case we don't split nodes which have just a single possible
    state for each equivcls, and use a SoloExplodedNode to represent this
    case

    Every edge is also labelled with the sm.checker.Match (if any) that
    causes it.
    """
    __slots__ = ('ctxt', 'innergraph',
                 'expnode_for_triple',
                 'expnodes_for_innernode')

    def __init__(self, ctxt, innergraph):
        Graph.__init__(self)
        self.ctxt = ctxt
        self.innergraph = innergraph

        # dict from (SupergraphNode, equivcls, state) to ExplodedNode:
        self.expnode_for_triple = {}

        # dict from SupergraphNode to set of ExplodedNode:
        self.expnodes_for_innernode = {}

    def add_node(self, expnode):
        """
        Add expnode to the graph, indexing it by its innernode.
        """
        # Indexing by triple is currently disabled:
        #   key = (expnode.innernode, expnode.equivcls, expnode.state)
        #   assert key not in self.expnode_for_triple
        #   self.expnode_for_triple[key] = expnode
        if expnode.innernode in self.expnodes_for_innernode:
            self.expnodes_for_innernode[expnode.innernode].add(expnode)
        else:
            self.expnodes_for_innernode[expnode.innernode] = set([expnode])
        return Graph.add_node(self, expnode)

    def get_entry_nodes(self):
        return [self.get_entry_node()]

    def get_entry_node(self):
        for expnode in self.expnodes_for_innernode[self.innergraph.fake_entry_node]:
            return expnode # there should be just one

    def get_expnode_with_state(self, innernode, equivcls, state):
        """
        Return an exploded node for `innernode` in which `equivcls` can
        have `state`, or (implicitly) None if there isn't one.
        """
        assert equivcls is not None
        assert state is not None

        for expnode in self.expnodes_for_innernode[innernode]:
            if state in expnode.states_subset._dict[equivcls]:
                return expnode

    def _make_edge(self, srcnode, dstnode, inneredge, match):
        return ExplodedEdge(srcnode, dstnode, inneredge, match)

class ExplodedNode(Node):
    __slots__ = ('innernode', 'states_subset', )

    def __init__(self, ctxt, innernode):
        Node.__init__(self)
        self.innernode = innernode
        # (the subclasses also set up self.states_subset)

    def to_dot_html(self, ctxt):
        """
        Build a table showing the node's title, its states_subset, any
        facts known for it, and the rendering of the inner node.
        """
        # Render the wrapped node with the real context (as
        # ExplodedEdge.to_dot_label does), not with this node:
        inner = self.innernode.to_dot_html(ctxt)
        table = Table(cellborder=1)

        tr = table.add_child(Tr())
        td = tr.add_child(Td(align='left'))
        td.add_child(self.get_title_dot_html(ctxt))

        tr = table.add_child(Tr())
        td = tr.add_child(Td(align='left'))
        td.add_child(Text('states_subset: %s'
                          % self.states_subset))
        if ctxt.facts_for_expnode:
            facts = ctxt.facts_for_expnode[self]
            if facts:
                for fact in facts.set_:
                    tr = table.add_child(Tr())
                    td = tr.add_child(Td(align='left'))
                    td.add_child(Text('FACT: %s' % (fact, )))
            else:
                tr = table.add_child(Tr())
                td = tr.add_child(Td(align='left'))
                td.add_child(Text('NO FACTS'))
        tr = table.add_child(Tr())
        td = tr.add_child(Td(align='left'))
        td.add_child(inner)
        return table

    @property
    def stmt(self):
        return self.innernode.stmt

    @property
    def supergraphnode(self):
        return self.innernode.supergraphnode

    @property
    def stmtnode(self):
        return self.innernode.supergraphnode.stmtnode

    def get_subgraph_path(self, ctxt):
        return self.innernode.get_subgraph_path(ctxt)

    def get_states_for_expr(self, ctxt, expr):
        return self.states_subset.get_states_for_expr(ctxt, expr)

class SoloExplodedNode(ExplodedNode):
    """
    A node within the ExplodedGraph in which the underlying node has only
    one possible combination of state variables
    """
    __slots__ = ()

    def __init__(self, ctxt, innernode):
        ExplodedNode.__init__(self, ctxt, innernode)
        self.states_subset = ctxt.states_for_node[self.innernode]

    def __repr__(self):
        return 'SoloExplodedNode(%r)' % (self.innernode)

    def get_title_dot_html(self, ctxt):
        return Text('%s' % self.__class__.__name__)

class StatewiseExplodedNode(ExplodedNode):
    """
    A node within the ExplodedGraph exploring the case that a particular
    equivcls has a particular state
    """
    __slots__ = ('equivcls', 'state')

    def __init__(self, ctxt, innernode, equivcls, state):
        ExplodedNode.__init__(self, ctxt, innernode)
        self.equivcls = equivcls
        self.state = state

        # Calculate states_subset, the StatesForNode comprising all possible
        # states for the underlying innernode, intersected with just those
        # in which the given equivcls has the given state:
        allstates = ctxt.states_for_node[self.innernode]
        assert self.equivcls in allstates._dict
        _dict = allstates._dict.copy()
        _dict[self.equivcls] = frozenset([self.state])
        self.states_subset = StatesForNode(self.innernode, _dict)

    def __repr__(self):
        return ('StatewiseExplodedNode(%r, %r, %r)'
                % (self.innernode, self.equivcls, self.state))

    def get_title_dot_html(self, ctxt):
        return Text('%s: %s=%s '
                    % (self.__class__.__name__,
                       equivcls_to_str(self.equivcls),
                       self.state))

class ExplodedEdge(Edge):
    __slots__ = ('inneredge', 'match')

    def __init__(self, srcnode, dstnode, inneredge, match):
        Edge.__init__(self, srcnode, dstnode)
        self.inneredge = inneredge
        self.match = match

    def __hash__(self):
        # (match is deliberately left out; equal edges still hash equal)
        return hash(self.srcnode) ^ hash(self.dstnode) ^ hash(self.inneredge)

    def __eq__(self, other):
        # Always return a real bool rather than falling off the end:
        return (isinstance(other, ExplodedEdge)
                and self.srcnode == other.srcnode
                and self.dstnode == other.dstnode
                and self.inneredge == other.inneredge
                and self.match == other.match)

    def __ne__(self, other):
        # Needed on Python 2, where "!=" is not derived from __eq__
        return not self.__eq__(other)

    def to_dot_label(self, ctxt):
        result = self.inneredge.to_dot_label(ctxt)
        if self.match:
            desc = self.match.description(ctxt)
            result += (': ' if result else '') + desc
        return result

    @property
    def true_value(self):
        return self.inneredge.true_value

    @property
    def false_value(self):
        return self.inneredge.false_value

    @property
    def stmtedge(self):
        return self.inneredge.stmtedge

def build_exploded_graph(ctxt):
    """
    Build and return the ExplodedGraph for ctxt.graph, using the per-node
    states in ctxt.states_for_node.
    """
    expgraph = ExplodedGraph(ctxt, ctxt.graph)
    solonodes = set()

    # Populate the expgraph with nodes for the various (equivcls, state)
    # pairs:
    for innernode in ctxt.graph.nodes:
        states_for_node = ctxt.states_for_node[innernode]
        if not states_for_node:
            # Unreachable node
            continue
        assert isinstance(states_for_node, StatesForNode)

        # Add exploded nodes for every equivcls that has multiple possible
        # states:
        expnodes = []
        for equivcls in states_for_node._dict:
            assert isinstance(equivcls, frozenset)
            states = states_for_node._dict[equivcls]
            if len(states) > 1:
                for state in states:
                    expnode = StatewiseExplodedNode(ctxt, innernode, equivcls, state)
                    expgraph.add_node(expnode)
                    expnodes.append(expnode)
        if not expnodes:
            # We have a node for which every equivcls has exactly one state;
            # add an exploded node representing this:
            expnode = SoloExplodedNode(ctxt, innernode)
            expgraph.add_node(expnode)
            solonodes.add(innernode)

    # Create edges within the exploded graph:
    for i, srcexpnode in enumerate(expgraph.nodes):
        if i % 100 == 0:
            ctxt.timing('iter %i; len(expgraph.nodes): %i len(expgraph.edges): %i',
                        i, len(expgraph.nodes), len(expgraph.edges))

        ctxt.log('srcexpnode: %s', srcexpnode)
        with ctxt.indent():
            # Get the subset of state at this node:
            srcvalue = srcexpnode.states_subset
            if srcvalue is None:
                continue

            ctxt.log('srcvalue (subset): %s', srcvalue)
            for inneredge in srcexpnode.innernode.succs:
                ctxt.log('inneredge: %s', inneredge)
                with ctxt.indent():
                    # Rerun the state propagation for the state subset for the
                    # given edge to get some state subset for the dstnode
                    # (which might be the whole of the state set):
                    dstnode = inneredge.dstnode
                    dstvalue, match = StatesForNode.get_edge_value(ctxt, srcvalue, inneredge)
                    if ctxt.options.enable_log:
                        ctxt.log('dstvalue (subset): %s', dstvalue)
                        ctxt.log('match: %s', match)
                        ctxt.log('states for dstnode: %s', ctxt.states_for_node[dstnode])

                    if dstvalue is None:
                        continue

                    # Wire up edges accordingly within the ExplodedGraph:
                    dstexpnodes = expgraph.expnodes_for_innernode[dstnode]
                    if dstnode in solonodes:
                        assert len(dstexpnodes) == 1
                        for dstexpnode in dstexpnodes:
                            # (there will just be one)
                            expgraph.add_edge(srcexpnode, dstexpnode, inneredge, match)
                            ctxt.log('added edge to solo node')
                    else:
                        for dstexpnode in dstexpnodes:
                            if ctxt.options.enable_log:
                                ctxt.log('dstexpnode: equivcls: %s state: %s states_subset: %s',
                                         equivcls_to_str(dstexpnode.equivcls),
                                         dstexpnode.state,
                                         dstexpnode.states_subset) # stateset_to_str(dstexpnode.states_subset))
                            if dstvalue.is_subset_of(dstexpnode.states_subset):
                                expgraph.add_edge(srcexpnode, dstexpnode, inneredge, match)
                                ctxt.log('(added edge for ^^^)')

    return expgraph

# diff --git a/sm/facts.py b/sm/facts.py
# new file mode 100644
# index 00000000..2dc2319a
# --- /dev/null
# +++ b/sm/facts.py
# @@ -0,0 +1,543 @@
# Copyright 2012, 2013 David Malcolm
# Copyright 2012, 2013 Red Hat, Inc.
#
# This is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see
# <http://www.gnu.org/licenses/>.
+ +############################################################################ +# Preprocessing phase +############################################################################ +import gcc + +from gccutils.graph.stmtgraph import SplitPhiNode, ExitNode +from gccutils.graph.supergraph import CallToReturnSiteEdge, CallToStart, \ + ExitToReturnSite + +import sm.dataflow +from sm.utils import simplify, Timer + +# For applying boolean not: +inverseops = {'==' : '!=', + '!=' : '==', + '<' : '>=', + '<=' : '>', + '>' : '<=', + '>=' : '<', + } + +# For flipping LHS and RHS: +flippedops = { + # Equality/inequality is symmetric: + '==' : '==', + '!=' : '!=', + + # Comparisons change direction: + '<' : '>', + '<=' : '>=', + '>' : '<', + '>=' : '<=', + } + +# Mapping from gcc expression codes to Python method names: +exprcodenames = { + gcc.PlusExpr: '__add__', + gcc.MultExpr: '__mul__', + gcc.TruncDivExpr: '__div__', + } + +LOCAL_VAR_TYPES = (gcc.VarDecl, gcc.ParmDecl) + +class Fact(object): + __slots__ = ('lhs', 'op', 'rhs') + + def __init__(self, lhs, op, rhs): + self.lhs = lhs + self.op = op + self.rhs = rhs + + def __str__(self): + return '%s %s %s' % (self.lhs, self.op, self.rhs) + + def __repr__(self): + return 'Fact(%r, %r, %r)' % (self.lhs, self.op, self.rhs) + + def __eq__(self, other): + if isinstance(other, Fact): + if self.lhs == other.lhs: + if self.op == other.op: + if self.rhs == other.rhs: + return True + + def __hash__(self): + return hash(self.lhs) ^ hash(self.op) ^ hash(self.rhs) + + def __lt__(self, other): + # Support sorting of facts to allow for consistent ordering in + # test output + if isinstance(other, Fact): + return (self.lhs, self.op, self.rhs) < (other.lhs, other.op, other.rhs) + +class Factoid: + """ + Like a fact, but has an context-dependent LHS + """ + __slots__ = ('op', 'rhs') + + def __init__(self, op, rhs): + self.op = op + self.rhs = rhs + + def __str__(self): + return '%s %s' % (self.op, self.rhs) + + def __repr__(self): + return 
'Factoid(%r, %r)' % (self.op, self.rhs) + + def __eq__(self, other): + if isinstance(other, Factoid): + if self.op == other.op: + if self.rhs == other.rhs: + return True + + def __hash__(self): + return hash(self.op) ^ hash(self.rhs) + + def __lt__(self, other): + # Support sorting of facts to allow for consistent ordering in + # test output + if isinstance(other, Factoid): + return (self.op, self.rhs) < (other.op, other.rhs) + + def apply_binary_op_to_constant(self, opname, other): + const = self.rhs + if isinstance(const, gcc.Constant): + const = const.constant + return Factoid(self.op, getattr(const, opname)(other)) + +class Facts(sm.dataflow.AbstractValue): + """ + A set of facts describing the possible state of the program at a + particular node in the graph. + + * topmost value: None signifies "unreachable": the empty set of possible + states: no states are possible + + * intermediate values: various non-empty sets of Fact statements. + (F1 and F2) is more specific than merely (F1) or (F2), hence + "(a == 0)" < "(a==0 and b > 3)" + and + "(b > 3)" < "(a==0 and b > 3)" + + * bottom value: empty set of Fact statements, meaning the full set of + possible states: all states are possible + + * the "meet" of two values is the set of all possible states from + either: the union of possible states, and hence (for now) the + intersection of the possible facts. 
+ """ + __slots__ = ('set_', 'partitions', ) + + def __init__(self): + self.set_ = set() + + # lazily constructed + # dict from expr to (shared) sets of exprs + self.partitions = None + + def copy(self): + clone = Facts() + clone.set_ = set(self.set_) + clone.partitions = self.partitions + return clone + + def add(self, fact): + self.set_.add(fact) + self.partitions = None + + def __and__(self, other): + result = Facts() + result.set_ = self.set_ & other.set_ + return result + + def __str__(self): + return '(%s)' % (' && '.join([str(fact) + for fact in sorted(self.set_)])) + + def __eq__(self, other): + if other is None: + return False + return self.set_ == other.set_ + + def __ne__(self, other): + if other is None: + return True + return self.set_ != other.set_ + + @classmethod + def make_entry_point(cls, ctxt, node): + return Facts() + + @classmethod + def get_edge_value(cls, ctxt, srcvalue, edge): + # Don't propagate information along the *intra*procedural edge + # of an interprocedural callsite (i.e. one where both caller and + # callee have their CFG in the supergraph), so that if the called + # function never returns, we don't erroneously let that affect + # subsequent state within the callee. + # This means that e.g. 
within + # if (i > 10) { + # something_that_calls_abort(); + # } + # foo() + # that only facts from the false edge reach the call to foo(), and + # hence we know there that (i <= 10) + if isinstance(edge, CallToReturnSiteEdge): + return None, None + + srcnode = edge.srcnode + stmt = srcnode.stmt + dstfacts = srcvalue.copy() + + # Handle interprocedural edges: + if isinstance(edge, CallToStart): + # Rewrite any facts referencing the arguments to contain the + # parameters: + args_to_params = dict((simplify(arg), simplify(param)) + for arg, param in zip(stmt.args, + stmt.fndecl.arguments)) + dstfacts = srcvalue.remap_for_scope(args_to_params) + + elif isinstance(edge, ExitToReturnSite): + # Rewrite any facts referencing the return value: + if edge.calling_stmtnode.stmt.lhs: + exitsupernode = srcnode + assert isinstance(exitsupernode.innernode, ExitNode) + retval = simplify(exitsupernode.innernode.returnval) + ctxt.debug('retval: %s', retval) + + lhs = edge.calling_stmtnode.stmt.lhs + ctxt.debug('lhs: %s', lhs) + retval_to_lhs = {retval : simplify(lhs)} + dstfacts = srcvalue.remap_for_scope(retval_to_lhs) + + if isinstance(stmt, gcc.GimpleAssign): + exprcode = stmt.exprcode + if 1: + ctxt.debug('gcc.GimpleAssign: %s', stmt) + ctxt.debug(' stmt.lhs: %r', stmt.lhs) + ctxt.debug(' stmt.rhs: %r', stmt.rhs) + ctxt.debug(' exprcode: %r', exprcode) + if exprcode in exprcodenames: + lhs = simplify(stmt.lhs) + rhs0 = simplify(stmt.rhs[0]) + rhs1 = simplify(stmt.rhs[1]) + dstfacts._assignment_from_binary_op(ctxt, lhs, rhs0, rhs1, + exprcodenames[exprcode]) + elif exprcode in (gcc.IntegerCst, + gcc.ParmDecl, gcc.VarDecl, + gcc.MemRef, gcc.ComponentRef): + assert len(stmt.rhs) == 1 + lhs = simplify(stmt.lhs) + rhs = simplify(stmt.rhs[0]) + dstfacts._assignment(lhs, rhs) + else: + # We don't know how to handle this expression code, so + # just forget what we knew about the LHS: + lhs = simplify(stmt.lhs) + dstfacts._remove_invalidated_facts(lhs) + + elif isinstance(stmt, 
gcc.GimpleCond): + if 1: + ctxt.debug('gcc.GimpleCond: %s', stmt) + lhs = simplify(stmt.lhs) + rhs = simplify(stmt.rhs) + op = stmt.exprcode.get_symbol() + if edge.true_value: + dstfacts.add( Fact(lhs, op, rhs) ) + if edge.false_value: + op = inverseops[op] + dstfacts.add( Fact(lhs, op, rhs) ) + elif isinstance(stmt, gcc.GimpleSwitch): + if 0: + ctxt.debug('gcc.GimpleSwitch: %s', stmt) + print(stmt) + indexvar = simplify(stmt.indexvar) + + # More than one gcc.CaseLabelExpr may point at the same label + # These will be the same SupergraphEdge within the Supergraph + # Hence a SupergraphEdge may have zero or more gcc.CaseLabelExpr + + minvalue = None + maxvalue = None + for cle in edge.stmtedge.caselabelexprs: + if cle.low is not None: + if minvalue is None or minvalue > cle.low: + minvalue = cle.low + + if cle.high is not None: + # a range from cle.low ... cle.high + if maxvalue is None or maxvalue < cle.high: + maxvalue = cle.high + else: + # a single value: cle.low + if maxvalue is None or maxvalue < cle.low: + maxvalue = cle.low + if 0: + print('minvalue: %r' % minvalue) + print('maxvalue: %r' % maxvalue) + if minvalue is not None: + if minvalue == maxvalue: + dstfacts.add(Fact(indexvar, '==', minvalue)) + else: + dstfacts.add(Fact(indexvar, '>=', minvalue)) + dstfacts.add(Fact(indexvar, '<=', maxvalue)) + elif isinstance(stmt, gcc.GimplePhi): + srcnode = edge.srcnode + if 1: + ctxt.debug('gcc.GimplePhi: %s', stmt) + ctxt.debug(' srcnode: %s', srcnode) + ctxt.debug(' srcnode: %r', srcnode) + ctxt.debug(' srcnode.innernode: %s', srcnode.innernode) + ctxt.debug(' srcnode.innernode: %r', srcnode.innernode) + assert isinstance(srcnode.supergraphnode.innernode, SplitPhiNode) + rhs = simplify(srcnode.supergraphnode.innernode.rhs) + lhs = simplify(stmt.lhs) + dstfacts._assignment(lhs, rhs) + + # Full check to see if the resulting facts aren't contradictory: + if dstfacts != srcvalue: + if dstfacts.is_possible(ctxt): + return dstfacts, None + else: + return None, None + 
return dstfacts, None + + @classmethod + def meet(cls, ctxt, lhs, rhs): + # The set of valid known facts from multiple inedges is the + # intersection of the facts from each inedge: + if lhs is None: + return rhs + if rhs is None: + return lhs + return lhs & rhs + + def _make_equiv_classes(self): + partitions = {} + + for fact in self.set_: + lhs, op, rhs = fact.lhs, fact.op, fact.rhs + if op == '==': + if lhs in partitions: + if rhs in partitions: + merged = partitions[lhs] | partitions[rhs] + else: + partitions[lhs].add(rhs) + merged = partitions[lhs] + else: + if rhs in partitions: + partitions[rhs].add(lhs) + merged = partitions[rhs] + else: + merged = set([lhs, rhs]) + partitions[lhs] = partitions[rhs] = merged + + self.partitions = partitions + + def get_equiv_classes(self): + # Get equiv classes as a frozenset of frozensets: + if self.partitions is None: + self._make_equiv_classes() + return frozenset([frozenset(equivcls) + for equivcls in self.partitions.values()]) + + def is_possible(self, ctxt): + ctxt.debug('is_possible: %s', self) + # Work-in-progress implementation: + # Gather vars by equivalence classes: + + if self.partitions is None: + self._make_equiv_classes() + ctxt.debug('partitions: %s', self.partitions) + + # There must be at most one specific constant within any equivalence + # class: + constants = {} + for key in self.partitions: + equivcls = self.partitions[key] + for expr in equivcls: + # (we support "int" here to make it easier to unit-test this code) + if isinstance(expr, (gcc.IntegerCst, int)): + if key in constants: + # More than one (non-equal) constant within the class: + ctxt.debug('impossible: equivalence class for %s' + ' contains non-equal constants %s and %s' + % (equivcls, constants[key], expr)) + return False + constants[key] = expr + + ctxt.debug('constants: %s' % constants) + + # Check any such constants against other inequalities: + for fact in self.set_: + lhs, op, rhs = fact.lhs, fact.op, fact.rhs + if op in ('!=', '<', 
'>'): + if isinstance(rhs, (gcc.IntegerCst, int)): + if lhs in constants: + if constants[lhs] == rhs: + # a == CONST_1 && a != CONST_1 is impossible: + ctxt.debug('impossible: equivalence class for %s' + ' equals constant %s but has %s %s %s', + equivcls, constants[lhs], lhs, op, rhs) + return False + + # All tests passed: + return True + + def get_aliases(self, expr): + if self.partitions is None: + self._make_equiv_classes() + if expr in self.partitions: + return frozenset(self.partitions[expr]) + else: + return frozenset([expr]) + + def expr_is_referenced_externally(self, ctxt, var): + ctxt.debug('expr_is_referenced_externally(%s, %s)', self, var) + for fact in self.set_: + lhs, op, rhs = fact.lhs, fact.op, fact.rhs + if op == '==': + # For now, any equality will do it + # FIXME: needs to be something that isn't a local + if var == lhs: + return True + if var == rhs: + return True + return False + + def _remove_invalidated_facts(self, expr): + # remove any facts relating to an expression that might have changed + # value: + for fact in list(self.set_): + if expr == fact.lhs or expr == fact.rhs: + self.set_.remove(fact) + + def _assignment(self, lhs, rhs): + if lhs == rhs: + return + self._remove_invalidated_facts(lhs) + self.add( Fact(lhs, '==', rhs) ) + + def _assignment_from_binary_op(self, ctxt, lhs, rhs0, rhs1, opname): + if 1: + ctxt.debug('_assignment_from_binary_op(%s, ...,' + ' lhs=%s, rhs0=%s, rhs1=%s, opname=%s)', + self, lhs, rhs0, rhs1, opname) + + rhs0factoids = list(self.iter_factoids_about(rhs0)) + ctxt.debug('rhs0factoids: %s', rhs0factoids) + + rhs1factoids = list(self.iter_factoids_about(rhs1)) + ctxt.debug('rhs1factoids: %s', rhs1factoids) + + # If we have, say, (i > 42) and we have i = i + 1 + # we want to end up with (i > 43): + if isinstance(rhs1, gcc.IntegerCst): + resultfactoids = Factoids([factoid.apply_binary_op_to_constant(opname, int(rhs1)) + for factoid in self.iter_factoids_about(rhs0) + if isinstance(factoid.rhs, (gcc.Constant, 
# NOTE: methods of the facts class (class header outside this view).

def iter_factoids_about(self, expr):
    """
    Yield a Factoid(op, other) for every fact that mentions expr, phrased
    so that expr is the implicit left-hand side.

    For a fact "expr OP x" this is Factoid(OP, x).  For a fact "x OP expr"
    the comparison is flipped and the *other* operand (x) is reported:
    e.g. (5 < i) gives the factoid (> 5) about i.
    """
    for fact in self.set_:
        if expr == fact.lhs:
            yield Factoid(fact.op, fact.rhs)
        if expr == fact.rhs:
            # The other operand is fact.lhs; reporting fact.rhs here would
            # describe expr in terms of itself.
            yield Factoid(flippedops[fact.op], fact.lhs)

def remap_for_scope(self, dict_):
    """
    Build a new Facts in which every operand found in dict_ is replaced by
    its mapped value.  A fact with an operand that is not in dict_ and is an
    instance of LOCAL_VAR_TYPES is dropped.
    """
    result = Facts()
    for fact in self.iter_all_facts():
        if fact.lhs in dict_:
            fact = Fact(dict_[fact.lhs], fact.op, fact.rhs)
        elif isinstance(fact.lhs, LOCAL_VAR_TYPES):
            continue

        if fact.rhs in dict_:
            fact = Fact(fact.lhs, fact.op, dict_[fact.rhs])
        elif isinstance(fact.rhs, LOCAL_VAR_TYPES):
            continue

        result.add(fact)
    return result

def iter_all_facts(self):
    """
    Yield every stored fact, plus the facts obtained by substituting one
    side of each equality (a == b) for the other within every fact:

        a == b, a OP c   gives   b OP c
        a == b, c OP a   gives   c OP b
        a == b, b OP c   gives   a OP c
        a == b, c OP b   gives   c OP a
    """
    for fact in self.set_:
        if fact.op == '==':
            for other in self.set_:
                if fact.lhs == other.lhs:
                    yield Fact(fact.rhs, other.op, other.rhs)
                if fact.lhs == other.rhs:
                    # Keep other's own lhs; only its rhs is substituted
                    yield Fact(other.lhs, other.op, fact.rhs)
                if fact.rhs == other.lhs:
                    yield Fact(fact.lhs, other.op, other.rhs)
                if fact.rhs == other.rhs:
                    # Keep other's own lhs; only its rhs is substituted
                    yield Fact(other.lhs, other.op, fact.lhs)
        yield fact
facts is None or not facts.is_possible(ctxt): + ctxt.log('removing impossible node: %s' % node) + changes += graph.remove_node(node) + ctxt.log('removed %i node(s)' % changes) + return changes diff --git a/sm/notes.rst b/sm/notes.rst new file mode 100644 index 00000000..ec0f8169 --- /dev/null +++ b/sm/notes.rst @@ -0,0 +1,75 @@ +Implementation Notes +==================== +Ideally, we're modelling the state of all variables in the code, which can +be thought of as a state tuple, and the "perfect" solution would be to model +the flow through the "exploded" supergraph. e.g. for 3 vars a, b, c, we +start at the entry node with (a, b, c) as (start, start, start). We would +then iteratively find all state tuples for all nodes. + +In this ideal world we would walk through this V-dimensional space (where V +is the number of variables), determining the precise shape of the visitable +subset at each statement in the supergraph. + +However, we can't do this, as it explodes: for V vars and S states there are +S ** V possible state tuples. e.g. for 10 variables and 5 states there +are 5 ** 10 = roughly 9.7 million possible state tuples. I'd hoped that this +would be relatively sparse, but it can readily be exploded by a series of: + + if (foo()) v1 = stateA(); else v1 = stateB(); + if (foo()) v2 = stateA(); else v2 = stateB(); + ... + if (foo()) vN = stateA(); else vN = stateB(); + +which gives 2^N states tuples, even if we constrain by function "scope". + +e.g. for vars: a, b and states: start, foo, bar, baz + +all possible states: e.g. + + a: start | foo | bar | baz +--------+-------+-----+-----+----- +b:start | Y | | | +b:foo | Y | | | +b:bar | | Y | | Y +b:baz | | | | + +where Y (for "yes") marks a reachable combination + +Given that this is not computationally feasible, we need a simpler approach. 
+ +Abstract domain +--------------- +Possible states for var at a given supergraph node: + + a : some subset of S + b : some subset of S + +This is analogous to the interval domain over integers: we merely know the +(ranges of) possible values of the vars; we don't model any interaction +between those ranges. + +Doing this gives us an over-approximation; for example, this precise solution + + a: start | foo | bar | baz +--------+-------+-----+-----+----- +b:start | Y | | | +b:foo | Y | | | +b:bar | | Y | | Y +b:baz | | | | + +would be modelled as: + + a: {start, foo, baz} + b: {start, foo, bar} + +which expands to this: + + a: start | foo | bar | baz +--------+-------+-----+-----+----- +b:start | Y | E | | E +b:foo | Y | E | | E +b:bar | E | Y | | Y +b:baz | | | | + +where each "E" (for error) marks a false positive due to the +over-approximation. diff --git a/sm/options.py b/sm/options.py new file mode 100644 index 00000000..b9e02596 --- /dev/null +++ b/sm/options.py @@ -0,0 +1,101 @@ +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>.
# Module-wide defaults for the corresponding Options keyword arguments:
ENABLE_LOG = 0
ENABLE_DEBUG = 0
ENABLE_PROFILE = 0
ENABLE_TIMING = 0
SHOW_SUPERGRAPH = 0
SHOW_EXPLODED_GRAPH = 0
DUMP_SOLUTION = 0
SHOW_SOLUTION = 0

class Options:
    """
    Plain container for the settings of a checker run.

    cache_errors, during_lto:
        stored as given (their meaning is not described in this module)

    dump_json:
        if True, error reports are written out as JSON files named
        "INPUTFILENAME.hash.sm.json" rather than to stderr, and such
        errors do not make gcc treat the compilation as a failure

    enable_log:
        if True, emit medium-level debug information to stderr

    enable_debug:
        if True, emit low-level debug information to stderr

    enable_profile:
        if True, use CPython's cProfile module to profile the activity of
        each checker.  The top 20 longest function calls (cumulatively)
        are emitted to stdout, and one profile file is written per checker
        that was run, suitable for viewing e.g. by RunSnakeRun.  Given an
        input file "foo.c" and checkers "bar" and "baz", the files are:
            foo.c.bar.sm-profile
            foo.c.baz.sm-profile

    enable_timing:
        if True, dump timing information to stderr

    show_supergraph:
        if True, render and display a png visualization of the supergraph

    show_exploded_graph:
        if True, render and display a png visualization of the supergraph
        exploded by state, before and after pruning valid paths

    dump_solution:
        if True, print detailed information about the solver's internal
        state to stderr

    show_solution:
        if True, render and display a png visualization of the solver's
        internal state
    """
    def __init__(self,
                 cache_errors=True,
                 during_lto=False,
                 dump_json=False,
                 enable_log=ENABLE_LOG,
                 enable_debug=ENABLE_DEBUG,
                 enable_profile=ENABLE_PROFILE,
                 enable_timing=ENABLE_TIMING,
                 show_supergraph=SHOW_SUPERGRAPH,
                 show_exploded_graph=SHOW_EXPLODED_GRAPH,
                 dump_solution=DUMP_SOLUTION,
                 show_solution=SHOW_SOLUTION):
        # Behaviour of the run as a whole:
        self.cache_errors, self.during_lto = cache_errors, during_lto
        self.dump_json = dump_json

        # Diagnostics written while solving:
        self.enable_log, self.enable_debug = enable_log, enable_debug
        self.enable_profile = enable_profile
        self.enable_timing = enable_timing

        # Visualizations and dumps of intermediate data:
        self.show_supergraph = show_supergraph
        self.show_exploded_graph = show_exploded_graph
        self.dump_solution, self.show_solution = dump_solution, show_solution
# Set to a true value to trace how the tokenizer tracks line numbers:
DEBUG_LINE_NUMBERING = 0

def t_PYTHON(t):
    r'\{\{(.|\n)*?\}\}'
    # (The string above is the token's regex as read by ply, not a
    # docstring: matched double-braces, with arbitrary text and whitespace
    # inside.)
    #
    # Replace the token's value with a (source, lineoffset) pair: the text
    # with the double-braces dropped, and the zero-based line on which the
    # fragment starts.  Then advance the lexer's line counter past any
    # newlines inside the fragment.
    raw = t.value
    numlines = raw.count('\n')
    t.value = (raw[2:-2], t.lexer.lineno - 1)
    t.lexer.lineno += numlines
    if DEBUG_LINE_NUMBERING:
        # t.value is a tuple by now, so report the count taken from the
        # raw text (counting '\n' in the tuple would always give 0):
        print('t_PYTHON with %i lines' % numlines)
        print(' t.lexer.lineno: %i' % t.lexer.lineno)
    return t
def t_LITERAL_NUMBER(t):
    r'(0x[0-9a-fA-F]+|\d+)'
    # (The string above is the token's regex as read by ply, not a
    # docstring.)
    #
    # Convert the matched text (hexadecimal with a "0x" prefix, or decimal)
    # into an integer.  int() is used rather than long(): it exists on both
    # Python 2 and 3, and on Python 2 it promotes to long automatically when
    # the value does not fit in a machine int.
    try:
        if t.value.startswith('0x'):
            t.value = int(t.value, 16)
        else:
            t.value = int(t.value)
    except ValueError:
        # Build the error the same way t_error does; ParserError cannot be
        # constructed from the bare text alone.
        raise ParserError.from_token(t, 'Invalid numeric literal')
    return t
def p_smclauses(p):
    '''smclauses : smclause
                 | smclauses smclause'''
    # One or more clauses, accumulated left-to-right into a fresh list.
    # (Only non-negative indices are used on "p": ply gives negative
    # indices a different meaning.)
    if len(p) > 2:
        earlier, latest = p[1], p[2]
    else:
        earlier, latest = [], p[1]
    p[0] = earlier + [latest]
+ # ptr.unknown, ptr.null, ptr.nonnull + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = [p[1]] + p[3] + +def p_patternrulelist(p): + '''patternrulelist : patternrule + | patternrule PIPE patternrulelist + ''' + # e.g. + # { ptr == 0 } => true=ptr.null, false=ptr.nonnull + # | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = [p[1]] + p[3] + +def p_statename(p): + '''statename : ID DOT ID + | ID + | ID DOT STAR + | STAR + ''' + if len(p) == 4: + p[0] = '%s.%s' % (p[1], p[3]) # FIXME + else: + p[0] = p[1] + +#--------------------------------------------------------------------------- +# Various kinds of pattern: +#--------------------------------------------------------------------------- + +def p_pattern_cpattern(p): + ''' + pattern : LBRACE cpattern RBRACE + ''' + # e.g. + # { ptr = malloc() } + p[0] = p[2] + +def p_pattern_namedpatternreference(p): + ''' + pattern : ID + ''' + # e.g. + # checked_against_0 + p[0] = NamedPatternReference(p[1]) + +def p_pattern_dollarpattern(p): + ''' + pattern : DOLLARPATTERN + ''' + # e.g. + # $leaked$ + p[0] = SpecialPattern.make(p[1]) + +def p_pattern_or(p): + ''' + pattern : pattern PIPE pattern + ''' + # e.g. + # $leaked$ | { x == 0 } + p[0] = OrPattern(p[1], p[3]) + +def p_patternrule(p): + ''' + patternrule : pattern ACTION outcomes + ''' + # e.g. "{ ptr = malloc() } => ptr.unknown" + # e.g. "$leaked$ => ptr.leaked" + p[0] = PatternRule(pattern=p[1], outcomes=p[3]) + +#--------------------------------------------------------------------------- +# Various kinds of "cpattern": +#--------------------------------------------------------------------------- + +def p_cpattern_assignment(p): + ''' + cpattern : ID ASSIGNMENT LITERAL_STRING + | ID ASSIGNMENT LITERAL_NUMBER + | ID ASSIGNMENT ID + ''' + # e.g. "q = 0" + p[0] = Assignment(lhs=p[1], rhs=p[3]) + +def p_cpattern_address_of(p): + 'cpattern : ID ASSIGNMENT AMPERSAND ID' + # e.g. 
"&var" + p[0] = AddressOf(lhs=p[1], rhs=p[4]) + +def p_cpattern_return(p): + ''' + cpattern : RETURN ID + | RETURN LITERAL_STRING + | RETURN LITERAL_NUMBER + | RETURN + ''' + # e.g. "return var" + if len(p) == 3: + p[0] = Return(retval=p[2]) + else: + p[0] = Return(retval=None) + +def p_cpattern_result_of_fn_call(p): + 'cpattern : ID ASSIGNMENT ID LPAREN fncall_args RPAREN' + # e.g. "ptr = malloc()" + p[0] = ResultOfFnCall(lhs=p[1], fnname=p[3], args=p[5]) + +def p_fncall_arg(p): + ''' + fncall_arg : ID + | LITERAL_STRING + | LITERAL_NUMBER + ''' + p[0] = p[1] + +def p_nonempty_fncall_args(p): + ''' + nonempty_fncall_args : fncall_arg + | fncall_args COMMA fncall_arg + ''' + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = p[1] + [p[3]] + +def p_fncall_args_from_nonempty(p): + ''' + fncall_args : nonempty_fncall_args + ''' + p[0] = p[1] + +def p_fncall_args_from_empty(p): + ''' + fncall_args : empty + ''' + p[0] = [] + +def p_cpattern_arg_of_fn_call(p): + 'cpattern : ID LPAREN fncall_args RPAREN' + # e.g. "free(ptr)" + p[0] = ArgsOfFnCall(fnname=p[1], args=p[3]) + +def p_cpattern_comparison(p): + ''' + cpattern : ID COMPARISON LITERAL_NUMBER + | ID COMPARISON ID + ''' + # e.g. "ptr == 0" + p[0] = Comparison(lhs=p[1], op=p[2], rhs=p[3]) + +def p_cpattern_dereference(p): + 'cpattern : STAR ID' + # e.g. "*ptr" + p[0] = VarDereference(var=p[2]) + +def p_cpattern_arraylookup(p): + 'cpattern : ID LSQUARE ID RSQUARE' + # e.g. "arr[x]" + p[0] = ArrayLookup(array=p[1], index=p[3]) + +def p_cpattern_usage(p): + 'cpattern : ID' + # e.g. "ptr" + p[0] = VarUsage(var=p[1]) + +#--------------------------------------------------------------------------- +# The various outcomes when a pattern matches +#--------------------------------------------------------------------------- + +def p_outcomes(p): + '''outcomes : outcome + | outcome COMMA outcomes''' + # e.g. 
"ptr.unknown" + if len(p) == 2: + p[0] = [p[1]] + else: + p[0] = [p[1]] + p[3] + +def p_outcome_newstate(p): + 'outcome : statename' + # e.g. "ptr.unknown" + p[0] = TransitionTo(statename=p[1]) + +def p_outcome_boolean_outcome(p): + '''outcome : TRUE ASSIGNMENT outcome + | FALSE ASSIGNMENT outcome''' + # e.g. "true=ptr.null" + p[0] = BooleanOutcome(guard=True if p[1] == 'true' else False, + outcome=p[3]) + +def p_outcome_python(p): + 'outcome : PYTHON' + # e.g. "{ error('use of possibly-NULL pointer %s' % ptr)}" + src, lineoffset = p[1] + if DEBUG_LINE_NUMBERING: + print("src.count('\\n') %i" % src.count('\n')) + p[0] = PythonOutcome(src=src, + lineoffset=lineoffset) + +############################################################################ +# Error-handling: +############################################################################ + +class ParserError(Exception): + @classmethod + def from_production(cls, p, val, msg): + return ParserError(p.lexer.lexdata, + p.lexer.lexpos - len(val), + val, + msg) + + @classmethod + def from_token(cls, t, msg="Parse error"): + return ParserError(t.lexer.lexdata, + t.lexer.lexpos - len(str(t.value)), + t.value, + msg) + + def __init__(self, input_, pos, value, msg): + self.input_ = input_ + self.filename = None + self.pos = pos + self.value = value + self.msg = msg + + # Locate the line with the error: + startidx = pos + endidx = pos + len(str(value)) + while startidx >= 1 and input_[startidx - 1] != '\n': + startidx -= 1 + while endidx < (len(input_) - 1) and input_[endidx + 1] != '\n': + endidx += 1 + self.errline = input_[startidx:endidx] + self.errpos = pos - startidx + self.lineno = input_[:startidx].count('\n') + + def __str__(self): + result = ('%s at "%s":\n%s\n%s' + % (self.msg, self.value, + self.errline, + ' '*self.errpos + '^'*len(str(self.value)))) + if self.filename: + result = ('\n%s:%i:%i: %s' + % (self.filename, + self.lineno + 1, self.errpos + 1, + result)) + return result + +def p_error(p): + raise 
ParserError.from_production(p, p.value, 'Parser error') + + +def _compile_python_fragments(ch): + """ + Precompile any PythonFragment instances within the ch + + We can't do this during construction because unfortunately both CPython + and ply use SyntaxError: a syntax error in embedded Python code will be + caught (and misunderstood) by ply. + + Hence we have to postprocess the ch tree after parsing + """ + class PythonVisitor: + def visit(self, node): + if isinstance(node, PythonFragment): + node.compile(ch.filename) + pv = PythonVisitor() + ch.accept(pv) + +############################################################################ +# Interface: +############################################################################ +# Entry points: +def parse_string(s): + if 0: + test_lexer(s) + if 0: + print(s) + parser = yacc.yacc(debug=0, write_tables=0) + ch = parser.parse(s)#, debug=1) + ch.filename = None + _compile_python_fragments(ch) + return ch + +def parse_file(filename): + parser = yacc.yacc(debug=0, write_tables=0) + with open(filename) as f: + s = f.read() + try: + ch = parser.parse(s)#, debug=1) + ch.filename = filename + _compile_python_fragments(ch) + return ch + except ParserError, err: + err.filename = filename + raise err + +def test_lexer(s): + print(s) + lexer.input(s) + while True: + tok = lexer.token() + if not tok: break + print tok + diff --git a/sm/reporter.py b/sm/reporter.py new file mode 100644 index 00000000..f688fdad --- /dev/null +++ b/sm/reporter.py @@ -0,0 +1,103 @@ +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
class Note(namedtuple('Note', ('gccloc', 'msg'))):
    """A supplementary message attached to a report, with its location."""
    def as_json(self):
        """Return a JSON-serializable dict describing this note."""
        return dict(loc=gccloc_as_json(self.gccloc),
                    message=self.msg)

def gccloc_as_json(gccloc):
    """
    Return a JSON-serializable dict for a location: the filename as given,
    its absolute form, and the line and column.
    """
    return dict(givenfilename=gccloc.file,
                actualfilename=os.path.abspath(gccloc.file),
                line=gccloc.line,
                column=gccloc.column)

class Report:
    """An error found by the state machine "sm", plus any notes about it."""
    def __init__(self, sm, err, notes):
        self.sm = sm
        self.err = err
        self.notes = notes

    def as_json(self):
        """Return a JSON-serializable dict describing this report."""
        sm_as_json = dict(name=self.sm.name)
        if self.err.sm_filename:
            # Metadata about where in the sm script this error was emitted:
            sm_as_json['filename'] = self.err.sm_filename
            sm_as_json['line'] = self.err.sm_lineno

        jsonic = dict(sm=sm_as_json,
                      loc=gccloc_as_json(self.err.gccloc),
                      message=self.err.msg,
                      notes=[note.as_json() for note in self.notes])
        if self.err.cwe:
            jsonic['cwe'] = self.err.cwe
        return jsonic

class Reporter:
    """Base class for the destinations to which reports can be sent."""
    def add(self, report):
        raise NotImplementedError

class StderrReporter(Reporter):
    """Emit each report as a gcc error, followed by its notes."""
    def __init__(self):
        # The function and file named by the most recent "In function"
        # header, so that the header is only written when either changes:
        self.curfun = None
        self.curfile = None

    def add(self, report):
        err = report.err
        gccloc = err.gccloc
        if err.function != self.curfun or gccloc.file != self.curfile:
            # Fake the function-based output
            # e.g.:
            # "tests/sm/examples/malloc-checker/input.c: In function 'use_after_free':"
            sys.stderr.write("%s: In function '%s':\n"
                             % (gccloc.file, err.function.decl.name))
            self.curfun = err.function
            self.curfile = gccloc.file
        if err.cwe:
            msg = '%s [%s]' % (err.msg, err.cwe)
        else:
            msg = err.msg
        gccutils.error(gccloc, msg)

        for note in report.notes:
            gccutils.inform(note.gccloc, note.msg)

class JsonReporter(Reporter):
    """
    Write each report to its own file, named
    "<file of the error's location>.<sha-1 of the JSON>.sm.json".
    """
    def add(self, report):
        jsonobj = report.as_json()
        jsonsrc = json.dumps(jsonobj,
                             sort_keys=True,
                             indent=4, separators=(',', ': '))

        # Use the sha-1 hash of the report to create a unique filename.
        # Hash functions operate on bytes, so encode the text first
        # (json.dumps emits pure ASCII by default).
        hexdigest = hashlib.sha1(jsonsrc.encode('utf-8')).hexdigest()
        filename = report.err.gccloc.file + '.%s.sm.json' % hexdigest
        with open(filename, 'w') as f:
            f.write(jsonsrc)
+ + +# We could create a separate graph, but it's probably easier +# to simply add the state information directly to the node +# It's the dot visualization we want, though, that makes it easy to debug + +from gccutils import invoke_dot, get_src_for_loc +from gccutils.dot import Table, Tr, Td, Text, Br, Font +from gccutils.graph import Graph, Node, Edge + +import sm.dataflow +from sm.utils import Timer, stateset_to_str, equivcls_to_str + +class Solution: + def __init__(self, ctx): + self.ctxt = ctx + + def dump(self, out): + global _indent + _indent = 0 + def writeln(line, indent=0): + global _indent + _indent += indent + out.write(' ' * _indent) + out.write(line) + out.write('\n') + _indent -= indent + + writeln('SOLUTION FOR %s' % self.ctxt.sm.name) + _indent += 2 + + writeln('; underlying graph has: %i nodes %i edges' + % (len(self.ctxt.graph.nodes), + len(self.ctxt.graph.edges))) + + # 1st pass: enumerate nodes in topologically-sorted order: + nodes = self.ctxt.graph.topologically_sorted_nodes() + index_for_node = {} + node_for_index = {} + for i, node in enumerate(nodes): + index_for_node[node] = i + node_for_index[i] = node + + # 2nd pass: write out the nodes with their edges: + for i, node in enumerate(nodes): + # Write the node: + writeln('%i: %s' % (i, node)) + _indent += 4 + if node.stmt: + if node.stmt.loc: + writeln('src: %s: %s' % (node.stmt.loc, get_src_for_loc(node.stmt.loc))) + facts = self.ctxt.facts_for_node[node] + writeln('facts: %s' % facts) + if facts is not None: + writeln('partitions: {%s}' + % ', '.join(['{%s}' % ', '.join([str(expr) + for expr in equivcls]) + for equivcls in facts.get_equiv_classes()])) + # Write out state information from fixed point solver: + writeln('fixed point states:') + states = self.ctxt.states_for_node[node] + if states: + for equivcls in states._dict: + writeln('%s: %s' + % (equivcls_to_str(equivcls), + stateset_to_str(states._dict[equivcls])), + indent=2) + else: + writeln('None', indent=2) + + _indent -= 2 + + 
for edge in node.succs: + if edge.true_value: + boolstr = "true: " + elif edge.false_value: + boolstr = "false: " + else: + boolstr = "" + writeln('%sgoto %i' % (boolstr, index_for_node[edge.dstnode]), + indent=2) + possible_matches = self.ctxt.possible_matches_for_edge[edge] + if possible_matches: + writeln('possible matches:', + indent=4) + for match in possible_matches: + writeln(match.describe(self.ctxt), + indent=6) + _indent -= 2 + + def to_dot(self, name): + # (a handy debug method is essential) + # basically we want to reuse the underlying graph's to_dot, but + # use some diferent policy... + class SolutionRenderer: + def __init__(self, solution): + self.solution = solution + def node_to_dot_html(self, node): + # raise foo # FIXME: we'll annotate this: + + inner = node.to_dot_html(self) + table = Table(cellborder=1) + states = self.solution.ctxt.states_for_node[node] + if states: + for equivcls in states._dict: + tr = table.add_child(Tr()) + td = tr.add_child(Td(align='left')) + td.add_child(Text('%s: %s' + % (equivcls_to_str(equivcls), + stateset_to_str(states._dict[equivcls])))) + else: + tr = table.add_child(Tr()) + td = tr.add_child(Td(align='left')) + td.add_child(Text('NOT REACHED')) + + facts = self.solution.ctxt.facts_for_node[node] + if facts is not None: + for fact in facts.set_: + tr = table.add_child(Tr()) + td = tr.add_child(Td(align='left')) + td.add_child(Text('FACT: %s' % (fact, ))) + #td.add_child(Text('FACT: %r' % fact)) + else: + tr = table.add_child(Tr()) + td = tr.add_child(Td(align='left')) + td.add_child(Text('NO FACTS')) + tr = table.add_child(Tr()) + td = tr.add_child(Td(align='left')) + td.add_child(inner) + return table + + return self.ctxt.graph.to_dot(name, SolutionRenderer(self)) + + def get_shortest_path_to(self, dstnode, equivcls, state): + # backtrack from destination until you reach a srcnode whilst + # obeying various restrictions: + # * equivcls/states have to match (or have state transitions) + # * call stack has to be 
obeyed: return to correct caller + # * perhaps some simple rules about known "state", to suppress + # the most obvious false positives + + ctxt = self.ctxt + + ctxt.debug('get_shortest_path_to:') + ctxt.debug(' dstnode: %s', dstnode) + ctxt.debug(' equivcls: %s', equivcls) + ctxt.debug(' state: %s', state) + + ctxt.log('building error graph') + with ctxt.indent(): + expgraph = ctxt.expgraph + with Timer(ctxt, 'calculating shortest path through exploded graph'): + dstexpnode = expgraph.get_expnode_with_state(dstnode, equivcls, state) + if dstexpnode is None: + return None + srcexpnode = expgraph.get_entry_node() + + return expgraph.get_shortest_path(srcexpnode, dstexpnode) + diff --git a/sm/solver.py b/sm/solver.py new file mode 100644 index 00000000..f98326c3 --- /dev/null +++ b/sm/solver.py @@ -0,0 +1,1136 @@ +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +############################################################################ +# Solver: what states are possible at each location? 
class State(object):
    """
    States are normally just names (strings), but potentially can have extra
    named attributes

    The extra attributes are given as keyword arguments to the constructor
    and can be read back as ordinary attributes.  Instances compare and
    hash by name plus keyword arguments, so they can be stored in sets.
    """
    __slots__ = ('name', 'kwargs')

    def __init__(self, name, **kwargs):
        self.name = name
        self.kwargs = kwargs

    def __repr__(self):
        if self.kwargs:
            # .items() (not .iteritems()) so that this works on both
            # Python 2 and Python 3
            kwargs = ', '.join('%r=%r' % (k, v)
                               for k, v in self.kwargs.items())
            return 'State(%r, %s)' % (self.name, kwargs)
        else:
            return 'State(%r)' % self.name

    def __str__(self):
        # Plain states print as their bare name; states carrying extra
        # attributes print in full
        if self.kwargs:
            return repr(self)
        else:
            return self.name

    def __eq__(self, other):
        # Always return a real bool (never an implicit None)
        if not isinstance(other, State):
            return False
        return self.name == other.name and self.kwargs == other.kwargs

    def __ne__(self, other):
        return not (self == other)

    def __hash__(self):
        # Order-independent combination of the name and the keyword
        # arguments, consistent with __eq__
        result = hash(self.name)
        for k, v in self.kwargs.items():
            result ^= hash(k) ^ hash(v)
        return result

    def __getattr__(self, name):
        """
        Expose the keyword arguments as attributes.

        Only called when normal attribute lookup has failed.  Raises
        AttributeError for anything that is not a keyword argument.
        """
        # An unset slot must not be looked up via self.kwargs, or we
        # would recurse straight back into this method:
        if name in State.__slots__:
            raise AttributeError('%s' % name)
        try:
            return self.kwargs[name]
        except KeyError:
            # There is no instance __dict__ to fall back on (the class
            # uses __slots__), so the attribute really is missing:
            raise AttributeError('%s' % name)

class StateNameSet(frozenset):
    """
    An immutable set of state names, as listed by a state clause.

    If any of the names ends with '*' the set is flagged as containing a
    wildcard, and membership tests then succeed for every key.
    """
    __slots__ = ('has_wildcard', )

    def __init__(self, statenames):
        # The set's contents were already populated by frozenset.__new__.
        # Don't forward the argument to the base __init__: on Python 3
        # that is object.__init__, which rejects extra arguments.
        # Iterate over self rather than the argument so that one-shot
        # iterables work too.
        self.has_wildcard = any(statename.endswith('*')
                                for statename in self)

    def __contains__(self, key):
        if self.has_wildcard:
            return True
        return frozenset.__contains__(self, key)

class PossibleMatch(object):
    """
    A pattern match found for an edge, independent of the current state:
    the matched expression, the state names of the clause it came from
    (as a StateNameSet), the clause itself, the pattern, the applicable
    outcomes and the underlying match object.
    """
    __slots__ = ('expr',
                 'statenames',
                 'sc',
                 'pattern',
                 'outcomes',
                 'match')

    def __init__(self, expr, sc, pattern, outcomes, match):
        self.expr = expr
        self.statenames = StateNameSet(sc.statelist)
        self.sc = sc
        self.pattern = pattern
        self.outcomes = outcomes
        self.match = match

    def describe(self, ctxt):
        """Return a one-line human-readable summary of this match"""
        stateliststr = ', '.join([str(state)
                                  for state in self.statenames])
        outcomeliststr = ', '.join([str(outcome)
                                    for outcome in self.outcomes])
        return '%r: %s => %s due to %s' % (str(self.expr), stateliststr,
                                           outcomeliststr, self.pattern)
discarding + # this one: + if isinstance(outcome, BooleanOutcome): + if edge.true_value and outcome.guard: + outcome = outcome.outcome + elif edge.false_value and not outcome.guard: + outcome = outcome.outcome + else: + continue + outcomes.append(outcome) + + yield PossibleMatch(match.get_stateful_gccvar(ctxt), + sc, + pr.pattern, + outcomes, + match) + +class StatesForNode(sm.dataflow.AbstractValue): + """ + The possible states that data can be in at a particular node in the + graph, tracked by equivalence classes (which themselves come from + sm.facts.Facts for the node). + + * topmost value: None signifies "unreachable": empty set of possible + states + + * intermediate values: mapping from equivalence classes to sets of + possible states that the given equivalence class can be in + + * bottommost value: each mapping has the set of *all* possible states + + * the "meet" of two values is the set of all possible states from + either, hence keywise-union of the possible states for each + equivalence class. + """ + def __init__(self, node, _dict): + assert isinstance(node, SupergraphNode) + self.node = node + + # dict from equivcls to set of states + self._dict = _dict + + def __str__(self): + kvstrs = [] + for equivcls, states in self._dict.iteritems(): + kvstrs.append('%s=%s' % (equivcls_to_str(equivcls), + stateset_to_str(states))) + return '{%s}' % ', '.join(kvstrs) + + def __eq__(self, other): + if isinstance(other, StatesForNode): + if self.node == other.node: + return self._dict == other._dict + + def __ne__(self, other): + return not self == other + + def get_combo_count(self): + """ + How many possible subsets does this have? 
+ """ + result = 1 + for equivcls, states in self._dict.iteritems(): + result *= len(states) + return result + + def get_equivcls_for_expr(self, ctxt, expr): + return ctxt.get_aliases(self.node, expr) + + def match_states_by_name(self, ctxt, expr, statenames): + equivcls = self.get_equivcls_for_expr(ctxt, expr) + if equivcls in self._dict: + # Do the state sets intersect? + # (returning the intersection, which will be true if non-empty) + result = [state + for state in self._dict[equivcls] + if state.name in statenames] + return frozenset(result) + + def get_states_for_expr(self, ctxt, expr): + equivcls = self.get_equivcls_for_expr(ctxt, expr) + if equivcls in self._dict: + return self._dict[equivcls] + return frozenset([ctxt.get_default_state()]) + + def is_subset_of(self, other): + assert isinstance(other, StatesForNode) + assert self.node == other.node + for equivcls, states in self._dict.iteritems(): + if not states.issubset(other._dict[equivcls]): + return False + return True + + def assign_to_from(self, ctxt, dstnode, lhs, rhs): + assert isinstance(dstnode, SupergraphNode) + result = self.propagate_to(ctxt, dstnode) + result._dict[ctxt.get_aliases(dstnode, lhs)] = \ + self.get_states_for_expr(ctxt, rhs) + return result + + def set_state_for_expr(self, ctxt, dstnode, expr, state): + assert isinstance(dstnode, SupergraphNode) + assert isinstance(state, State) + result = self.propagate_to(ctxt, dstnode) + result._dict[ctxt.get_aliases(dstnode, expr)] = frozenset([state]) + return result + + def propagate_to(self, ctxt, dstnode): + assert isinstance(dstnode, SupergraphNode) + _dict = {} + for equivcls, states in self._dict.iteritems(): + for expr in equivcls: + _dict[ctxt.get_aliases(dstnode, expr)] = states + return StatesForNode(dstnode, _dict) + + @classmethod + def make_entry_point(cls, ctxt, node): + _dict = {} + function = node.function + if function: + for expr in ctxt.smexprs[function]: + if isinstance(expr, (gcc.VarDecl, gcc.ParmDecl)): + 
_dict[ctxt.get_aliases(node, expr)] = \ + frozenset([ctxt.get_default_state()]) + return StatesForNode(node, _dict) + + @classmethod + def get_edge_value(cls, ctxt, srcvalue, edge): + assert isinstance(srcvalue, StatesForNode) # not None + srcnode = edge.srcnode + dstnode = edge.dstnode + stmt = srcnode.get_stmt() + ctxt.debug('edge from: %s', srcnode) + ctxt.debug(' to: %s', dstnode) + + # Handle interprocedural edges: + if isinstance(edge, CallToReturnSiteEdge): + # Ignore the intraprocedural edge for a function call: + return None, None + elif isinstance(edge, CallToStart): + # Alias the parameters with the arguments as necessary, so + # e.g. a function that free()s an arg has the caller's expr + # marked as free also: + assert isinstance(srcnode.stmt, gcc.GimpleCall) + # ctxt.debug(srcnode.stmt) + _dict = {} + for expr in ctxt.smexprs[dstnode.function]: + if isinstance(expr, gcc.VarDecl): + _dict[ctxt.get_aliases(dstnode, expr)] = \ + frozenset([ctxt.get_default_state()]) + for param, arg in zip(srcnode.stmt.fndecl.arguments, + srcnode.stmt.args): + # FIXME: change fndecl.arguments to fndecl.parameters + if 1: + ctxt.debug(' param: %r', param) + ctxt.debug(' arg: %r', arg) + #if ctxt.is_stateful_var(arg): + # shapechange.assign_var(param, arg) + arg = simplify(arg) + _dict[ctxt.get_aliases(dstnode, param)] = \ + srcvalue.get_states_for_expr(ctxt, arg) + return StatesForNode(dstnode, _dict), None + elif isinstance(edge, FakeEntryEdge): + _dict = {} + for expr in ctxt.smexprs[dstnode.function]: + if isinstance(expr, (gcc.VarDecl, gcc.ParmDecl)): + _dict[ctxt.get_aliases(dstnode, expr)] = \ + frozenset([ctxt.get_default_state()]) + return StatesForNode(dstnode, _dict), None + elif isinstance(edge, ExitToReturnSite): + # Propagate state through the return value: + # ctxt.debug('edge.calling_stmtnode: %s', edge.calling_stmtnode) + _dict = {} + if edge.calling_stmtnode.stmt.lhs: + exitsupernode = edge.srcnode + assert isinstance(exitsupernode.innernode, ExitNode) + 
retval = simplify(exitsupernode.innernode.returnval) + ctxt.debug('retval: %s', retval) + ctxt.debug('edge.calling_stmtnode.stmt.lhs: %s', + edge.calling_stmtnode.stmt.lhs) + _dict[ctxt.get_aliases(dstnode, simplify(edge.calling_stmtnode.stmt.lhs))] = \ + srcvalue.get_states_for_expr(ctxt, retval) + + # FIXME: we also need to backpatch the params, in case they've + # changed state + callsite = edge.dstnode.callnode.innernode + ctxt.debug('callsite: %s', callsite) + for param, arg in zip(callsite.stmt.fndecl.arguments, + callsite.stmt.args): + if 1: + ctxt.debug(' param: %r', param) + ctxt.debug(' arg: %r', arg) + _dict[ctxt.get_aliases(dstnode, simplify(arg))] = \ + srcvalue.get_states_for_expr(ctxt, simplify(param)) + return StatesForNode(dstnode, _dict), None + + matches = [] + + # Handle simple assignments so that variables inherit state: + if isinstance(stmt, gcc.GimpleAssign): + if 1: + ctxt.debug('gcc.GimpleAssign: %s', stmt) + ctxt.debug(' stmt.lhs: %r', stmt.lhs) + ctxt.debug(' stmt.rhs: %r', stmt.rhs) + ctxt.debug(' stmt.exprcode: %r', stmt.exprcode) + if stmt.exprcode == gcc.VarDecl: + lhs = simplify(stmt.lhs) + rhs = simplify(stmt.rhs[0]) + return srcvalue.assign_to_from(ctxt, dstnode, lhs, rhs), None + elif stmt.exprcode == gcc.ComponentRef: + # Field lookup + lhs = simplify(stmt.lhs) + compref = stmt.rhs[0] + if 1: + ctxt.debug('compref.target: %s', compref.target) + ctxt.debug('compref.field: %s', compref.field) + + # Do we already have a state for the field? 
+ if ctxt.get_aliases(srcnode, compref) in srcvalue._dict: + return srcvalue.assign_to_from(ctxt, dstnode, lhs, compref), None + else: + # Inherit the state from the struct: + if ctxt.options.enable_log: + ctxt.log('%s inheriting states %s from "%s" via field "%s"', + lhs, + stateset_to_str(srcvalue.get_states_for_expr(ctxt, compref.target)), + compref.target, + compref.field) + return srcvalue.assign_to_from(ctxt, dstnode, lhs, compref.target), None + elif isinstance(stmt, gcc.GimplePhi): + if 1: + ctxt.debug('gcc.GimplePhi: %s', stmt) + ctxt.debug(' srcnode: %s', srcnode) + ctxt.debug(' srcnode: %r', srcnode) + ctxt.debug(' srcnode.innernode: %s', srcnode.innernode) + ctxt.debug(' srcnode.innernode: %r', srcnode.innernode) + assert isinstance(srcnode.innernode, SplitPhiNode) + rhs = simplify(srcnode.innernode.rhs) + ctxt.debug(' rhs: %r', rhs) + lhs = simplify(srcnode.stmt.lhs) + ctxt.debug(' lhs: %r', lhs) + return srcvalue.assign_to_from(ctxt, dstnode, lhs, rhs), None + + # Check to see if any of the precalculated matches from the sm script + # apply: + for pm in ctxt.possible_matches_for_edge[edge]: + if ctxt.options.enable_log: + ctxt.log('possible match: %s', pm.describe(ctxt)) + matchingstates = srcvalue.match_states_by_name(ctxt, pm.expr, pm.statenames) + if matchingstates: + if ctxt.options.enable_log: + ctxt.log('matchingstates: %s' % stateset_to_str(matchingstates)) + ctxt.log('got match in states %s of %s at %s', + stateset_to_str(matchingstates), + pm.describe(ctxt), + stmt) + + # What state changes happen in the outcomes? + # Apply the outcomes in order, merging the result of any + # that lead to state changes. 
def show_state_histogram(ctxt):
    """
    Show an ASCII-art histogram (via ctxt.timing) to analyze how many state
    combinations there are: how many nodes have each number of valid state
    combinations (including None, for nodes without states).

    The longest bar is scaled to 40 characters.  For a graph without nodes
    only the header line is emitted.
    """
    cnt = Counter()
    for node in ctxt.graph.nodes:
        states = ctxt.states_for_node[node]
        if states:
            cnt[states.get_combo_count()] += 1
        else:
            cnt[None] += 1
    ctxt.timing('%6s : %5s :', 'COMBOS', 'NODES')
    if not cnt:
        # Nothing to plot (and no tallest bar to scale against)
        return
    extent = cnt.most_common(1)[0][1]
    scale = 40.0 / extent
    # None can't be ordered against ints on Python 3; sort it first
    # explicitly, which is where Python 2 put it anyway:
    for key in sorted(cnt, key=lambda k: (k is not None, k)):
        ctxt.timing('%6s : %5s : %s',
                    key, cnt[key],
                    '*' * int(cnt[key] * scale))

def generate_errors_from_fixed_point(ctxt):
    """
    Rerun all reachable matches on the fixed point states in order to allow
    any Python fragments to emit any errors on the reachable states.

    The errors are added to ctxt.errors_from_fixed_point
    """
    # The messages below are emitted through ctxt.debug(), so only pay for
    # building their arguments when debug output is actually enabled:
    verbose = ctxt.options.enable_debug
    for node in ctxt.graph.nodes:
        ctxt.debug('analyzing node: %s', node)
        states = ctxt.states_for_node[node]
        if states is None:
            # No states recorded for this node: nothing to check
            continue
        with ctxt.indent():
            stmt = node.stmt
            for edge in node.succs:
                ctxt.debug('analyzing out-edge: %s', edge)
                with ctxt.indent():
                    for pm in ctxt.possible_matches_for_edge[edge]:
                        if verbose:
                            ctxt.debug('possible match: %s', pm.describe(ctxt))
                        matchingstates = states.match_states_by_name(ctxt, pm.expr, pm.statenames)
                        if not matchingstates:
                            continue
                        if verbose:
                            ctxt.debug('matchingstates: %s',
                                       stateset_to_str(matchingstates))
                            ctxt.debug('got match in states %s of %s at %s',
                                       stateset_to_str(matchingstates),
                                       pm.describe(ctxt),
                                       stmt)
                        for outcome in pm.outcomes:
                            for state in matchingstates:
                                if verbose:
                                    # Log the individual state being
                                    # tried, not the whole per-node value
                                    ctxt.debug('applying outcome to %s => %s with %s',
                                               pm.expr,
                                               outcome,
                                               state)
                                effect = outcome.get_effect_for_state(ctxt, edge,
                                                                      pm.match, state)
                                # Extract errors from the effect:
                                for err in effect.errors:
                                    ctxt.errors_from_fixed_point.add(err)
+ + # in context, with a mapping from its vars to gcc.VarDecl + # (or ParmDecl) instances + __slots__ = ('options', + 'ch', + 'sm', + 'graph', + 'statenames', + + # A mapping from str (decl names) to Decl instances: + '_decls', + + # The stateful decl, if any: + '_stateful_decl', + + # A mapping from str (pattern names) to NamedPattern + # instances: + '_namedpatterns', + + # All StateClause instance, in order: + '_stateclauses', + + # Does any Python code call set_state()? + # (If so, we can't detect unreachable states) + '_uses_set_state', + + '_indent', + + # State instance for the default state + '_default_state', + + 'errors_from_fixed_point', + + 'python_locals', + 'python_globals', + + 'allexprs', + 'smexprs', + + 'facts_for_node', + 'leaks_for_edge', + 'possible_matches_for_edge', + 'states_for_node', + 'expgraph', + 'facts_for_expnode', + '_errors', + ) + + def __init__(self, ch, sm, graph, options): + self.options = options + + self.ch = ch + self.sm = sm + self.graph = graph + + # The Context caches some information about the sm to help + # process it efficiently: + # + # all state names: + self.statenames = list(sm.iter_statenames()) + + # a mapping from str (decl names) to Decl instances + self._decls = {} + + # the stateful decl, if any: + self._stateful_decl = None + + # a mapping from str (pattern names) to NamedPattern instances + self._namedpatterns = {} + + # all StateClause instance, in order: + self._stateclauses = [] + + # Does any Python code call set_state()? 
+ # (If so, we can't detect unreachable states) + self._uses_set_state = False + + self._indent = 0 + + # Set up self._decls and self._stateful_decl: + for clause in sm.clauses: + if isinstance(clause, Decl): + self._decls[clause.name] = clause + if clause.has_state: + self._stateful_decl = clause + + self._default_state = State(self.get_default_statename()) + + reachable_statenames = set([self.get_default_statename()]) + + # Set up the other above attributes: + for clause in sm.clauses: + if isinstance(clause, Decl): + self._decls[clause.name] = clause + if clause.has_state: + self._stateful_decl = clause + elif isinstance(clause, NamedPattern): + self._namedpatterns[clause.name] = clause + elif isinstance(clause, PythonFragment): + if 'set_state' in clause.src: + self._uses_set_state = True + elif isinstance(clause, StateClause): + self._stateclauses.append(clause) + for pr in clause.patternrulelist: + for outcome in pr.outcomes: + for statename in outcome.iter_reachable_statenames(): + reachable_statenames.add(statename) + if isinstance(outcome, PythonOutcome): + if 'set_state' in outcome.src: + self._uses_set_state = True + + # 2nd pass: validate the sm: + for clause in sm.clauses: + if isinstance(clause, StateClause): + for statename in clause.statelist: + if statename.endswith('*'): + continue + if statename not in reachable_statenames \ + and not self._uses_set_state: + class UnreachableState(Exception): + def __init__(self, statename): + self.statename = statename + def __str__(self): + return str(self.statename) + raise UnreachableState(statename) + + self.errors_from_fixed_point = set() + + # Run any initial python code: + self.python_locals = {} + self.python_globals = {} + for clause in sm.clauses: + if isinstance(clause, PythonFragment): + code = clause.get_code() + result = eval(code, self.python_globals, self.python_locals) + + def __repr__(self): + return 'Context(%r)' % (self.statenames, ) + + def indent(self): + class IndentCM: + # context manager 
for indenting/outdenting the log + def __init__(self, ctxt): + self.ctxt = ctxt + + def __enter__(self): + self.ctxt._indent += 1 + + def __exit__(self, exc_type, exc_value, traceback): + self.ctxt._indent -= 1 + return IndentCM(self) + + def _get_indent(self): + # Indent by the stack depth plus self._indent: + depth = 0 + f = sys._getframe() + while f: + depth += 1 + f = f.f_back + return ' ' * (depth + self._indent) + + def timing(self, msg, *args): + # Highest-level logging: how long does each stage take to run? + if self.options.enable_timing: + formattedmsg = msg % args + sys.stderr.write('TIMING: %s: %s%s\n' + % (self.sm.name, self._get_indent(), formattedmsg)) + + def log(self, msg, *args): + # High-level logging + if self.options.enable_log: + formattedmsg = msg % args + sys.stderr.write('LOG : %s: %s%s\n' + % (self.sm.name, self._get_indent(), formattedmsg)) + + def debug(self, msg, *args): + # Lower-level logging + if self.options.enable_debug: + formattedmsg = msg % args + sys.stderr.write('DEBUG: %s: %s%s\n' + + % (self.sm.name, self._get_indent(), formattedmsg)) + + def lookup_decl(self, declname): + class UnknownDecl(Exception): + def __init__(self, declname): + self.declname = declname + def __str__(self): + return repr(declname) + if declname not in self._decls: + raise UnknownDecl(declname) + return self._decls[declname] + + def lookup_pattern(self, patname): + '''Lookup a named pattern''' + class UnknownNamedPattern(Exception): + def __init__(self, patname): + self.patname = patname + def __str__(self): + return repr(patname) + if patname not in self._namedpatterns: + raise UnknownNamedPattern(patname) + return self._namedpatterns[patname] + + def emit_errors(self, solution): + if self.options.dump_json: + reporter = JsonReporter() + else: + reporter = StderrReporter() + + for err in sorted(self._errors): + report = err.make_report(self, solution) + if report: + reporter.add(report) + + def compare(self, gccexpr, smexpr): + if 0: + self.debug(' 
compare(%r, %r)', gccexpr, smexpr) + + if isinstance(gccexpr, (gcc.VarDecl, gcc.ParmDecl, gcc.SsaName)): + #if gccexpr == self.var: + # self.debug '%r' % self.sm.varclauses.name + #if smexpr == self.sm.varclauses.name: + if isinstance(smexpr, str): + decl = self.lookup_decl(smexpr) + if decl.matched_by(gccexpr): + return gccexpr + + if isinstance(gccexpr, gcc.IntegerCst): + if isinstance(smexpr, (int, long)): + if gccexpr.constant == smexpr: + return gccexpr + if isinstance(smexpr, str): + decl = self.lookup_decl(smexpr) + if decl.matched_by(gccexpr): + return gccexpr + + if isinstance(gccexpr, gcc.AddrExpr): + # Dereference: + return self.compare(gccexpr.operand, smexpr) + + if isinstance(gccexpr, gcc.ComponentRef): + # Dereference: + return self.compare(gccexpr.target, smexpr) + + return None + + def get_default_statename(self): + return '%s.start' % self._stateful_decl.name + + def get_default_state(self): + return self._default_state + + def is_stateful_var(self, gccexpr): + ''' + Is this gcc.Tree of a kind that has state according to the current sm? 
+ ''' + if isinstance(gccexpr, gcc.SsaName): + if isinstance(gccexpr.type, gcc.PointerType): + # TODO: the sm may impose further constraints + return True + + def find_scopes(self): + """ + Set up per-function dictionaries on the Context: + + * allexprs: the set of all tree expressions visible in that + function + + * smexprs: the subset of the above that match the stateful sm + expression type + """ + self.allexprs = {} + self.smexprs = {} + for function in self.graph.get_functions(): + smexprs = set() + allexprs = set() + def add_to_scope(node): + if isinstance(node, gcc.FunctionDecl): + return + if isinstance(node, gcc.SsaName): + add_to_scope(node.var) + + if isinstance(node, (gcc.VarDecl, gcc.ParmDecl, gcc.ComponentRef)): + allexprs.add(node) + if self._stateful_decl.matched_by(node): + smexprs.add(node) + + for bb in function.cfg.basic_blocks: + if bb.gimple: + for stmt in bb.gimple: + stmt.walk_tree(add_to_scope) + self.allexprs[function] = allexprs + self.smexprs[function] = smexprs + + def get_aliases(self, node, expr): + facts = self.facts_for_node[node] + if facts is not None: + return facts.get_aliases(expr) + else: + return frozenset([expr]) + + def solve(self, name): + # Preprocessing phase: identify the scope of expressions within each + # function + with Timer(self, 'find_scopes'): + self.find_scopes() + + # Preprocessing phase: gather simple per-node "facts", for use in + # giving better names for temporaries, and for identifying the return + # values of functions + from sm.facts import Facts + with Timer(self, 'sm.dataflow.fixed_point_solver(Facts)'): + self.facts_for_node = sm.dataflow.fixed_point_solver(self, self.graph, Facts) + + # Preprocessing: set up possible_matches_for_edge dict: + with Timer(self, 'find_possible_matches'): + self.possible_matches_for_edge = {} + for edge in self.graph.edges: + self.possible_matches_for_edge[edge] = \ + list(find_possible_matches(self, edge)) + + # Find the all possible states reachable for each in-scope 
expr at + # each node: + with Timer(self, 'sm.dataflow.fixed_point_solver(StatesForNode)'): + self.states_for_node = sm.dataflow.fixed_point_solver(self, self.graph, StatesForNode) + + if self.options.enable_timing: + show_state_histogram(self) + + self.timing('len(graph.nodes): %i', len(self.graph.nodes)) + self.timing('len(graph.edges): %i', len(self.graph.edges)) + + solution = sm.solution.Solution(self) + + # Generate self.errors_from_fixed_point: + with Timer(self, 'generate_errors'): + generate_errors_from_fixed_point(self) + self.timing('len(self.errors_from_fixed_point): %i', len(self.errors_from_fixed_point)) + + self._errors = list(self.errors_from_fixed_point) + + # Don't bother setting up error-reporting if there were no errors + # to report: + if len(self._errors) == 0: + return solution + + # Work-in-progress: + # Build exploded graph: + with Timer(self, 'build_exploded_graph'): + from sm.expgraph import build_exploded_graph + self.expgraph = build_exploded_graph(self) + + self.timing('len(expgraph.nodes): %i', len(self.expgraph.nodes)) + self.timing('len(expgraph.edges): %i', len(self.expgraph.edges)) + + self.facts_for_expnode = None + + if self.options.show_exploded_graph: + from gccutils import invoke_dot + dot = self.expgraph.to_dot('exploded_graph', self) + # Debug: view the exploded graph: + if 0: + ctxt.debug(dot) + invoke_dot(dot, 'exploded_graph') + + # Now prune the exploded graph, removing impossible paths: + with Timer(self, 'pruning exploded graph'): + from sm.facts import remove_impossible, Facts + + with Timer(self, 'sm.dataflow.fixed_point_solver(Facts for expgraph)'): + self.facts_for_expnode = \ + sm.dataflow.fixed_point_solver(self, + self.expgraph, + Facts) + + # Remove nodes that the factfinder thinks are unreachable: + with Timer(self, 'removing unreachable expnodes'): + pruned = 0 + for expnode in list(self.expgraph.nodes): + if self.facts_for_expnode[expnode] is None: + pruned += self.expgraph.remove_node(expnode) + 
self.timing('pruned %i expnodes', pruned) + + if self.options.show_exploded_graph: + from gccutils import invoke_dot + dot = self.expgraph.to_dot('pruned_graph', self) + invoke_dot(dot, 'pruned_graph') + + return solution + + ####################################################################### + # Utility methods for writing selftests + ####################################################################### + def _is_within(self, node, within): + if within: + if node.function: + if node.function.decl.name == within: + return True + return False + return True + + def _error_at_node(self, node): + if node.stmt: + if node.stmt.loc: + gcc.set_location(node.stmt.loc) + + def get_nodes(self): + query = Query(self.graph) + return query + + def find_call_of(self, funcname, within=None): + query = Query(self.graph).get_calls_of(funcname) + if within: + query = query.within(funcname=within) + return query.first() + + def find_implementation_of(self, funcname): + for fun in self.graph.stmtg_for_fun: + if fun.decl.name == funcname: + return self.graph.supernode_for_stmtnode[self.graph.stmtg_for_fun[fun].entry] + raise ValueError('implementation of %s() not found' % funcname) + + def find_exit_of(self, funcname): + for fun in self.graph.stmtg_for_fun: + if fun.decl.name == funcname: + return self.graph.supernode_for_stmtnode[self.graph.stmtg_for_fun[fun].exit] + raise ValueError('implementation of %s() not found' % funcname) + + def find_comparison_against(self, exprcode, const, within=None): + for node in self.graph.nodes: + if not self._is_within(node, within): + continue + stmt = node.stmt + if isinstance(stmt, gcc.GimpleCond): + if stmt.exprcode == exprcode: + if isinstance(stmt.rhs, gcc.Constant): + if stmt.rhs.constant == const: + return node + raise ValueError('comparison %s %s not found' % (exprcode, const)) + + def get_inedge(self, node): + if len(node.preds) > 1: + self._error_at_node(node) + raise ValueError('node %s has more than one inedge' % node) + return 
list(node.preds)[0] + + def get_successor(self, node): + if len(node.succs) > 1: + self._error_at_node(node) + raise ValueError('node %s has more than one successor' % node) + return list(node.succs)[0].dstnode + + def get_true_successor(self, node): + assert isinstance(node.stmt, gcc.GimpleCond) + for edge in node.succs: + if edge.true_value: + return edge.dstnode + self._error_at_node(node) + raise ValueError('could not find true successor of node %s' % node) + + def get_intraprocedural_successor(self, node): + """ + Given a callsite, get the next node within that function + i.e. the second half of the callsite: wrapping the assignment of the + return value to the LHS + """ + assert isinstance(node, CallNode) + assert isinstance(node.stmt, gcc.GimpleCall) + for edge in node.succs: + if isinstance(edge, CallToReturnSiteEdge): + assert isinstance(edge.dstnode, ReturnNode) + assert isinstance(edge.dstnode.stmt, gcc.GimpleCall) + return edge.dstnode + self._error_at_node(node) + raise ValueError('could not find intraprocedural successor of node %s' + % node) + + def find_var(self, node, varname): + for var in self.allexprs[node.function]: + if isinstance(var, (gcc.VarDecl, gcc.ParmDecl)): + if var.name == varname: + return var + self._error_at_node(node) + raise ValueError('variable %s not found' % varname) + + def get_expr_by_str(self, node, exprstr): + for expr in self.allexprs[node.function]: + if str(expr) == exprstr: + return expr + self._error_at_node(node) + raise ValueError('expression %s not found' % exprstr) + + def assert_fact(self, node, lhs, op, rhs): + from sm.facts import Fact + if isinstance(lhs, str): + lhs = self.get_expr_by_str(node, lhs) + expectedfact = Fact(lhs, op, rhs) + actualfacts = self.facts_for_node[node] + if expectedfact not in actualfacts.set_: + self._error_at_node(node) + raise ValueError('%s not in %s' % (expectedfact, actualfacts)) + + def assert_no_facts(self, node): + actualfacts = self.facts_for_node[node] + if actualfacts.set_: 
+ raise ValueError('unexpectedly found facts: %s' % (actualfacts, )) + + def assert_not_fact(self, node, lhs, op, rhs): + from sm.facts import Fact + if isinstance(lhs, str): + lhs = self.get_expr_by_str(node, lhs) + expectedfact = Fact(lhs, op, rhs) + actualfacts = self.facts_for_node[node] + if expectedfact in actualfacts.set_: + self._error_at_node(node) + raise ValueError('%s unexpectedly within %s' % (expectedfact, actualfacts)) + + def assert_states_for_expr(self, node, expr, expectedstates): + expr = simplify(expr) + actualstates = self.states_for_node[node].get_states_for_expr(self, expr) + if actualstates != expectedstates: + self._error_at_node(node) + raise ValueError('wrong states for %s at %r: expected %s but got %s' + % (expr, + str(node), + stateset_to_str(expectedstates), + stateset_to_str(actualstates))) + + def assert_states_for_varname(self, node, varname, expectedstates): + var = self.find_var(node, varname) + self.assert_states_for_expr(node, var, expectedstates) + + def assert_statenames_for_expr(self, node, expr, expectedstatenames): + expectedstates = set([State(name) + for name in expectedstatenames]) + self.assert_states_for_expr(node, expr, expectedstates) + + def assert_statenames_for_varname(self, node, varname, expectedstatenames): + var = self.find_var(node, varname) + self.assert_statenames_for_expr(node, var, expectedstatenames) + + def assert_error_is_impossible(self, err, solution): + stateful_gccvar = err.match.get_stateful_gccvar(self) + equivcls = self.get_aliases(err.srcnode, stateful_gccvar) + path = solution.get_shortest_path_to(err.srcnode, + equivcls, + err.state) + if path is not None: + raise ValueError('expected %r to be impossible due to there' + ' being no possible path to %s:%s at %s\n' + 'but found path: %s' + % (err.msg, equivcls, err.state, err.srcnode, + path)) + + def assert_edge_matches_pattern(self, edge, patternsrc): + for pm in self.possible_matches_for_edge[edge]: + if patternsrc == str(pm.match.pattern): + 
# We have a match + return pm + srcs = [str(pm.match.pattern) + for pm in edge.possible_matches] + raise ValueError('pattern %r not found in %s' + % (patternsrc, srcs)) + +def solve(ctxt, name, selftest): + ctxt.log('running %s', ctxt.sm.name) + ctxt.log('len(ctxt.graph.nodes): %i', len(ctxt.graph.nodes)) + ctxt.log('len(ctxt.graph.edges): %i', len(ctxt.graph.edges)) + with Timer(ctxt, 'generating solution'): + solution = ctxt.solve(name) + if ctxt.options.dump_solution: + solution.dump(sys.stderr) + if ctxt.options.show_solution: + dot = solution.to_dot(name) + # Debug: view the solution: + if 0: + ctxt.debug(dot) + invoke_dot(dot, name) + + ctxt.log('len(ctxt._errors): %i', len(ctxt._errors)) + + # Now report the errors, grouped by function, and in source order: + ctxt._errors.sort() + + with Timer(ctxt, 'emitting errors'): + ctxt.emit_errors(solution) + + if selftest: + selftest(ctxt, solution) diff --git a/sm/utils.py b/sm/utils.py new file mode 100644 index 00000000..152ca289 --- /dev/null +++ b/sm/utils.py @@ -0,0 +1,83 @@ +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +############################################################################ +# Various small utility classes and functions +############################################################################ + +import time + +import gcc + +class Timer: + """ + Context manager for logging the start/finish of a particular activity + and how long it takes + """ + def __init__(self, ctxt, name): + self.ctxt = ctxt + self.name = name + self.starttime = time.time() + + def get_elapsed_time(self): + """Get elapsed time in seconds as a float""" + curtime = time.time() + return curtime - self.starttime + + def elapsed_time_as_str(self): + """Get elapsed time as a string (with units)""" + elapsed = self.get_elapsed_time() + result = '%0.3f seconds' % elapsed + if elapsed > 120: + result += ' (%i minutes)' % int(elapsed / 60) + return result + + def __enter__(self): + self.ctxt.timing('START: %s', self.name) + self.ctxt._indent += 1 + + def __exit__(self, exc_type, exc_value, traceback): + self.ctxt._indent -= 1 + self.ctxt.timing('%s: %s TIME TAKEN: %s', + 'STOP' if exc_type is None else 'ERROR', + self.name, + self.elapsed_time_as_str()) + +def simplify(gccexpr): + if isinstance(gccexpr, gcc.SsaName): + return gccexpr.var + return gccexpr + +def stateset_to_str(states): + return '{%s}' % ', '.join([str(state) for state in states]) + +def equivcls_to_str(equivcls): + if equivcls is None: + return 'None' + return '{%s}' % ', '.join([str(expr) for expr in equivcls]) + +def get_retval_aliases(ctxt, supernode): + exitstmtnode = supernode.stmtg.exit + retval = exitstmtnode.returnval + if retval is None: + # No return value + return frozenset() + + retval = simplify(retval) + ctxt.debug('retval: %s', retval) + exitsupernode = ctxt.graph.supernode_for_stmtnode[exitstmtnode] + return ctxt.get_aliases(exitsupernode, retval) diff --git a/tests/gccutils/graph/input.c b/tests/gccutils/graph/input.c new file mode 100644 index 00000000..8b137891 --- /dev/null +++ 
b/tests/gccutils/graph/input.c @@ -0,0 +1 @@ + diff --git a/tests/gccutils/graph/metadata.ini b/tests/gccutils/graph/metadata.ini new file mode 100644 index 00000000..ba7428ca --- /dev/null +++ b/tests/gccutils/graph/metadata.ini @@ -0,0 +1,4 @@ +[ExpectedBehavior] +# This test case emits warnings on stderr; +# don't treat the stderr output as leading to an expected failure: +exitcode = 0 diff --git a/tests/gccutils/graph/script.py b/tests/gccutils/graph/script.py new file mode 100644 index 00000000..5d091aef --- /dev/null +++ b/tests/gccutils/graph/script.py @@ -0,0 +1,173 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +# Note: the line diagrams in the comments use the +# Unicode "Box Drawing" characters: +# ─ : U+2500 BOX DRAWINGS LIGHT HORIZONTAL +# │ : U+2502 BOX DRAWINGS LIGHT VERTICAL +# ┐ : U+2510 BOX DRAWINGS LIGHT DOWN AND LEFT +# └ : U+2514 BOX DRAWINGS LIGHT UP AND RIGHT +# ┘ : U+2518 BOX DRAWINGS LIGHT UP AND LEFT +# (the arrows are the greater than/less than and the letters A and V) + +import unittest + +from gccutils.graph import Graph, Node, Edge + +class NamedNode(Node): + def __init__(self, name=None): + Node.__init__(self) + self.name = name + + def __str__(self): + if self.name: + return self.name + return 'node' + + def __repr__(self): + return '%r' % self.name + +def make_trivial_graph(): + """ + Construct a trivial graph: + a ─> b + """ + g = Graph() + a = g.add_node(NamedNode('a')) + b = g.add_node(NamedNode('b')) + ab = g.add_edge(a, b) + return g, a, b, ab + +def add_long_path(g, length): + """ + Construct a path of the form: + first -> n0 -> n1 -> .... -> last + where there are "length" edges + """ + first = g.add_node(Node()) + last = first + cur = first + for i in range(length): + last = g.add_node(Node()) + g.add_edge(cur, last) + cur = last + return first, last + +def add_cycle(g, length): + """ + Construct a cycle of the form + first ─> n0 ─> n1 ─> ... 
─> nN ┐ + A │ + └────────────────────────────┘ + where there are "length" edges + """ + assert length > 0 + first, last = add_long_path(g, length - 1) + g.add_edge(last, first) + return first + +class GraphTests(unittest.TestCase): + def test_to_dot(self): + g, a, b, ab = make_trivial_graph() + dot = g.to_dot('example') + + def test_long_path(self): + LENGTH = 1000 + g = Graph() + first, last = add_long_path(g, LENGTH) + self.assertEqual(len(g.edges), LENGTH) + self.assertEqual(len(g.nodes), LENGTH + 1) + dot = g.to_dot('example') + + def test_cycle(self): + LENGTH = 5 + g = Graph() + first = add_cycle(g, LENGTH) + self.assertEqual(len(g.edges), LENGTH) + self.assertEqual(len(g.nodes), LENGTH) + dot = g.to_dot('example') + +class PathfindingTests(unittest.TestCase): + def test_no_path(self): + g = Graph() + a = g.add_node(Node()) + b = g.add_node(Node()) + # no edges between them + path = g.get_shortest_path(a, b) + self.assertEqual(path, []) # FIXME: shouldn't this be None? + + def test_trivial_path(self): + g, a, b, ab = make_trivial_graph() + path = g.get_shortest_path(a, b) + self.assertEqual(path, [ab]) + + def test_long_path(self): + # Verify that get_shortest_path() can handle reasonably-sized graphs: + #LENGTH = 100 + LENGTH = 10000 + g = Graph() + first, last = add_long_path(g, LENGTH) + path = g.get_shortest_path(first, last) + self.assertEqual(len(path), LENGTH) + self.assertEqual(path[0].srcnode, first) + self.assertEqual(path[-1].dstnode, last) + + def test_cycles(self): + LENGTH = 5 + g = Graph() + a = add_cycle(g, LENGTH) + b = add_cycle(g, LENGTH) + c = add_cycle(g, LENGTH) + ab = g.add_edge(a, b) + bc = g.add_edge(b, c) + path = g.get_shortest_path(a, c) + self.assertEqual(len(path), 2) + p0, p1 = path + self.assertEqual(p0, ab) + self.assertEqual(p1, bc) + + def test_fork(self): + # Verify that it figures out the shortest path for: + # a ─> b─┬─> c ─> d ─┬─> f + # └─> e ──────┘ + g, a, b, ab = make_trivial_graph() + + c = 
g.add_node(NamedNode('c')) + bc = g.add_edge(b, c) + + d = g.add_node(NamedNode('d')) + cd = g.add_edge(c, d) + + e = g.add_node(NamedNode('e')) + be = g.add_edge(b, e) + + f = g.add_node(NamedNode('f')) + df = g.add_edge(d, f) + ef = g.add_edge(e, f) + + path = g.get_shortest_path(a, f) + self.assertEqual(len(path), 3) + p0, p1, p2 = path + self.assertEqual(p0, ab) + self.assertEqual(p1, be) + self.assertEqual(p2, ef) + +import sys +sys.argv = ['foo', '-v'] + +unittest.main() diff --git a/tests/gccutils/graph/stderr.txt b/tests/gccutils/graph/stderr.txt new file mode 100644 index 00000000..3885e472 --- /dev/null +++ b/tests/gccutils/graph/stderr.txt @@ -0,0 +1,13 @@ +test_cycle (__main__.GraphTests) ... ok +test_long_path (__main__.GraphTests) ... ok +test_to_dot (__main__.GraphTests) ... ok +test_cycles (__main__.PathfindingTests) ... ok +test_fork (__main__.PathfindingTests) ... ok +test_long_path (__main__.PathfindingTests) ... ok +test_no_path (__main__.PathfindingTests) ... ok +test_trivial_path (__main__.PathfindingTests) ... ok + +---------------------------------------------------------------------- +Ran 8 tests in #s + +OK diff --git a/tests/gccutils/graph/stdout.txt b/tests/gccutils/graph/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/nextsteps/synthetic/buffer-overflow-in-loop/input.c b/tests/nextsteps/synthetic/buffer-overflow-in-loop/input.c new file mode 100644 index 00000000..7ae82965 --- /dev/null +++ b/tests/nextsteps/synthetic/buffer-overflow-in-loop/input.c @@ -0,0 +1,45 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +/* + Ensure that the checker complains about a buffer overflow in a loop + (due to an off-by-one) +*/ + +void test(void) +{ + char buf[4096]; + int i; + + /* BUG: condition should have been <, not <=, so it will + write one past the end of the array: */ + for (i = 0; i <= 4096; i++) { + buf[i] = 42; + } +} + +/* + PEP-7 +Local variables: +c-basic-offset: 4 +indent-tabs-mode: nil +End: +*/ diff --git a/tests/nextsteps/synthetic/buffer-overflow-in-loop/script.py b/tests/nextsteps/synthetic/buffer-overflow-in-loop/script.py new file mode 100644 index 00000000..fca3cbcd --- /dev/null +++ b/tests/nextsteps/synthetic/buffer-overflow-in-loop/script.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from libcpychecker.constraints import main + +main() diff --git a/tests/nextsteps/synthetic/buffer-overflow-in-loop/stderr.txt b/tests/nextsteps/synthetic/buffer-overflow-in-loop/stderr.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/nextsteps/synthetic/buffer-overflow-in-loop/stderr.txt @@ -0,0 +1 @@ + diff --git a/tests/nextsteps/synthetic/buffer-overflow-in-loop/stdout.txt b/tests/nextsteps/synthetic/buffer-overflow-in-loop/stdout.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/nextsteps/synthetic/buffer-overflow-in-loop/stdout.txt @@ -0,0 +1 @@ + diff --git a/tests/nextsteps/synthetic/list-of-random-ints/input.c b/tests/nextsteps/synthetic/list-of-random-ints/input.c new file mode 100644 index 00000000..a9830848 --- /dev/null +++ b/tests/nextsteps/synthetic/list-of-random-ints/input.c @@ -0,0 +1,22 @@ +#include + +PyObject * +make_a_list_of_random_ints_badly(PyObject *self, + PyObject *args) +{ + PyObject *list, *item; + long count, i; + + if (!PyArg_ParseTuple(args, "i", &count)) { + return NULL; + } + + list = PyList_New(0); + + for (i = 0; i < count; i++) { + item = PyLong_FromLong(random()); + PyList_Append(list, item); + } + + return list; +} diff --git a/tests/nextsteps/synthetic/list-of-random-ints/metadata.ini b/tests/nextsteps/synthetic/list-of-random-ints/metadata.ini new file mode 100644 index 00000000..0d5711cb --- /dev/null +++ b/tests/nextsteps/synthetic/list-of-random-ints/metadata.ini @@ -0,0 +1,3 @@ +[ExpectedBehavior] +# We expect only compilation *warnings*, so we expect a 0 exit code +exitcode = 0 diff --git a/tests/nextsteps/synthetic/list-of-random-ints/script.py b/tests/nextsteps/synthetic/list-of-random-ints/script.py new file mode 100644 index 00000000..fca3cbcd --- /dev/null +++ b/tests/nextsteps/synthetic/list-of-random-ints/script.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. 
+# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from libcpychecker.constraints import main + +main() diff --git a/tests/nextsteps/synthetic/list-of-random-ints/stderr.txt b/tests/nextsteps/synthetic/list-of-random-ints/stderr.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/nextsteps/synthetic/list-of-random-ints/stderr.txt @@ -0,0 +1 @@ + diff --git a/tests/nextsteps/synthetic/list-of-random-ints/stdout.txt b/tests/nextsteps/synthetic/list-of-random-ints/stdout.txt new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/nextsteps/synthetic/list-of-random-ints/stdout.txt @@ -0,0 +1 @@ + diff --git a/tests/plugin/constants/script.py b/tests/plugin/constants/script.py index c5ba145c..5268a53f 100644 --- a/tests/plugin/constants/script.py +++ b/tests/plugin/constants/script.py @@ -28,6 +28,17 @@ def on_finish_unit(): assert isinstance(var.decl.initial, gcc.IntegerCst) print('%s: %s' % (name, hex(var.decl.initial.constant))) assert int(var.decl.initial) == var.decl.initial.constant + assert hash(var.decl.initial) == hash(var.decl.initial.constant) + # Verify that rich comparisons between gcc.IntegerCst and int are sane: + assert var.decl.initial == var.decl.initial.constant + assert var.decl.initial >= var.decl.initial.constant + assert var.decl.initial <= var.decl.initial.constant + assert var.decl.initial != var.decl.initial.constant + 1 + # and the other way around: + 
assert var.decl.initial.constant == var.decl.initial + assert var.decl.initial.constant >= var.decl.initial + assert var.decl.initial.constant <= var.decl.initial + assert var.decl.initial.constant + 1 != var.decl.initial elif name.startswith('f') or name.startswith('d'): assert isinstance(var.decl.initial, gcc.RealCst) print('%s: %r %s' % (name, diff --git a/tests/plugin/functions/script.py b/tests/plugin/functions/script.py index 1e708b7d..1e91ad6b 100644 --- a/tests/plugin/functions/script.py +++ b/tests/plugin/functions/script.py @@ -38,6 +38,7 @@ def on_pass_execution(p, fn): print('local_decls[%i]' % i) print(' type(local): %r' % type(local)) print(' local.name: %r' % local.name) + print(' local.context: %r' % local.context) # The "initial" only seems to be present for static variables # with initializers. Other variables seem to get initialized # in explicit gimple statements (see below) diff --git a/tests/plugin/functions/stdout.txt b/tests/plugin/functions/stdout.txt index 1732fb26..e893a9af 100644 --- a/tests/plugin/functions/stdout.txt +++ b/tests/plugin/functions/stdout.txt @@ -4,31 +4,37 @@ len(fn.local_decls): 6 local_decls[0] type(local): local.name: 'f' + local.context: gcc.FunctionDecl('foo') local.initial: None str(local.type): 'float' local_decls[1] type(local): local.name: 'g' + local.context: gcc.FunctionDecl('foo') local.initial: None str(local.type): 'float' local_decls[2] type(local): local.name: 'h' + local.context: gcc.FunctionDecl('foo') local.initial: None str(local.type): 'float' local_decls[3] type(local): local.name: 'i' + local.context: gcc.FunctionDecl('foo') local.initial.constant: 42 str(local.type): 'int' local_decls[4] type(local): local.name: 'keywords' + local.context: gcc.FunctionDecl('foo') local.initial: None str(local.type): 'char *[2]' local_decls[5] type(local): local.name: 'd' + local.context: gcc.FunctionDecl('foo') local.initial: None str(local.type): 'const double' fn.funcdef_no: 0 diff --git 
a/tests/sm/assignments/dereference-on-lhs/input.c b/tests/sm/assignments/dereference-on-lhs/input.c new file mode 100644 index 00000000..8234afad --- /dev/null +++ b/tests/sm/assignments/dereference-on-lhs/input.c @@ -0,0 +1,27 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +int *test(int i) +{ + int *foo = (int*)malloc(sizeof(int*)); + *foo = i; /* BUG: result of malloc could be NULL */ + return foo; +} diff --git a/tests/sm/assignments/dereference-on-lhs/script.py b/tests/sm/assignments/dereference-on-lhs/script.py new file mode 100644 index 00000000..5c59776e --- /dev/null +++ b/tests/sm/assignments/dereference-on-lhs/script.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. 
If not, see +# . + +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that the: + # foo = malloc() + # transitions "foo" from "ptr.start" to "ptr.unchecked" + node = ctxt.find_call_of('malloc') + ctxt.assert_statenames_for_varname(node, 'foo', {'ptr.start'}) + + node = ctxt.get_successor(node) + ctxt.assert_statenames_for_varname(node, 'foo', {'ptr.unchecked'}) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/assignments/dereference-on-lhs/stderr.txt b/tests/sm/assignments/dereference-on-lhs/stderr.txt new file mode 100644 index 00000000..10b4b241 --- /dev/null +++ b/tests/sm/assignments/dereference-on-lhs/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/assignments/dereference-on-lhs/input.c: In function 'test': +tests/sm/assignments/dereference-on-lhs/input.c:25:nn: error: dereference of possibly-NULL pointer foo [CWE-690] +tests/sm/assignments/dereference-on-lhs/input.c:24:nn: note: foo assigned to the result of malloc() +tests/sm/assignments/dereference-on-lhs/input.c:25:nn: note: dereference of possibly-NULL pointer foo diff --git a/tests/sm/assignments/dereference-on-lhs/stdout.txt b/tests/sm/assignments/dereference-on-lhs/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/assignments/dereference-on-rhs/input.c b/tests/sm/assignments/dereference-on-rhs/input.c new file mode 100644 index 00000000..6a1c472e --- /dev/null +++ b/tests/sm/assignments/dereference-on-rhs/input.c @@ -0,0 +1,28 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +int *test(int i) +{ + int *foo = (int*)malloc(sizeof(int*)); + i = *foo; /* BUG: result of malloc could be NULL + (fwiw it's also uninitialized) */ + return foo; +} diff --git a/tests/sm/assignments/dereference-on-rhs/script.py b/tests/sm/assignments/dereference-on-rhs/script.py new file mode 100644 index 00000000..f5439a23 --- /dev/null +++ b/tests/sm/assignments/dereference-on-rhs/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/assignments/dereference-on-rhs/stderr.txt b/tests/sm/assignments/dereference-on-rhs/stderr.txt new file mode 100644 index 00000000..dd633844 --- /dev/null +++ b/tests/sm/assignments/dereference-on-rhs/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/assignments/dereference-on-rhs/input.c: In function 'test': +tests/sm/assignments/dereference-on-rhs/input.c:25:nn: error: dereference of possibly-NULL pointer foo [CWE-690] +tests/sm/assignments/dereference-on-rhs/input.c:24:nn: note: foo assigned to the result of malloc() +tests/sm/assignments/dereference-on-rhs/input.c:25:nn: note: dereference of possibly-NULL pointer foo diff --git a/tests/sm/assignments/dereference-on-rhs/stdout.txt b/tests/sm/assignments/dereference-on-rhs/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/assignments/phi/input.c b/tests/sm/assignments/phi/input.c new file mode 100644 index 00000000..b1c939a3 --- /dev/null +++ b/tests/sm/assignments/phi/input.c @@ -0,0 +1,38 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include + +extern void marker(char *buffer); + +char *test(int flag) +{ + char *buffer; + + if (flag) { + buffer = (char*)malloc(4096); + } else { + buffer = NULL; + } + + marker(buffer); + buffer[0] = 'a'; + + return buffer; +} diff --git a/tests/sm/assignments/phi/script.py b/tests/sm/assignments/phi/script.py new file mode 100644 index 00000000..6de41cab --- /dev/null +++ b/tests/sm/assignments/phi/script.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that at + # marker(buffer) + # that we have the states from both paths: + node = ctxt.find_call_of('marker') + ctxt.assert_statenames_for_varname(node, 'buffer', {'ptr.unchecked', 'ptr.null'}) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/assignments/phi/stderr.txt b/tests/sm/assignments/phi/stderr.txt new file mode 100644 index 00000000..13d76caf --- /dev/null +++ b/tests/sm/assignments/phi/stderr.txt @@ -0,0 +1,7 @@ +tests/sm/assignments/phi/input.c: In function 'test': +tests/sm/assignments/phi/input.c:35:nn: error: dereference of NULL pointer buffer [CWE-690] +tests/sm/assignments/phi/input.c:31:nn: note: buffer assigned to 0 +tests/sm/assignments/phi/input.c:35:nn: note: dereference of NULL pointer buffer +tests/sm/assignments/phi/input.c:35:nn: error: dereference of possibly-NULL pointer buffer [CWE-690] +tests/sm/assignments/phi/input.c:29:nn: note: buffer assigned to the result of malloc() +tests/sm/assignments/phi/input.c:35:nn: note: dereference of possibly-NULL pointer buffer diff --git a/tests/sm/assignments/phi/stdout.txt b/tests/sm/assignments/phi/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/arg_must_not_be_null/input.c b/tests/sm/checkers/malloc-checker/arg_must_not_be_null/input.c new file mode 100644 index 00000000..141e55cb --- /dev/null +++ b/tests/sm/checkers/malloc-checker/arg_must_not_be_null/input.c @@ -0,0 +1,30 @@ +/* + Copyright 2013 David Malcolm + Copyright 2013 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +extern void foo(void *ptrA, void *ptrB, void *ptrC) + __attribute__((nonnull (1, 3))); + +void test(void *p, void *q, void *r) +{ + p = NULL; + + foo(p, q, r); +} diff --git a/tests/sm/checkers/malloc-checker/arg_must_not_be_null/script.py b/tests/sm/checkers/malloc-checker/arg_must_not_be_null/script.py new file mode 100644 index 00000000..d6390c5f --- /dev/null +++ b/tests/sm/checkers/malloc-checker/arg_must_not_be_null/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/checkers/malloc-checker/arg_must_not_be_null/stderr.txt b/tests/sm/checkers/malloc-checker/arg_must_not_be_null/stderr.txt new file mode 100644 index 00000000..cc5466f1 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/arg_must_not_be_null/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/checkers/malloc-checker/arg_must_not_be_null/input.c: In function 'test': +tests/sm/checkers/malloc-checker/arg_must_not_be_null/input.c:29:nn: error: NULL pointer p passed as argument 1 to foo [CWE-690] +tests/sm/checkers/malloc-checker/arg_must_not_be_null/input.c:27:nn: note: p assigned to 0 +tests/sm/checkers/malloc-checker/arg_must_not_be_null/input.c:29:nn: note: NULL pointer p passed as argument 1 to foo diff --git a/tests/sm/checkers/malloc-checker/arg_must_not_be_null/stdout.txt b/tests/sm/checkers/malloc-checker/arg_must_not_be_null/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/correct_usage/input.c b/tests/sm/checkers/malloc-checker/correct_usage/input.c new file mode 100644 index 00000000..c357663c --- /dev/null +++ b/tests/sm/checkers/malloc-checker/correct_usage/input.c @@ -0,0 +1,31 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include +#include + +int test(void) +{ + void *ptr = malloc(4096); + if (!ptr) + return -1; /* FIXME: with a plain return we have a BB with no gimple, and that breaks my checker */ + memset(ptr, 0, 4096); + free(ptr); + return 0; +} diff --git a/tests/sm/checkers/malloc-checker/correct_usage/script.py b/tests/sm/checkers/malloc-checker/correct_usage/script.py new file mode 100644 index 00000000..d6390c5f --- /dev/null +++ b/tests/sm/checkers/malloc-checker/correct_usage/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/checkers/malloc-checker/correct_usage/stdout.txt b/tests/sm/checkers/malloc-checker/correct_usage/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/double_free/input.c b/tests/sm/checkers/malloc-checker/double_free/input.c new file mode 100644 index 00000000..56385c3e --- /dev/null +++ b/tests/sm/checkers/malloc-checker/double_free/input.c @@ -0,0 +1,29 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. 
+ + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +void test(void *ptr) +{ + free(ptr); + + /* BUG: double-free: */ + free(ptr); +} diff --git a/tests/sm/checkers/malloc-checker/double_free/script.py b/tests/sm/checkers/malloc-checker/double_free/script.py new file mode 100644 index 00000000..d6390c5f --- /dev/null +++ b/tests/sm/checkers/malloc-checker/double_free/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/checkers/malloc-checker/double_free/stderr.txt b/tests/sm/checkers/malloc-checker/double_free/stderr.txt new file mode 100644 index 00000000..6bdd7a24 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/double_free/stderr.txt @@ -0,0 +1,7 @@ +tests/sm/checkers/malloc-checker/double_free/input.c: In function 'test': +tests/sm/checkers/malloc-checker/double_free/input.c:28:nn: error: double-free of ptr [CWE-415] +tests/sm/checkers/malloc-checker/double_free/input.c:25:nn: note: ptr passed to free() +tests/sm/checkers/malloc-checker/double_free/input.c:28:nn: note: double-free of ptr +tests/sm/checkers/malloc-checker/double_free/input.c:28:nn: error: use-after-free of ptr [CWE-416] +tests/sm/checkers/malloc-checker/double_free/input.c:25:nn: note: ptr passed to free() +tests/sm/checkers/malloc-checker/double_free/input.c:28:nn: note: use-after-free of ptr diff --git a/tests/sm/checkers/malloc-checker/double_free/stdout.txt b/tests/sm/checkers/malloc-checker/double_free/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/fancy_control_flow/input.c b/tests/sm/checkers/malloc-checker/fancy_control_flow/input.c new file mode 100644 index 00000000..01a2387b --- /dev/null +++ b/tests/sm/checkers/malloc-checker/fancy_control_flow/input.c @@ -0,0 +1,42 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +void test(int i, int j) +{ + int k; + void *ptr; + for (k = i; k < j; k++) { + switch(k) { + case 0: + ptr = malloc(1024); + break; + case 1: + break; + case 2: + break; + default: + break; + } + } + memset(ptr, 0, 4096); + free(ptr); +} diff --git a/tests/sm/checkers/malloc-checker/fancy_control_flow/script.py b/tests/sm/checkers/malloc-checker/fancy_control_flow/script.py new file mode 100644 index 00000000..d6390c5f --- /dev/null +++ b/tests/sm/checkers/malloc-checker/fancy_control_flow/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/checkers/malloc-checker/fancy_control_flow/stderr.txt b/tests/sm/checkers/malloc-checker/fancy_control_flow/stderr.txt new file mode 100644 index 00000000..2744fa46 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/fancy_control_flow/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/checkers/malloc-checker/fancy_control_flow/input.c: In function 'test': +tests/sm/checkers/malloc-checker/fancy_control_flow/input.c:40:nn: error: possibly-NULL pointer ptr passed as argument 1 to memset [CWE-690] +tests/sm/checkers/malloc-checker/fancy_control_flow/input.c:30:nn: note: ptr assigned to the result of malloc() +tests/sm/checkers/malloc-checker/fancy_control_flow/input.c:40:nn: note: possibly-NULL pointer ptr passed as argument 1 to memset diff --git a/tests/sm/checkers/malloc-checker/fancy_control_flow/stdout.txt b/tests/sm/checkers/malloc-checker/fancy_control_flow/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/input.c b/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/input.c new file mode 100644 index 00000000..7db1b58f --- /dev/null +++ b/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/input.c @@ -0,0 +1,41 @@ +/* + Copyright 2013 David Malcolm + Copyright 2013 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +struct foo { + int i; + int j; + int k; +}; + +struct foo *test(void) +{ + struct foo *p = NULL; + + /* BUG: dereference of NULL ptr: */ + p->i = 1; + + /* Only the first such usage should be reported: */ + p->j = 2; + p->k = 3; + + return p; +} diff --git a/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/script.py b/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/script.py new file mode 100644 index 00000000..71617ba0 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/script.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright 2013 David Malcolm +# Copyright 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node_write_to_p = ctxt.get_nodes().assigning_constant(0).one() + ssaname = node_write_to_p.stmt.lhs + + p = ctxt.find_var(node_write_to_p, 'p') + node_write_to_p_i = ctxt.get_nodes().assigning_constant(1).one() + ctxt.assert_statenames_for_expr(node_write_to_p_i, p, + frozenset(['ptr.null'])) + + # It should now have given up, with the p in the "ptr.stop" state: + node_write_to_p_j = ctxt.get_nodes().assigning_constant(2).one() + ctxt.assert_statenames_for_expr(node_write_to_p_j, p, + frozenset(['ptr.stop'])) + + node_write_to_p_k = ctxt.get_nodes().assigning_constant(3).one() + ctxt.assert_statenames_for_expr(node_write_to_p_k, p, + frozenset(['ptr.stop'])) + + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/stderr.txt b/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/stderr.txt new file mode 100644 index 00000000..c1d6d40d --- /dev/null +++ b/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/input.c: In function 'test': +tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/input.c:34:nn: error: dereference of NULL pointer p [CWE-690] +tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/input.c:31:nn: note: p assigned to 0 +tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/input.c:34:nn: note: dereference of NULL pointer p diff --git a/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/stdout.txt b/tests/sm/checkers/malloc-checker/multiple-errors/deref-of-null/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/input.c 
b/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/input.c new file mode 100644 index 00000000..e1b0e28b --- /dev/null +++ b/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/input.c @@ -0,0 +1,41 @@ +/* + Copyright 2013 David Malcolm + Copyright 2013 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +struct foo { + int i; + int j; + int k; +}; + +struct foo *test(void) +{ + struct foo *p = (struct foo*)malloc(4096); + + /* BUG: usage of p without checking if malloc returned NULL: */ + p->i = 1; + + /* Only the first such usage should be reported: */ + p->j = 2; + p->k = 3; + + return p; +} diff --git a/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/script.py b/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/script.py new file mode 100644 index 00000000..263594c4 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/script.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright 2013 David Malcolm +# Copyright 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version.
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node_call = ctxt.find_call_of('malloc') + ssaname = node_call.stmt.lhs + + p = ctxt.find_var(node_call, 'p') + node_write_to_p_i = ctxt.get_nodes().assigning_constant(1).one() + ctxt.assert_statenames_for_expr(node_write_to_p_i, p, + frozenset(['ptr.unchecked'])) + + # It should now have given up, with the p in the "ptr.nonnull" state: + node_write_to_p_j = ctxt.get_nodes().assigning_constant(2).one() + ctxt.assert_statenames_for_expr(node_write_to_p_j, p, + frozenset(['ptr.nonnull'])) + + node_write_to_p_k = ctxt.get_nodes().assigning_constant(3).one() + ctxt.assert_statenames_for_expr(node_write_to_p_k, p, + frozenset(['ptr.nonnull'])) + + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/stderr.txt b/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/stderr.txt new file mode 100644 index 00000000..7861bb45 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/input.c: In function 'test': +tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/input.c:34:nn: error: dereference of possibly-NULL pointer p [CWE-690] +tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/input.c:31:nn: note: p assigned to the result of malloc() 
+tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/input.c:34:nn: note: dereference of possibly-NULL pointer p diff --git a/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/stdout.txt b/tests/sm/checkers/malloc-checker/multiple-errors/unchecked_malloc/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/two_ptrs/input.c b/tests/sm/checkers/malloc-checker/two_ptrs/input.c new file mode 100644 index 00000000..28a1cdd9 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/two_ptrs/input.c @@ -0,0 +1,35 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +int test(void) +{ + void *p = malloc(4096); + void *q = malloc(4096); + if (p) { + memset(p, 0, 4096); /* Not a bug: checked */ + } else { + memset(q, 0, 4096); /* BUG: not checked */ + } + free(p); + free(q); + return 0; +} diff --git a/tests/sm/checkers/malloc-checker/two_ptrs/script.py b/tests/sm/checkers/malloc-checker/two_ptrs/script.py new file mode 100644 index 00000000..d6390c5f --- /dev/null +++ b/tests/sm/checkers/malloc-checker/two_ptrs/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. 
+# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/checkers/malloc-checker/two_ptrs/stderr.txt b/tests/sm/checkers/malloc-checker/two_ptrs/stderr.txt new file mode 100644 index 00000000..7f11a6ac --- /dev/null +++ b/tests/sm/checkers/malloc-checker/two_ptrs/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/checkers/malloc-checker/two_ptrs/input.c: In function 'test': +tests/sm/checkers/malloc-checker/two_ptrs/input.c:30:nn: error: possibly-NULL pointer q passed as argument 1 to memset [CWE-690] +tests/sm/checkers/malloc-checker/two_ptrs/input.c:26:nn: note: q assigned to the result of malloc() +tests/sm/checkers/malloc-checker/two_ptrs/input.c:30:nn: note: possibly-NULL pointer q passed as argument 1 to memset diff --git a/tests/sm/checkers/malloc-checker/two_ptrs/stdout.txt b/tests/sm/checkers/malloc-checker/two_ptrs/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/unchecked_malloc/input.c b/tests/sm/checkers/malloc-checker/unchecked_malloc/input.c new file mode 100644 index 00000000..358f4c94 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/unchecked_malloc/input.c @@ -0,0 +1,28 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. 
+ + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +void *test(void) +{ + void *ptr = malloc(4096); + memset(ptr, 0, 4096); + return ptr; +} diff --git a/tests/sm/checkers/malloc-checker/unchecked_malloc/script.py b/tests/sm/checkers/malloc-checker/unchecked_malloc/script.py new file mode 100644 index 00000000..d6390c5f --- /dev/null +++ b/tests/sm/checkers/malloc-checker/unchecked_malloc/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/checkers/malloc-checker/unchecked_malloc/stderr.txt b/tests/sm/checkers/malloc-checker/unchecked_malloc/stderr.txt new file mode 100644 index 00000000..c08d6985 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/unchecked_malloc/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/checkers/malloc-checker/unchecked_malloc/input.c: In function 'test': +tests/sm/checkers/malloc-checker/unchecked_malloc/input.c:26:nn: error: possibly-NULL pointer ptr passed as argument 1 to memset [CWE-690] +tests/sm/checkers/malloc-checker/unchecked_malloc/input.c:25:nn: note: ptr assigned to the result of malloc() +tests/sm/checkers/malloc-checker/unchecked_malloc/input.c:26:nn: note: possibly-NULL pointer ptr passed as argument 1 to memset diff --git a/tests/sm/checkers/malloc-checker/unchecked_malloc/stdout.txt b/tests/sm/checkers/malloc-checker/unchecked_malloc/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/malloc-checker/use_after_free/input.c b/tests/sm/checkers/malloc-checker/use_after_free/input.c new file mode 100644 index 00000000..ca015963 --- /dev/null +++ b/tests/sm/checkers/malloc-checker/use_after_free/input.c @@ -0,0 +1,34 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include +#include + +void foo(void *ptr); + +void test(int i) +{ + void *p; + + p = malloc(1024); + if (p) { + free(p); + foo(p); + } +} diff --git a/tests/sm/checkers/malloc-checker/use_after_free/script.py b/tests/sm/checkers/malloc-checker/use_after_free/script.py new file mode 100644 index 00000000..d6390c5f --- /dev/null +++ b/tests/sm/checkers/malloc-checker/use_after_free/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/checkers/malloc-checker/use_after_free/stderr.txt b/tests/sm/checkers/malloc-checker/use_after_free/stderr.txt new file mode 100644 index 00000000..f0ce7d1e --- /dev/null +++ b/tests/sm/checkers/malloc-checker/use_after_free/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/checkers/malloc-checker/use_after_free/input.c: In function 'test': +tests/sm/checkers/malloc-checker/use_after_free/input.c:32:nn: error: use-after-free of p [CWE-416] +tests/sm/checkers/malloc-checker/use_after_free/input.c:31:nn: note: p passed to free() +tests/sm/checkers/malloc-checker/use_after_free/input.c:32:nn: note: use-after-free of p diff --git a/tests/sm/checkers/malloc-checker/use_after_free/stdout.txt b/tests/sm/checkers/malloc-checker/use_after_free/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/input.c b/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/input.c new file mode 100644 index 00000000..730a400b --- /dev/null +++ b/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/input.c @@ -0,0 +1,69 @@ +/* + Copyright 2013 David Malcolm + Copyright 2013 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include + +char *test(const char *name) +{ + char buffer[1024]; + char *result = NULL; + + snprintf(buffer, 1024, "hello %s", name); + + /* Note that if we simply + "return buffer;" + gcc is smart enough by itself to issue + "warning: function returns address of local variable [enabled by default]" + from c-typeck.c:c_finish_return as it builds the return statement. + + However, if we introduce some dataflow, this warning isn't issued by + that code - but our points_to_stack.sm script can detect it: + */ + result = buffer; + + /* BUG: returns pointer to variable on the stack: */ + return result; +} + +char *test2(const char *name) +{ + static char buffer[1024]; + char *result = NULL; + + snprintf(buffer, 1024, "hello %s", name); + + result = buffer; + + /* Not a bug: buffer is static */ + return result; +} + +char other_buffer[1024]; + +char *test3(const char *name) +{ + char *result = NULL; + + snprintf(other_buffer, 1024, "hello %s", name); + + result = other_buffer; + + /* Not a bug: other_buffer is global */ + return result; +} diff --git a/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/script.py b/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/script.py new file mode 100644 index 00000000..60feca72 --- /dev/null +++ b/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/script.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +import gcc + +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + +checker = parse_file('sm/checkers/points_to_stack.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/stderr.txt b/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/stderr.txt new file mode 100644 index 00000000..55b4ce00 --- /dev/null +++ b/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/stderr.txt @@ -0,0 +1,5 @@ +tests/sm/checkers/points_to_stack/return_of_ptr_to_local/input.c: In function 'test': +tests/sm/checkers/points_to_stack/return_of_ptr_to_local/input.c:41:nn: error: return of "result" returns address within stack of local variable "buffer" [CWE-562] +tests/sm/checkers/points_to_stack/return_of_ptr_to_local/input.c:38:nn: note: assignment of result to &buffer +tests/sm/checkers/points_to_stack/return_of_ptr_to_local/input.c:41:nn: note: state of result ("State('ptr.points_to_stack', 'var_pointed_to'=gcc.VarDecl('buffer'))") propagated to result +tests/sm/checkers/points_to_stack/return_of_ptr_to_local/input.c:41:nn: note: return of "result" returns address within stack of local variable "buffer" diff --git a/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/stdout.txt b/tests/sm/checkers/points_to_stack/return_of_ptr_to_local/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/sizeof_allocation/calloc_too_small/input.c b/tests/sm/checkers/sizeof_allocation/calloc_too_small/input.c new file mode 100644 index 00000000..e5053c50 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/calloc_too_small/input.c @@ -0,0 +1,34 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. 
+ + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +struct foo { + char buffer[1024]; +}; + +struct bar { + char buffer[128]; +}; + +struct foo *test(void) +{ + return (struct foo*)calloc(4, sizeof(struct bar)); + /* BUG: sizeof(bar), rather than sizeof(foo), hence not enough space */ +} diff --git a/tests/sm/checkers/sizeof_allocation/calloc_too_small/script.py b/tests/sm/checkers/sizeof_allocation/calloc_too_small/script.py new file mode 100644 index 00000000..cd01c165 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/calloc_too_small/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/sizeof_allocation.sm') +main([checker]) diff --git a/tests/sm/checkers/sizeof_allocation/calloc_too_small/stderr.txt b/tests/sm/checkers/sizeof_allocation/calloc_too_small/stderr.txt new file mode 100644 index 00000000..f62e50ea --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/calloc_too_small/stderr.txt @@ -0,0 +1,2 @@ +tests/sm/checkers/sizeof_allocation/calloc_too_small/input.c: In function 'test': +tests/sm/checkers/sizeof_allocation/calloc_too_small/input.c:32:nn: error: allocation too small: pointer to struct foo (1024 bytes) initialized with allocation of 512 bytes [CWE-131] diff --git a/tests/sm/checkers/sizeof_allocation/calloc_too_small/stdout.txt b/tests/sm/checkers/sizeof_allocation/calloc_too_small/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/sizeof_allocation/malloc_correct_size/input.c b/tests/sm/checkers/sizeof_allocation/malloc_correct_size/input.c new file mode 100644 index 00000000..e0a994ec --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/malloc_correct_size/input.c @@ -0,0 +1,31 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include + +struct foo { + char buffer[256]; + int i, j, k; + float x, y, z; +}; + +struct foo *test(void) +{ + return (struct foo*)malloc(sizeof(struct foo)); +} diff --git a/tests/sm/checkers/sizeof_allocation/malloc_correct_size/script.py b/tests/sm/checkers/sizeof_allocation/malloc_correct_size/script.py new file mode 100644 index 00000000..cd01c165 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/malloc_correct_size/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/sizeof_allocation.sm') +main([checker]) diff --git a/tests/sm/checkers/sizeof_allocation/malloc_correct_size/stdout.txt b/tests/sm/checkers/sizeof_allocation/malloc_correct_size/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/sizeof_allocation/malloc_too_large/input.c b/tests/sm/checkers/sizeof_allocation/malloc_too_large/input.c new file mode 100644 index 00000000..71b619e3 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/malloc_too_large/input.c @@ -0,0 +1,36 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. 
+ + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +struct foo { + char buffer[128]; +}; + +struct bar { + char buffer[256]; +}; + +struct foo *test(void) +{ + return (struct foo*)malloc(sizeof(struct bar)); + /* not a reportable bug: although the sizeof() is wrong, the size is large + enough, and some code does runtime size calculation that we don't want to + issue a false positive over */ +} diff --git a/tests/sm/checkers/sizeof_allocation/malloc_too_large/script.py b/tests/sm/checkers/sizeof_allocation/malloc_too_large/script.py new file mode 100644 index 00000000..cd01c165 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/malloc_too_large/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/sizeof_allocation.sm') +main([checker]) diff --git a/tests/sm/checkers/sizeof_allocation/malloc_too_large/stdout.txt b/tests/sm/checkers/sizeof_allocation/malloc_too_large/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/sizeof_allocation/malloc_too_small/input.c b/tests/sm/checkers/sizeof_allocation/malloc_too_small/input.c new file mode 100644 index 00000000..948a2de4 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/malloc_too_small/input.c @@ -0,0 +1,34 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +struct foo { + char buffer[256]; +}; + +struct bar { + char buffer[128]; +}; + +struct foo *test(void) +{ + return (struct foo*)malloc(sizeof(struct bar)); + /* BUG: sizeof(bar), rather than sizeof(foo), hence not enough space */ +} diff --git a/tests/sm/checkers/sizeof_allocation/malloc_too_small/script.py b/tests/sm/checkers/sizeof_allocation/malloc_too_small/script.py new file mode 100644 index 00000000..cd01c165 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/malloc_too_small/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. 
+# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/sizeof_allocation.sm') +main([checker]) diff --git a/tests/sm/checkers/sizeof_allocation/malloc_too_small/stderr.txt b/tests/sm/checkers/sizeof_allocation/malloc_too_small/stderr.txt new file mode 100644 index 00000000..fcb1fd0b --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/malloc_too_small/stderr.txt @@ -0,0 +1,2 @@ +tests/sm/checkers/sizeof_allocation/malloc_too_small/input.c: In function 'test': +tests/sm/checkers/sizeof_allocation/malloc_too_small/input.c:32:nn: error: allocation too small: pointer to struct foo (256 bytes) initialized with allocation of 128 bytes [CWE-131] diff --git a/tests/sm/checkers/sizeof_allocation/malloc_too_small/stdout.txt b/tests/sm/checkers/sizeof_allocation/malloc_too_small/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/input.c b/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/input.c new file mode 100644 index 00000000..a3b3f9ad --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/input.c @@ -0,0 +1,36 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. 
+ + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +struct foo { + char buffer[256]; +}; + +struct bar { + char buffer[128]; +}; + +struct foo *test(void) +{ + void *ptr; + ptr = malloc(sizeof(struct bar)); + /* BUG: sizeof(bar), rather than sizeof(foo), hence not enough space */ + return ptr; +} diff --git a/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/script.py b/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/script.py new file mode 100644 index 00000000..4f333a08 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/script.py @@ -0,0 +1,40 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file +from sm.solver import State + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that the set_state() due to the pattern match for: + # ptr = malloc(sizeof(struct bar)); + # transitions the state of ptr from "ptr.start" to a "ptr.sized" instance + # with the correct size + node = ctxt.find_call_of('malloc') + ctxt.assert_statenames_for_varname(node, 'ptr', {'ptr.start'}) + + node = ctxt.get_successor(node) + ctxt.assert_states_for_varname(node, 'ptr', + {State('ptr.sized', size=128)}) + +checker = parse_file('sm/checkers/sizeof_allocation.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/stderr.txt b/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/stderr.txt new file mode 100644 index 00000000..59aeba18 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/input.c: In function 'test': +tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/input.c:35:nn: error: allocation too small: pointer to struct foo (256 bytes) initialized with allocation of 128 bytes [CWE-131] +tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/input.c:33:nn: note: ptr assigned to the result of malloc(128) +tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/input.c:35:nn: note: allocation too small: pointer to struct foo (256 bytes) initialized with allocation of 128 bytes diff --git a/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/stdout.txt b/tests/sm/checkers/sizeof_allocation/stateful_malloc_too_small/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/sizeof_allocation/unknown_size/input.c b/tests/sm/checkers/sizeof_allocation/unknown_size/input.c new file mode 100644 index 00000000..ffaab541 --- /dev/null +++ 
b/tests/sm/checkers/sizeof_allocation/unknown_size/input.c @@ -0,0 +1,32 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +struct foo { + char buffer[256]; +}; + +struct foo *test(int i) +{ + void *ptr; + ptr = malloc(i); + /* might or might not be a bug; we don't know, so don't issue a bug */ + return ptr; +} diff --git a/tests/sm/checkers/sizeof_allocation/unknown_size/script.py b/tests/sm/checkers/sizeof_allocation/unknown_size/script.py new file mode 100644 index 00000000..02f31463 --- /dev/null +++ b/tests/sm/checkers/sizeof_allocation/unknown_size/script.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file +from sm.solver import State + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that ptr doesn't change state due to + # ptr = malloc(i); + # since the checker only tracks constant integer values: + node = ctxt.find_call_of('malloc') + ctxt.assert_statenames_for_varname(node, 'ptr', {'ptr.start'}) + + node = ctxt.get_successor(node) + ctxt.assert_statenames_for_varname(node, 'ptr', {'ptr.start'}) + +checker = parse_file('sm/checkers/sizeof_allocation.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/checkers/sizeof_allocation/unknown_size/stdout.txt b/tests/sm/checkers/sizeof_allocation/unknown_size/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/checkers/taint/example/input.c b/tests/sm/checkers/taint/example/input.c new file mode 100644 index 00000000..ef9acb58 --- /dev/null +++ b/tests/sm/checkers/taint/example/input.c @@ -0,0 +1,131 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include +#include +#include + +struct foo +{ + signed int i; + char buf[256]; +}; + +char test(FILE *f) +{ + struct foo tmp; + + if (1 == fread(&tmp, sizeof(tmp), 1, f)) { + /* BUG: the following array lookup trusts that the input data's index is + in the range 0 <= i < 256; otherwise it's accessing the stack */ + return tmp.buf[tmp.i]; + } + return 0; +} + +char test2(struct foo *f, int i) +{ + /* not a bug: the data is not known to be tainted: */ + return f->buf[f->i]; +} + +char test3(FILE *f) +{ + struct foo tmp; + + if (1 == fread(&tmp, sizeof(tmp), 1, f)) { + if (tmp.i >= 0 && tmp.i < 256) { + /* not a bug: the access is guarded by upper and lower bounds: */ + return tmp.buf[tmp.i]; + } + } + return 0; +} + +char test4(FILE *f) +{ + struct foo tmp; + + if (1 == fread(&tmp, sizeof(tmp), 1, f)) { + if (tmp.i >= 0) { + /* BUG: has a lower bound, but not an upper bound: */ + return tmp.buf[tmp.i]; + } + } + return 0; +} + +char test5(FILE *f) +{ + struct foo tmp; + + if (1 == fread(&tmp, sizeof(tmp), 1, f)) { + if (tmp.i < 256) { + /* BUG: has an upper bound, but not a lower bound: */ + return tmp.buf[tmp.i]; + } + } + return 0; +} + +/* unsigned types have a natural lower bound of 0 */ +struct bar +{ + unsigned int i; + char buf[256]; +}; + +char test6(FILE *f) +{ + struct bar tmp; + + if (1 == fread(&tmp, sizeof(tmp), 1, f)) { + if (tmp.i >= 0) { + /* BUG: has an implicit lower bound, but not an upper bound: */ + return tmp.buf[tmp.i]; + } + } + return 0; +} + +char test7(FILE *f) +{ + struct bar tmp; + + if (1 == fread(&tmp, sizeof(tmp), 1, f)) { + if (tmp.i < 256) { + /* not a bug: has an upper bound, and an implicit lower bound: */ + return tmp.buf[tmp.i]; + } + } + return 0; +} + +char test8(FILE *f) +{ + struct foo tmp; + + if (1 == fread(&tmp, sizeof(tmp), 1, f)) { + if (tmp.i == 42) { + /* not a bug: tmp.i compared against a specific value: */ + return tmp.buf[tmp.i]; + } + } + return 0; +} diff --git a/tests/sm/checkers/taint/example/script.py 
b/tests/sm/checkers/taint/example/script.py new file mode 100644 index 00000000..92875341 --- /dev/null +++ b/tests/sm/checkers/taint/example/script.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +import gcc + +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify the various state transitions within "test3" + + # Verify that the: + # D.2837_3 = fread (&tmp, 260, 1, f_2(D)); + # transitions "tmp" from "x.start" to "x.tainted" + node = ctxt.find_call_of('fread', within='test3') + ctxt.assert_statenames_for_varname(node, 'tmp', {'x.start'}) + + node = ctxt.get_successor(node) + ctxt.assert_statenames_for_varname(node, 'tmp', {'x.tainted'}) + + # Verify that the: + # if (D.n >= 0) + # transitions "D.n" from "x.tainted" to "x.has_lb" + node = ctxt.find_comparison_against(gcc.GeExpr, 0, within='test3') + tempvar = node.stmt.lhs + ctxt.assert_statenames_for_expr(node, tempvar, {'x.tainted'}) + + node = ctxt.get_true_successor(node) + ctxt.assert_statenames_for_expr(node, tempvar, {'x.has_lb'}) + + # Verify that the: + # if (D.n <= 255) + # transitions "D.n" from "x.has_lb" to "x.ok" + node = ctxt.find_comparison_against(gcc.LeExpr, 255, within='test3') + tempvar = node.stmt.lhs + 
ctxt.assert_statenames_for_expr(node, tempvar, {'x.has_lb'}) + + node = ctxt.get_true_successor(node) + ctxt.assert_statenames_for_expr(node, tempvar, {'x.ok'}) + + # Verify that it's within the same equivcls as "tmp.i" + tmp_i = ctxt.get_expr_by_str(node, 'tmp.i') + ctxt.assert_statenames_for_expr(node, tmp_i, {'x.ok'}) + +checker = parse_file('sm/checkers/taint.sm') +#print(checker) +dot = checker.to_dot('test_script') +#print(dot) +if 0: + from gccutils import invoke_dot + invoke_dot(dot) +main([checker], selftest=selftest) diff --git a/tests/sm/checkers/taint/example/stderr.txt b/tests/sm/checkers/taint/example/stderr.txt new file mode 100644 index 00000000..f0976f08 --- /dev/null +++ b/tests/sm/checkers/taint/example/stderr.txt @@ -0,0 +1,22 @@ +tests/sm/checkers/taint/example/input.c: In function 'test': +tests/sm/checkers/taint/example/input.c:37:nn: error: use of tainted value tmp.i in array lookup without bounds checking [CWE-129] +tests/sm/checkers/taint/example/input.c:34:nn: note: tmp passed to fread() +tests/sm/checkers/taint/example/input.c:37:nn: note: state of tmp ("x.tainted") propagated to tmp.i +tests/sm/checkers/taint/example/input.c:37:nn: note: use of tainted value tmp.i in array lookup without bounds checking +tests/sm/checkers/taint/example/input.c: In function 'test4': +tests/sm/checkers/taint/example/input.c:68:nn: error: use of tainted value tmp.i in array lookup without upper-bound check [CWE-129] +tests/sm/checkers/taint/example/input.c:65:nn: note: tmp passed to fread() +tests/sm/checkers/taint/example/input.c:66:nn: note: state of tmp ("x.tainted") propagated to tmp.i +tests/sm/checkers/taint/example/input.c:66:nn: note: tmp.i compared against 0 +tests/sm/checkers/taint/example/input.c:68:nn: note: use of tainted value tmp.i in array lookup without upper-bound check +tests/sm/checkers/taint/example/input.c: In function 'test5': +tests/sm/checkers/taint/example/input.c:81:nn: error: use of tainted value tmp.i in array lookup without 
lower-bound check [CWE-129] +tests/sm/checkers/taint/example/input.c:78:nn: note: tmp passed to fread() +tests/sm/checkers/taint/example/input.c:79:nn: note: state of tmp ("x.tainted") propagated to tmp.i +tests/sm/checkers/taint/example/input.c:79:nn: note: tmp.i compared against 255 +tests/sm/checkers/taint/example/input.c:81:nn: note: use of tainted value tmp.i in array lookup without lower-bound check +tests/sm/checkers/taint/example/input.c: In function 'test6': +tests/sm/checkers/taint/example/input.c:101:nn: error: use of tainted value tmp.i in array lookup without bounds checking [CWE-129] +tests/sm/checkers/taint/example/input.c:98:nn: note: tmp passed to fread() +tests/sm/checkers/taint/example/input.c:101:nn: note: state of tmp ("x.tainted") propagated to tmp.i +tests/sm/checkers/taint/example/input.c:101:nn: note: use of tainted value tmp.i in array lookup without bounds checking diff --git a/tests/sm/checkers/taint/example/stdout.txt b/tests/sm/checkers/taint/example/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/dot/input.c b/tests/sm/dot/input.c new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/sm/dot/input.c @@ -0,0 +1 @@ + diff --git a/tests/sm/dot/script.py b/tests/sm/dot/script.py new file mode 100644 index 00000000..450cdd64 --- /dev/null +++ b/tests/sm/dot/script.py @@ -0,0 +1,55 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +SCRIPT = ''' +sm malloc_checker { + state decl any_pointer ptr; + + ptr.all: + { ptr = malloc() } => ptr.unknown; + + ptr.unknown, ptr.null, ptr.nonnull: + { ptr == 0 } => true=ptr.null, false=ptr.nonnull + | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + ; + + ptr.unknown: + { *ptr } => { error('use of possibly-NULL pointer %s' % ptr)}; + + ptr.null: + { *ptr } => { error('use of NULL pointer %s' % ptr)}; + + ptr.all, ptr.unknown, ptr.null, ptr.nonnull: + { free(ptr) } => ptr.free; + + ptr.free: + { free(ptr) } => { error('double-free of %s' % ptr)} + | { ptr } => {error('use-after-free of %s' % ptr)} + ; +} +''' + +from sm.parser import parse_file +ch = parse_file('sm/checkers/malloc_checker.sm') +if 0: + print(ch) +dot = ch.to_dot('test_script') +print(dot) +if 0: + from gccutils import invoke_dot + invoke_dot(dot) diff --git a/tests/sm/dot/stdout.txt b/tests/sm/dot/stdout.txt new file mode 100644 index 00000000..15abf238 --- /dev/null +++ b/tests/sm/dot/stdout.txt @@ -0,0 +1,83 @@ +digraph test_script { + subgraph malloc_checker { + ptr_unchecked [label=]; + ptr_null [label=]; + ptr_start [label=]; + ptr_nonnull [label=]; + ptr_free [label=]; + + ptr_unchecked -> ptr_unchecked [label=<{ ptr = malloc() }>]; + ptr_null -> ptr_unchecked [label=<{ ptr = malloc() }>]; + ptr_start -> ptr_unchecked [label=<{ ptr = malloc() }>]; + ptr_nonnull -> ptr_unchecked [label=<{ ptr = malloc() }>]; + ptr_free -> ptr_unchecked [label=<{ ptr = malloc() }>]; + ptr_unchecked -> ptr_null [label=<{ ptr = 0 }>]; + ptr_null -> ptr_null [label=<{ ptr = 0 }>]; + ptr_start -> ptr_null [label=<{ ptr = 0 }>]; + ptr_nonnull -> ptr_null [label=<{ ptr = 0 }>]; + ptr_free -> ptr_null [label=<{ ptr = 0 }>]; + ptr_unchecked -> ptr_null [label=<{ ptr == 0 } is True>]; + ptr_unchecked -> ptr_nonnull [label=<{ ptr == 0 } is False>]; + ptr_unchecked -> ptr_nonnull [label=<{ ptr 
!= 0 } is True>]; + ptr_unchecked -> ptr_null [label=<{ ptr != 0 } is False>]; + ptr_null -> ptr_null [label=<{ ptr == 0 } is True>]; + ptr_null -> ptr_nonnull [label=<{ ptr == 0 } is False>]; + ptr_null -> ptr_nonnull [label=<{ ptr != 0 } is True>]; + ptr_null -> ptr_null [label=<{ ptr != 0 } is False>]; + ptr_start -> ptr_null [label=<{ ptr == 0 } is True>]; + ptr_start -> ptr_nonnull [label=<{ ptr == 0 } is False>]; + ptr_start -> ptr_nonnull [label=<{ ptr != 0 } is True>]; + ptr_start -> ptr_null [label=<{ ptr != 0 } is False>]; + ptr_nonnull -> ptr_null [label=<{ ptr == 0 } is True>]; + ptr_nonnull -> ptr_nonnull [label=<{ ptr == 0 } is False>]; + ptr_nonnull -> ptr_nonnull [label=<{ ptr != 0 } is True>]; + ptr_nonnull -> ptr_null [label=<{ ptr != 0 } is False>]; + ptr_free -> ptr_null [label=<{ ptr == 0 } is True>]; + ptr_free -> ptr_nonnull [label=<{ ptr == 0 } is False>]; + ptr_free -> ptr_nonnull [label=<{ ptr != 0 } is True>]; + ptr_free -> ptr_null [label=<{ ptr != 0 } is False>]; + ptr_unchecked -> ptr_unchecked [label=<{ *ptr }: + error('dereference of possibly-NULL pointer %s' % ptr, + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + >]; + ptr_unchecked -> ptr_nonnull [label=<{ *ptr }>]; + ptr_null -> ptr_null [label=<{ *ptr }: + error('dereference of NULL pointer %s' % ptr, + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + >]; + ptr_null -> ptr_stop [label=<{ *ptr }>]; + ptr_start -> ptr_free [label=<{ free(ptr) } >]; + ptr_unchecked -> ptr_free [label=<{ free(ptr) } >]; + ptr_null -> ptr_free [label=<{ free(ptr) } >]; + ptr_nonnull -> ptr_free [label=<{ free(ptr) } >]; + ptr_free -> ptr_free [label=<{ free(ptr) } : + error('double-free of %s' % ptr, + # "CWE-415: Double Free" + cwe='CWE-415') + >]; + ptr_free -> ptr_stop [label=<{ free(ptr) } >]; + ptr_free -> ptr_free [label=<{ ptr }: + error('use-after-free of %s' % ptr, + # "CWE-416: Use After Free" + cwe='CWE-416') + >]; + 
ptr_free -> ptr_stop [label=<{ ptr }>]; + ptr_unchecked -> ptr_unchecked [label=<$arg_must_not_be_null$: + error('possibly-NULL pointer %s passed as argument %i to %s' + % (ptr, argnumber, function), + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + >]; + ptr_unchecked -> ptr_nonnull [label=<$arg_must_not_be_null$>]; + ptr_null -> ptr_null [label=<$arg_must_not_be_null$: + error('NULL pointer %s passed as argument %i to %s' + % (ptr, argnumber, function), + # "CWE-690: Unchecked Return Value to NULL Pointer Dereference" + cwe='CWE-690') + >]; + ptr_null -> ptr_stop [label=<$arg_must_not_be_null$>]; + } +} + diff --git a/tests/sm/facts/infinite-recursion/input.c b/tests/sm/facts/infinite-recursion/input.c new file mode 100644 index 00000000..8749bd34 --- /dev/null +++ b/tests/sm/facts/infinite-recursion/input.c @@ -0,0 +1,40 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +extern void marker_A(void); +extern void marker_B(void); +extern void marker_C(void); +extern void marker_D(void); + +void test(int flag) +{ + marker_A(); + + if (flag) { + marker_B(); + + /* Recurse, infinitely, as it happens: */ + test(flag); + + marker_C(); + } + + marker_D(); + +} diff --git a/tests/sm/facts/infinite-recursion/script.py b/tests/sm/facts/infinite-recursion/script.py new file mode 100644 index 00000000..baa3f214 --- /dev/null +++ b/tests/sm/facts/infinite-recursion/script.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # All the fact-finder knows is that flag != 0 at marker_B: + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'flag', '!=', 0) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/infinite-recursion/stdout.txt b/tests/sm/facts/infinite-recursion/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/loop/input.c b/tests/sm/facts/loop/input.c new file mode 100644 index 00000000..40a0dc68 --- /dev/null +++ b/tests/sm/facts/loop/input.c @@ -0,0 +1,36 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +extern void marker_A(void); +extern void marker_B(void); +extern void marker_C(void); +extern void marker_D(void); + +void test(void) +{ + int i; + + marker_A(); + + for (i=0; i<256; i++) { + marker_B(); + } + + marker_C(); +} diff --git a/tests/sm/facts/loop/script.py b/tests/sm/facts/loop/script.py new file mode 100644 index 00000000..9e0681de --- /dev/null +++ b/tests/sm/facts/loop/script.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. 
+# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node = ctxt.find_call_of('marker_A') + + # Check that we know the constraints on i within the loop: + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'i', '<=', 255) + ctxt.assert_not_fact(node, 'i', '==', 0) + + # Check that we know the constraints on i after the loop: + node = ctxt.find_call_of('marker_C') + ctxt.assert_fact(node, 'i', '>', 255) + + # TODO: we don't yet do interval analysis, so we can't prove + # that (i == 256) after the loop. + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/loop/stdout.txt b/tests/sm/facts/loop/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/meet/input.c b/tests/sm/facts/meet/input.c new file mode 100644 index 00000000..b222c952 --- /dev/null +++ b/tests/sm/facts/meet/input.c @@ -0,0 +1,41 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +extern void marker_A(void); +extern void marker_B(void); +extern void marker_C(void); +extern void marker_D(void); + +void test(int flag, unsigned int i) +{ + if (!flag) { + return; + } + + marker_A(); + + if (i>0) { + marker_B(); + } else { + marker_C(); + } + + marker_D(); + +} diff --git a/tests/sm/facts/meet/script.py b/tests/sm/facts/meet/script.py new file mode 100644 index 00000000..6574be76 --- /dev/null +++ b/tests/sm/facts/meet/script.py @@ -0,0 +1,46 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that we know flag != 0 at marker_A: + node = ctxt.find_call_of('marker_A') + ctxt.assert_fact(node, 'flag', '!=', 0) + + # We should also know it after the conditionals: + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'flag', '!=', 0) + ctxt.assert_fact(node, 'i', '!=', 0) + + # and that due to the restricted range of i that i == 0 when !(i>0): + node = ctxt.find_call_of('marker_C') + ctxt.assert_fact(node, 'flag', '!=', 0) + ctxt.assert_fact(node, 'i', '==', 0) + + # We should also know (flag != 0) when control flow merges: + node = ctxt.find_call_of('marker_D') + ctxt.assert_fact(node, 'flag', '!=', 0) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/meet/stdout.txt b/tests/sm/facts/meet/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/noreturn/abort/input.c b/tests/sm/facts/noreturn/abort/input.c new file mode 100644 index 00000000..cd13c297 --- /dev/null +++ b/tests/sm/facts/noreturn/abort/input.c @@ -0,0 +1,38 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include <stdlib.h> + +extern void marker_A(void); +extern void marker_B(void); + +void test(void *ptr) +{ + marker_A(); + + /* + abort() is labelled with __attribute__ ((__noreturn__)) + */ + if (!ptr) { + abort(); + } + + /* Hence the fact-finder ought to know (ptr != 0) here: */ + marker_B(); +} diff --git a/tests/sm/facts/noreturn/abort/script.py b/tests/sm/facts/noreturn/abort/script.py new file mode 100644 index 00000000..e71cf1d6 --- /dev/null +++ b/tests/sm/facts/noreturn/abort/script.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>.
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node = ctxt.find_call_of('marker_A') + + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'ptr', '!=', 0) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/noreturn/abort/stdout.txt b/tests/sm/facts/noreturn/abort/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/noreturn/assert/input.c b/tests/sm/facts/noreturn/assert/input.c new file mode 100644 index 00000000..15c932a0 --- /dev/null +++ b/tests/sm/facts/noreturn/assert/input.c @@ -0,0 +1,37 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include <assert.h> + +extern void marker_A(void); +extern void marker_B(void); + +void test(int i) +{ + marker_A(); + + /* + We have not defined NDEBUG, so this will call __assert_fail if + i <= 10, which is labelled with __attribute__ ((__noreturn__)) + */ + assert(i > 10); + + /* Hence the fact-finder ought to know (i > 10) here: */ + marker_B(); +} diff --git a/tests/sm/facts/noreturn/assert/script.py b/tests/sm/facts/noreturn/assert/script.py new file mode 100644 index 00000000..51ea56f2 --- /dev/null +++ b/tests/sm/facts/noreturn/assert/script.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>.
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node = ctxt.find_call_of('marker_A') + + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'i', '>', 10) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/noreturn/assert/stdout.txt b/tests/sm/facts/noreturn/assert/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/noreturn/custom_abort/input.c b/tests/sm/facts/noreturn/custom_abort/input.c new file mode 100644 index 00000000..79c7c4e8 --- /dev/null +++ b/tests/sm/facts/noreturn/custom_abort/input.c @@ -0,0 +1,42 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include <stdio.h> +#include <stdlib.h> + +extern void marker_A(void); +extern void marker_B(void); + +void custom_abort(const char *msg) +{ + fprintf(stderr, "%s", msg); + abort(); +} + +void test(void *ptr) +{ + marker_A(); + + if (!ptr) { + custom_abort("ptr was NULL"); + } + + /* Hence the fact-finder ought to know (ptr != 0) here: */ + marker_B(); +} diff --git a/tests/sm/facts/noreturn/custom_abort/script.py b/tests/sm/facts/noreturn/custom_abort/script.py new file mode 100644 index 00000000..e71cf1d6 --- /dev/null +++ b/tests/sm/facts/noreturn/custom_abort/script.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>.
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node = ctxt.find_call_of('marker_A') + + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'ptr', '!=', 0) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/noreturn/custom_abort/stdout.txt b/tests/sm/facts/noreturn/custom_abort/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/noreturn/custom_abort_with_noreturn_attribute/input.c b/tests/sm/facts/noreturn/custom_abort_with_noreturn_attribute/input.c new file mode 100644 index 00000000..0d82dd8a --- /dev/null +++ b/tests/sm/facts/noreturn/custom_abort_with_noreturn_attribute/input.c @@ -0,0 +1,44 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include <stdio.h> +#include <stdlib.h> + +extern void marker_A(void); +extern void marker_B(void); + +void custom_abort(const char *msg) __attribute__ ((__noreturn__)); + +void custom_abort(const char *msg) +{ + fprintf(stderr, "%s", msg); + abort(); +} + +void test(void *ptr) +{ + marker_A(); + + if (!ptr) { + custom_abort("ptr was NULL"); + } + + /* Hence the fact-finder ought to know (ptr != 0) here: */ + marker_B(); +} diff --git a/tests/sm/facts/noreturn/custom_abort_with_noreturn_attribute/script.py b/tests/sm/facts/noreturn/custom_abort_with_noreturn_attribute/script.py new file mode 100644 index 00000000..e71cf1d6 --- /dev/null +++ b/tests/sm/facts/noreturn/custom_abort_with_noreturn_attribute/script.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>.
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node = ctxt.find_call_of('marker_A') + + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'ptr', '!=', 0) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/noreturn/custom_abort_with_noreturn_attribute/stdout.txt b/tests/sm/facts/noreturn/custom_abort_with_noreturn_attribute/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/operations/input.c b/tests/sm/facts/operations/input.c new file mode 100644 index 00000000..b326aff6 --- /dev/null +++ b/tests/sm/facts/operations/input.c @@ -0,0 +1,69 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +extern void marker_A(void); +extern void marker_B(void); +extern void marker_C(void); +extern void marker_D(void); +extern void marker_E(void); +extern void marker_F(void); + +/* + Ensure that the fact-finder can sanely propagate information about + arithmetic operations +*/ + +void test(int i, int j) +{ + int k, m; + + if (i > 42) { + marker_A(); + + i += 3; + + /* (we should now know that i > 45) */ + marker_B(); + + i -= 1; + + /* (and now have i > 44) */ + marker_C(); + + i = 3 * i; + + /* (likewise now i > 132) */ + marker_D(); + + i /= 2; + + /* (should now have i > 66: */ + marker_E(); + + /* We don't know anything about j, so we don't know anything about k: */ + k = i + j; + + /* However, we should now know that m > 67: */ + m = i + 1; + + marker_F(); + + } + +} diff --git a/tests/sm/facts/operations/script.py b/tests/sm/facts/operations/script.py new file mode 100644 index 00000000..bb020af6 --- /dev/null +++ b/tests/sm/facts/operations/script.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that we know i > 42 at marker_A: + node = ctxt.find_call_of('marker_A') + ctxt.assert_fact(node, 'i', '>', 42) + + # Verify the effect of the various arithmetic operations on the + # factfinder: + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'i', '>', 45) + + node = ctxt.find_call_of('marker_C') + ctxt.assert_fact(node, 'i', '>', 44) + + node = ctxt.find_call_of('marker_D') + ctxt.assert_fact(node, 'i', '>', 132) + + node = ctxt.find_call_of('marker_E') + ctxt.assert_fact(node, 'i', '>', 66) + + node = ctxt.find_call_of('marker_F') + ctxt.assert_fact(node, 'i', '>', 66) + ctxt.assert_fact(node, 'm', '>', 67) + # though we don't know anything about the relationship of i and m + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/operations/stdout.txt b/tests/sm/facts/operations/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/params/input.c b/tests/sm/facts/params/input.c new file mode 100644 index 00000000..e63270cf --- /dev/null +++ b/tests/sm/facts/params/input.c @@ -0,0 +1,53 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +extern void marker_A(void); +extern void marker_B(void); +extern void marker_C(void); +extern void marker_D(void); +extern void marker_E(void); + +static int called_function(int j) +{ + int k; + + marker_C(); + + k = j - 1; + + marker_D(); + + return k; +} + +void test(int i) +{ + marker_A(); + + if (i > 4) { + + marker_B(); + + i = called_function(i); + + marker_E(); + } + + marker_F(); +} diff --git a/tests/sm/facts/params/script.py b/tests/sm/facts/params/script.py new file mode 100644 index 00000000..da8ac343 --- /dev/null +++ b/tests/sm/facts/params/script.py @@ -0,0 +1,54 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main, Options +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node = ctxt.find_call_of('marker_A') + + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'i', '>', 4) + + # Within called_function(), + # arg i was passed to param j: + node = ctxt.find_call_of('marker_C') + #ctxt.assert_fact(node, 'i', '>', 4) + ctxt.assert_fact(node, 'j', '>', 4) + + node = ctxt.find_call_of('marker_D') + #ctxt.assert_fact(node, 'i', '>', 4) + ctxt.assert_fact(node, 'j', '>', 4) + ctxt.assert_fact(node, 'k', '>', 3) + + # Back to test: + # return value k was returned to i: + node = ctxt.find_call_of('marker_E') + ctxt.assert_fact(node, 'i', '>', 3) + + # After control merges, we have no information: + node = ctxt.find_call_of('marker_F') + ctxt.assert_no_facts(node) + + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/params/stdout.txt b/tests/sm/facts/params/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/switch/various-cases/input.c b/tests/sm/facts/switch/various-cases/input.c new file mode 100644 index 00000000..98632c56 --- /dev/null +++ b/tests/sm/facts/switch/various-cases/input.c @@ -0,0 +1,57 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +extern void marker_A(void); +extern void marker_B(void); +extern void marker_C(void); +extern void marker_D(void); +extern void marker_E(void); +extern void marker_F(void); +extern void marker_G(void); + +void test(int i) +{ + marker_A(); + + switch (i) { + case 0 ... 10: /* GCC's "case ranges" extension to C */ + case 20 ... 30: + marker_B(); + break; + + case 0x42: + case 42: + marker_C(); + break; + + case 70: + marker_D(); + /* fallthrough: */ + case 80: + marker_E(); + break; + + default: + marker_F(); + break; + + } + + marker_G(); +} diff --git a/tests/sm/facts/switch/various-cases/script.py b/tests/sm/facts/switch/various-cases/script.py new file mode 100644 index 00000000..3fecff09 --- /dev/null +++ b/tests/sm/facts/switch/various-cases/script.py @@ -0,0 +1,62 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + node = ctxt.find_call_of('marker_A') + + # Check that we know the constraints on i at each case label + + # Combination of case ranges: 0 ... 10 and 20 ... 
30 + node = ctxt.find_call_of('marker_B') + ctxt.assert_fact(node, 'i', '>=', 0) + ctxt.assert_fact(node, 'i', '<=', 30) + + # Combination of case labels 0x42 and 42: + node = ctxt.find_call_of('marker_C') + ctxt.assert_fact(node, 'i', '>=', 42) + ctxt.assert_fact(node, 'i', '<=', 0x42) + + # Single case label 70: + node = ctxt.find_call_of('marker_D') + ctxt.assert_fact(node, 'i', '==', 70) + + # Case label 80 plus fallthrough of case label 70: + node = ctxt.find_call_of('marker_E') + # currently the fact-finder gives no information for the combination of + # the two: + ctxt.assert_no_facts(node) + + # "default" gives no information: + node = ctxt.find_call_of('marker_F') + ctxt.assert_no_facts(node) + + # Similarly, we have no information after control merges from all of + # the cases: + node = ctxt.find_call_of('marker_G') + ctxt.assert_no_facts(node) + + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/facts/switch/various-cases/stdout.txt b/tests/sm/facts/switch/various-cases/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/facts/unittests/input.c b/tests/sm/facts/unittests/input.c new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/tests/sm/facts/unittests/input.c @@ -0,0 +1 @@ + diff --git a/tests/sm/facts/unittests/metadata.ini b/tests/sm/facts/unittests/metadata.ini new file mode 100644 index 00000000..ba7428ca --- /dev/null +++ b/tests/sm/facts/unittests/metadata.ini @@ -0,0 +1,4 @@ +[ExpectedBehavior] +# This test case emits warnings on stderr; +# don't treat the stderr output as leading to an expected failure: +exitcode = 0 diff --git a/tests/sm/facts/unittests/script.py b/tests/sm/facts/unittests/script.py new file mode 100644 index 00000000..55013d5b --- /dev/null +++ b/tests/sm/facts/unittests/script.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc.
+# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +import unittest + +from sm.facts import Facts, Fact + +def make_facts(factlist): + f = Facts() + f.set_ = set(factlist) + return f + +class FactsTests(unittest.TestCase): + def test_str(self): + f = make_facts([Fact('b', '==', 'c'), + Fact('a', '==', 'b')]) + self.assertEqual(str(f), + '(a == b && b == c)') + + def test_equivclasses(self): + f = make_facts([Fact('a', '==', 'b'), + Fact('b', '==', 'c')]) + self.assertEqual(f.get_equiv_classes(), + frozenset([frozenset(['a', 'b', 'c'])])) + + f = make_facts([Fact('a', '==', 'b'), + Fact('c', '>', 0)]) + self.assertEqual(f.get_equiv_classes(), + frozenset([frozenset(['a', 'b'])])) + + f = make_facts([Fact('a', '==', 'b'), + Fact('c', '==', 0)]) + self.assertEqual(f.get_equiv_classes(), + frozenset([frozenset(['a', 'b']), + frozenset(['c', 0])])) + + f = make_facts([Fact('a', '==', 'b'), + Fact('c', '==', 'd'), + Fact('c', '==', 'b')]) + self.assertEqual(f.get_equiv_classes(), + frozenset([frozenset(['a', 'b', 'c', 'd'])])) + + f = make_facts([Fact('a', '==', 'b'), + Fact('b', '==', 0), + Fact('b', '!=', 1)]) + self.assertEqual(f.get_equiv_classes(), + frozenset([frozenset(['a', 'b', 0])])) + + def test_aliases(self): + f = make_facts([Fact('a', '==', 'b'), + Fact('b', '==', 'c')]) + self.assertEqual(f.get_aliases('a'), + frozenset(['a', 'b', 'c'])) + self.assertEqual(f.get_aliases('d'), + frozenset(['d'])) + +class 
FakeContext: + def __init__(self, verbose=0): + self.verbose = verbose + def debug(self, msg, *args): + if self.verbose: + print(msg % args) + +class IsPossibleTests(unittest.TestCase): + def assertPossible(self, f): + self.assertEqual(f.is_possible(FakeContext()), True) + + def assertNotPossible(self, f): + self.assertEqual(f.is_possible(FakeContext()), False) + + def test_empty(self): + f = make_facts([]) + self.assertPossible(f) + + def test_single_values(self): + f = make_facts([Fact('a', '==', 'b'), + Fact('b', '==', 0)]) + self.assertPossible(f) + + def test_inconsistent_equalities(self): + f = make_facts([Fact('a', '==', 'b'), + Fact('b', '==', 0), + Fact('b', '==', 1)]) + self.assertNotPossible(f) + + def test_inequalities(self): + f = make_facts([Fact('b', '==', 0), + Fact('b', '!=', 1)]) + self.assertPossible(f) + + f = make_facts([Fact('b', '==', 0), + Fact('b', '!=', 0)]) + self.assertNotPossible(f) + + f = make_facts([Fact('b', '==', 0), + Fact('b', '>', 0)]) + self.assertNotPossible(f) + + +import sys +sys.argv = ['foo', '-v'] + +unittest.main() diff --git a/tests/sm/facts/unittests/stderr.txt b/tests/sm/facts/unittests/stderr.txt new file mode 100644 index 00000000..d8f2681a --- /dev/null +++ b/tests/sm/facts/unittests/stderr.txt @@ -0,0 +1,12 @@ +test_aliases (__main__.FactsTests) ... ok +test_equivclasses (__main__.FactsTests) ... ok +test_str (__main__.FactsTests) ... ok +test_empty (__main__.IsPossibleTests) ... ok +test_inconsistent_equalities (__main__.IsPossibleTests) ... ok +test_inequalities (__main__.IsPossibleTests) ... ok +test_single_values (__main__.IsPossibleTests) ... 
ok + +---------------------------------------------------------------------- +Ran 7 tests in #s + +OK diff --git a/tests/sm/facts/unittests/stdout.txt b/tests/sm/facts/unittests/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c b/tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c new file mode 100644 index 00000000..958038a3 --- /dev/null +++ b/tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c @@ -0,0 +1,34 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +static void calls_free(int *q) +{ + free(q); +} + +void test(void *p) +{ + calls_free(p); + + /* BUG: double-free of p: already freed within calls_free() above */ + free(p); +} diff --git a/tests/sm/interprocedural/arg-affected-by-usage-as-param/script.py b/tests/sm/interprocedural/arg-affected-by-usage-as-param/script.py new file mode 100644 index 00000000..063e9c4e --- /dev/null +++ b/tests/sm/interprocedural/arg-affected-by-usage-as-param/script.py @@ -0,0 +1,36 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. 
+# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that calls_free() converts "p" within the caller from "ptr.start" + # to "ptr.free": + node = ctxt.find_call_of('calls_free') + ctxt.assert_statenames_for_varname(node, 'p', {'ptr.start'}) + + node = ctxt.get_intraprocedural_successor(node) + ctxt.assert_statenames_for_varname(node, 'p', {'ptr.free'}) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/interprocedural/arg-affected-by-usage-as-param/stderr.txt b/tests/sm/interprocedural/arg-affected-by-usage-as-param/stderr.txt new file mode 100644 index 00000000..4cc823c8 --- /dev/null +++ b/tests/sm/interprocedural/arg-affected-by-usage-as-param/stderr.txt @@ -0,0 +1,11 @@ +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c: In function 'test': +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:33:nn: error: double-free of p [CWE-415] +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:30:nn: note: call from test() to calls_free() +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:25:nn: note: q passed to free() +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:30:nn: note: return from calls_free() to test(): state of q ("ptr.free") 
propagated to p +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:33:nn: note: double-free of p +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:33:nn: error: use-after-free of p [CWE-416] +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:30:nn: note: call from test() to calls_free() +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:25:nn: note: q passed to free() +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:30:nn: note: return from calls_free() to test(): state of q ("ptr.free") propagated to p +tests/sm/interprocedural/arg-affected-by-usage-as-param/input.c:33:nn: note: use-after-free of p diff --git a/tests/sm/interprocedural/arg-affected-by-usage-as-param/stdout.txt b/tests/sm/interprocedural/arg-affected-by-usage-as-param/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c b/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c new file mode 100644 index 00000000..3ae8558f --- /dev/null +++ b/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c @@ -0,0 +1,35 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +#include +#include + +static int *calls_malloc(void) +{ + return malloc(sizeof(int)); +} + +int *test(void) +{ + int *p = calls_malloc(); + /* BUG: malloc could have returned NULL */ + *p = 42; + return p; +} + diff --git a/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/script.py b/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/script.py new file mode 100644 index 00000000..096ac60c --- /dev/null +++ b/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/script.py @@ -0,0 +1,37 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that the: + # int *p = calls_malloc(); + # transitions "p" from "ptr.start" to "ptr.unchecked" + node = ctxt.find_call_of('calls_malloc') + ctxt.assert_statenames_for_varname(node, 'p', {'ptr.start'}) + + node = ctxt.get_intraprocedural_successor(node) + ctxt.assert_statenames_for_varname(node, 'p', {'ptr.unchecked'}) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/stderr.txt b/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/stderr.txt new file mode 100644 index 00000000..338e9b49 --- /dev/null +++ b/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/stderr.txt @@ -0,0 +1,6 @@ +tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c: In function 'test': +tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c:32:nn: error: dereference of possibly-NULL pointer p [CWE-690] +tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c:30:nn: note: call from test() to calls_malloc() +tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c:25:nn: note: return value assigned to the result of malloc() +tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c:30:nn: note: return from calls_malloc() to test(): state of return value ("ptr.unchecked") propagated to p +tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/input.c:32:nn: note: dereference of possibly-NULL pointer p diff --git a/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/stdout.txt b/tests/sm/interprocedural/callsite-lhs-inherits-from-return-val/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/interprocedural/complex/input.c b/tests/sm/interprocedural/complex/input.c new file mode 100644 index
00000000..5a011388 --- /dev/null +++ b/tests/sm/interprocedural/complex/input.c @@ -0,0 +1,56 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +static void *f(int a) +{ + if (a) { + void *p = malloc(4096); + return p; + } else { + return NULL; + } +} + +static void g(int b, void *q) +{ + if (b==2) { + g(b, q); /* contrived infinite recursion */ + } + free(q); +} + +void h(int c) +{ + int *r = (int*)f(c); + r[0] = 42; /* BUG: the malloc in f could have failed */ + g(c, r); + free(r); /* BUG: doublefree here, given that g frees the ptr */ +} + +static int factorial(int n) +{ + if (n < 2) { + return 1; + } else { + return n * factorial(n-1); + } +} diff --git a/tests/sm/interprocedural/complex/script.py b/tests/sm/interprocedural/complex/script.py new file mode 100644 index 00000000..d6390c5f --- /dev/null +++ b/tests/sm/interprocedural/complex/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/interprocedural/complex/stderr.txt b/tests/sm/interprocedural/complex/stderr.txt new file mode 100644 index 00000000..c2191059 --- /dev/null +++ b/tests/sm/interprocedural/complex/stderr.txt @@ -0,0 +1,13 @@ +tests/sm/interprocedural/complex/input.c: In function 'h': +tests/sm/interprocedural/complex/input.c:44:nn: error: dereference of NULL pointer r [CWE-690] +tests/sm/interprocedural/complex/input.c:44:nn: note: dereference of NULL pointer r +tests/sm/interprocedural/complex/input.c:44:nn: error: dereference of possibly-NULL pointer r [CWE-690] +tests/sm/interprocedural/complex/input.c:44:nn: note: dereference of possibly-NULL pointer r +tests/sm/interprocedural/complex/input.c:46:nn: error: double-free of r [CWE-415] +tests/sm/interprocedural/complex/input.c:44:nn: note: dereference of r +tests/sm/interprocedural/complex/input.c:38:nn: note: q passed to free() +tests/sm/interprocedural/complex/input.c:46:nn: note: double-free of r +tests/sm/interprocedural/complex/input.c:46:nn: error: use-after-free of r [CWE-416] +tests/sm/interprocedural/complex/input.c:44:nn: note: dereference of r +tests/sm/interprocedural/complex/input.c:38:nn: note: q passed to free() +tests/sm/interprocedural/complex/input.c:46:nn: note: use-after-free of r diff --git a/tests/sm/interprocedural/complex/stdout.txt b/tests/sm/interprocedural/complex/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/interprocedural/param-inherits-from-arg/input.c 
b/tests/sm/interprocedural/param-inherits-from-arg/input.c new file mode 100644 index 00000000..062cf99d --- /dev/null +++ b/tests/sm/interprocedural/param-inherits-from-arg/input.c @@ -0,0 +1,34 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +static void uses_ptr(int *q) +{ + /* BUG: ptr came from unchecked malloc in test() */ + *q = 42; +} + +int *test(void) +{ + int *p = (int*)malloc(sizeof(int)); + uses_ptr(p); + return p; +} diff --git a/tests/sm/interprocedural/param-inherits-from-arg/script.py b/tests/sm/interprocedural/param-inherits-from-arg/script.py new file mode 100644 index 00000000..78a1740c --- /dev/null +++ b/tests/sm/interprocedural/param-inherits-from-arg/script.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_file + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify that uses_ptr()'s "q" param inherits "ptr.unchecked" + # from the argument "p" + node = ctxt.find_implementation_of('uses_ptr') + ctxt.assert_statenames_for_varname(node, 'q', {'ptr.unchecked'}) + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], selftest=selftest) diff --git a/tests/sm/interprocedural/param-inherits-from-arg/stderr.txt b/tests/sm/interprocedural/param-inherits-from-arg/stderr.txt new file mode 100644 index 00000000..4f59744b --- /dev/null +++ b/tests/sm/interprocedural/param-inherits-from-arg/stderr.txt @@ -0,0 +1,5 @@ +tests/sm/interprocedural/param-inherits-from-arg/input.c: In function 'uses_ptr': +tests/sm/interprocedural/param-inherits-from-arg/input.c:26:nn: error: dereference of possibly-NULL pointer q [CWE-690] +tests/sm/interprocedural/param-inherits-from-arg/input.c:31:nn: note: p assigned to the result of malloc() +tests/sm/interprocedural/param-inherits-from-arg/input.c:32:nn: note: call from test() to uses_ptr(): state of p ("ptr.unchecked") propagated to q +tests/sm/interprocedural/param-inherits-from-arg/input.c:26:nn: note: dereference of possibly-NULL pointer q diff --git a/tests/sm/interprocedural/param-inherits-from-arg/stdout.txt b/tests/sm/interprocedural/param-inherits-from-arg/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/json/input.c b/tests/sm/json/input.c new file mode 100644 index 00000000..8234afad --- /dev/null +++ b/tests/sm/json/input.c @@ -0,0 +1,27 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. 
+ + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +int *test(int i) +{ + int *foo = (int*)malloc(sizeof(int*)); + *foo = i; /* BUG: result of malloc could be NULL */ + return foo; +} diff --git a/tests/sm/json/script.py b/tests/sm/json/script.py new file mode 100644 index 00000000..281ccc94 --- /dev/null +++ b/tests/sm/json/script.py @@ -0,0 +1,105 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# .
+ +########################################################################### +# Selftest of JSON report dumping +# +# This is the same code and checker as: +# tests/sm/assignments/dereference-on-lhs +# but with the output sent to JSON +########################################################################### + +import glob +import json +import os + +from sm import main +from sm.options import Options +from sm.parser import parse_file + +def get_json_reports(): + return glob.glob('tests/sm/json/input.c.*.sm.json') + +def delete_json_reports(): + for jsonfile in get_json_reports(): + os.unlink(jsonfile) + +def selftest(ctxt, solution): + if 0: + import sys + solution.dump(sys.stderr) + + # Verify the JSON version of the report directly from the Error object: + assert len(ctxt._errors) == 1 + err = ctxt._errors[0] + report = err.make_report(ctxt, solution) + + jsonobj = report.as_json() + verify_json(jsonobj) + print('OK: verified in-memory JSON object') + + # Verify that a JSON file was written and check its contents: + jsonfiles = get_json_reports() + assert len(jsonfiles) == 1 + with open(jsonfiles[0], 'r') as f: + jsonobj = json.load(f) + verify_json(jsonobj) + print('OK: verified JSON object parsed from disk') + + # On success, keep the directory clean: + if 1: + delete_json_reports() + +def verify_json(jsonobj): + if 0: + print(json.dumps(jsonobj, + sort_keys=True, + indent=4, separators=(',', ': '))) + + # Verify jsonobj['cwe']: + assert jsonobj['cwe'] == 'CWE-690' + + # Verify jsonobj['sm']: + assert jsonobj['sm']['name'] == 'malloc_checker' + assert jsonobj['sm']['filename'] == 'sm/checkers/malloc_checker.sm' + assert jsonobj['sm']['line'] == 23 + + # Verify jsonobj['loc']: + assert 'input.c' in jsonobj['loc']['actualfilename'] + assert jsonobj['loc']['givenfilename'] == "tests/sm/json/input.c" + assert jsonobj['loc']['line'] == 25 + assert jsonobj['loc']['column'] == 8 + + # Verify jsonobj['message']: + assert jsonobj['message'] == "dereference of possibly-NULL
pointer foo" + + # Verify jsonobj['notes']: + assert len(jsonobj['notes']) == 2 + note = jsonobj['notes'][0] + assert note['loc']['givenfilename'] == "tests/sm/json/input.c" + assert note['loc']['line'] == 24 + assert note['message'] == "foo assigned to the result of malloc()" + +# Preprocessing: remove any existing json error reports: +delete_json_reports() + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], + # Dump JSON files rather than writing to stderr: + options=Options(dump_json=True), + selftest=selftest, ) diff --git a/tests/sm/json/stdout.txt b/tests/sm/json/stdout.txt new file mode 100644 index 00000000..ad3aa195 --- /dev/null +++ b/tests/sm/json/stdout.txt @@ -0,0 +1,2 @@ +OK: verified in-memory JSON object +OK: verified JSON object parsed from disk diff --git a/tests/sm/looping/input.c b/tests/sm/looping/input.c new file mode 100644 index 00000000..1bf8f6f1 --- /dev/null +++ b/tests/sm/looping/input.c @@ -0,0 +1,36 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + .
+*/ + +#include + +void test(int c) +{ + int i; + char *buffer = (char*)malloc(256); + + for (i=0; i<255; i++) { + buffer[i] = c; /* BUG: the malloc could have failed + TODO: the checker doesn't yet pick up on this due to + the pointer arithmetic not picking up on the state */ + /* BUG use-after-free the second time through the loop */ + + free(buffer); /* BUG: doublefree here on second time through the loop */ + } + +} diff --git a/tests/sm/looping/script.py b/tests/sm/looping/script.py new file mode 100644 index 00000000..f5439a23 --- /dev/null +++ b/tests/sm/looping/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/looping/stderr.txt b/tests/sm/looping/stderr.txt new file mode 100644 index 00000000..ce8212e1 --- /dev/null +++ b/tests/sm/looping/stderr.txt @@ -0,0 +1,13 @@ +tests/sm/looping/input.c: In function 'test': +tests/sm/looping/input.c:28:nn: error: use-after-free of buffer [CWE-416] +tests/sm/looping/input.c:25:nn: note: buffer assigned to the result of malloc() +tests/sm/looping/input.c:33:nn: note: buffer passed to free() +tests/sm/looping/input.c:28:nn: note: use-after-free of buffer +tests/sm/looping/input.c:33:nn: error: double-free of buffer [CWE-415] +tests/sm/looping/input.c:25:nn: note: buffer assigned to the result of malloc() +tests/sm/looping/input.c:33:nn: note: buffer passed to free() +tests/sm/looping/input.c:33:nn: note: double-free of buffer +tests/sm/looping/input.c:33:nn: error: use-after-free of buffer [CWE-416] +tests/sm/looping/input.c:25:nn: note: buffer assigned to the result of malloc() +tests/sm/looping/input.c:33:nn: note: buffer passed to free() +tests/sm/looping/input.c:33:nn: note: use-after-free of buffer diff --git a/tests/sm/looping/stdout.txt b/tests/sm/looping/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/lto/input-f.c b/tests/sm/lto/input-f.c new file mode 100644 index 00000000..3a2360c2 --- /dev/null +++ b/tests/sm/lto/input-f.c @@ -0,0 +1,34 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +#include "test.h" + +void *f(int a) +{ + if (a) { + void *p = malloc(4096); + return p; + } else { + return NULL; + } +} + diff --git a/tests/sm/lto/input-g.c b/tests/sm/lto/input-g.c new file mode 100644 index 00000000..816527da --- /dev/null +++ b/tests/sm/lto/input-g.c @@ -0,0 +1,31 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +#include "test.h" + +void g(int b, void *q) +{ + if (b==2) { + g(b, q); /* contrived infinite recursion */ + } + free(q); +} diff --git a/tests/sm/lto/input-h.c b/tests/sm/lto/input-h.c new file mode 100644 index 00000000..0ae7fd34 --- /dev/null +++ b/tests/sm/lto/input-h.c @@ -0,0 +1,31 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +#include "test.h" + +void h(int c) +{ + int *r = (int*)f(c); + r[0] = 42; /* BUG: the malloc in f could have failed */ + g(c, r); + free(r); /* BUG: doublefree here, given that g frees the ptr */ +} diff --git a/tests/sm/lto/script.py b/tests/sm/lto/script.py new file mode 100644 index 00000000..e0cf5dbb --- /dev/null +++ b/tests/sm/lto/script.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.options import Options +from sm.parser import parse_file + +opt = Options(during_lto=True) +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker], options=opt) diff --git a/tests/sm/lto/stderr.txt b/tests/sm/lto/stderr.txt new file mode 100644 index 00000000..28afeb98 --- /dev/null +++ b/tests/sm/lto/stderr.txt @@ -0,0 +1,29 @@ +tests/sm/lto/input-g.c: In function 'g': +tests/sm/lto/input-g.c:30:7: error: double-free of q [CWE-415] +tests/sm/lto/input-g.c:30:7: note: q passed to free() +tests/sm/lto/input-g.c:28:6: note: return from g() to g() +tests/sm/lto/input-g.c:30:7: note: double-free of q +tests/sm/lto/input-g.c:30:7: error: use-after-free of q [CWE-416] +tests/sm/lto/input-g.c:30:7: note: q passed to free() +tests/sm/lto/input-g.c:28:6: note: return from g() to g() +tests/sm/lto/input-g.c:30:7: note: use-after-free of q +tests/sm/lto/input-h.c: In function 'h': +tests/sm/lto/input-h.c:28:8: error: dereference of NULL pointer r [CWE-690] +tests/sm/lto/input-f.c:31:5: note: return value assigned to 0 +tests/sm/lto/input-h.c:27:8: note: return from f() to h(): state of return value ("ptr.null") propagated to r +tests/sm/lto/input-h.c:28:8: note: dereference of NULL pointer r +tests/sm/lto/input-h.c:28:8: error: dereference of possibly-NULL pointer r [CWE-690] +tests/sm/lto/input-f.c:28:11: note: p assigned to the result of malloc() +tests/sm/lto/input-h.c:27:8: note: return from f() to h(): state of return value ("ptr.unchecked") propagated to r +tests/sm/lto/input-h.c:28:8: note: dereference of possibly-NULL pointer r +tests/sm/lto/input-h.c:30:7: error: double-free of r [CWE-415] +tests/sm/lto/input-g.c:30:7: note: q passed to free() +tests/sm/lto/input-h.c:29:4: note: return from g() to h(): state of q ("ptr.free") propagated to r +tests/sm/lto/input-h.c:30:7: note: double-free of r +tests/sm/lto/input-h.c:30:7: error: use-after-free of r [CWE-416] +tests/sm/lto/input-g.c:30:7: note: q passed to free() 
+tests/sm/lto/input-h.c:29:4: note: return from g() to h(): state of q ("ptr.free") propagated to r +tests/sm/lto/input-h.c:30:7: note: use-after-free of r +lto-wrapper: gcc returned 1 exit status +/usr/bin/ld: lto-wrapper failed +collect2: error: ld returned 1 exit status diff --git a/tests/sm/lto/stdout.txt b/tests/sm/lto/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/lto/test.h b/tests/sm/lto/test.h new file mode 100644 index 00000000..f5825c8c --- /dev/null +++ b/tests/sm/lto/test.h @@ -0,0 +1,22 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +extern void *f(int a); +extern void g(int b, void *q); +extern void h(int c); diff --git a/tests/sm/parser/malloc-checker/input.c b/tests/sm/parser/malloc-checker/input.c new file mode 100644 index 00000000..aafcadd4 --- /dev/null +++ b/tests/sm/parser/malloc-checker/input.c @@ -0,0 +1,91 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include +#include + +#if 1 +void double_free(void *ptr) +{ + free(ptr); + + /* BUG: double-free: */ + free(ptr); +} +#endif + + +#if 1 +void unchecked_malloc(void) +{ + void *ptr = malloc(4096); + memset(ptr, 0, 4096); +} +#endif + +#if 1 +int correct_usage(void) +{ + void *ptr = malloc(4096); + if (!ptr) + return -1; /* FIXME: with a plain return we have a BB with no gimple, and that breaks my checker */ + memset(ptr, 0, 4096); + free(ptr); + return 0; +} +#endif + +#if 1 +int two_ptrs(void) +{ + void *p = malloc(4096); + void *q = malloc(4096); + if (p) { + memset(p, 0, 4096); /* Not a bug: checked */ + } else { + memset(q, 0, 4096); /* BUG: not checked */ + } + free(p); + free(q); + return 0; +} +#endif + +#if 1 +void fancy_control_flow(int i, int j) +{ + int k; + void *ptr; + for (k = i; k < j; k++) { + switch(k) { + case 0: + ptr = malloc(1024); + break; + case 1: + break; + case 2: + break; + default: + break; + } + } + memset(ptr, 0, 4096); + free(ptr); +} +#endif diff --git a/tests/sm/parser/malloc-checker/metadata.ini b/tests/sm/parser/malloc-checker/metadata.ini new file mode 100644 index 00000000..ba7428ca --- /dev/null +++ b/tests/sm/parser/malloc-checker/metadata.ini @@ -0,0 +1,4 @@ +[ExpectedBehavior] +# This test case emits warnings on stderr; +# don't treat the stderr output as leading to an expected failure: +exitcode = 0 diff --git a/tests/sm/parser/malloc-checker/script.py b/tests/sm/parser/malloc-checker/script.py new file mode 100644 index 00000000..a75230a1 --- /dev/null +++ b/tests/sm/parser/malloc-checker/script.py @@ -0,0 +1,189 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David 
Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +import unittest + +from sm.checker import Checker, Sm, AnyPointer, StateClause, PatternRule, \ + ResultOfFnCall, ArgsOfFnCall, Comparison, VarDereference, VarUsage, \ + TransitionTo, BooleanOutcome, PythonOutcome +from sm.parser import parse_string + +#parse('tests/sm/parser/malloc-checker.sm') + +class ParserTests(unittest.TestCase): + def test_complex_example(self): + ch = parse_string(''' +sm malloc_checker { + stateful decl any_pointer ptr; + + ptr.all: + { ptr = malloc() } => ptr.unknown; + + ptr.unknown, ptr.null, ptr.nonnull: + { ptr == 0 } => true=ptr.null, false=ptr.nonnull + | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + ; + + ptr.unknown: + { *ptr } => {{ error('use of possibly-NULL pointer %s' % ptr) }}; + + ptr.null: + { *ptr } => {{ error('use of NULL pointer %s' % ptr) }}; + + ptr.all, ptr.unknown, ptr.null, ptr.nonnull: + { free(ptr) } => ptr.free; + + ptr.free: + { free(ptr) } => {{ error('double-free of %s' % ptr) }} + | { ptr } => {{ error('use-after-free of %s' % ptr) }} + ; +} +''') + self.assertTrue(isinstance(ch, Checker)) + + # Verify that the str() of the parsed Checker looks sane: + self.assertEqual(str(ch).splitlines(), +'''sm malloc_checker { + stateful decl any_pointer ptr; + + ptr.all: + { ptr = malloc() } => ptr.unknown; + + ptr.unknown, ptr.null, ptr.nonnull: + { ptr == 0 } =>
true=ptr.null, false=ptr.nonnull; + | { ptr != 0 } => true=ptr.nonnull, false=ptr.null; + + ptr.unknown: + { *ptr } => {{ error('use of possibly-NULL pointer %s' % ptr) }}; + + ptr.null: + { *ptr } => {{ error('use of NULL pointer %s' % ptr) }}; + + ptr.all, ptr.unknown, ptr.null, ptr.nonnull: + { free(ptr) } => ptr.free; + + ptr.free: + { free(ptr) } => {{ error('double-free of %s' % ptr) }}; + | { ptr } => {{ error('use-after-free of %s' % ptr) }}; + +}'''.splitlines()) + + self.assertEqual(len(ch.sms), 1) + sm = ch.sms[0] + self.assertEqual(sm.name, 'malloc_checker') + self.assertEqual(len(sm.clauses), 7) + + decl = sm.clauses[0] + self.assertEqual(decl, AnyPointer(has_state=True, name='ptr')) + + # Verify parsing of: + # ptr.all: + # { ptr = malloc() } => ptr.unknown; + sc = sm.clauses[1] + self.assertEqual(sc.statelist, ['ptr.all']) + self.assertEqual(len(sc.patternrulelist), 1) + pr = sc.patternrulelist[0] + self.assertEqual(pr.pattern, + ResultOfFnCall(lhs='ptr', + fnname='malloc', + args=[])) + self.assertEqual(pr.outcomes, + [TransitionTo(statename='ptr.unknown')]) + + # Verify parsing of: + # ptr.unknown, ptr.null, ptr.nonnull: + # { ptr == 0 } => true=ptr.null, false=ptr.nonnull + # | { ptr != 0 } => true=ptr.nonnull, false=ptr.null + # ; + sc = sm.clauses[2] + self.assertEqual(sc.statelist, + ['ptr.unknown', 'ptr.null', 'ptr.nonnull']) + self.assertEqual(len(sc.patternrulelist), 2) + pr = sc.patternrulelist[0] + self.assertEqual(pr.pattern, Comparison(lhs='ptr', op='==', rhs=0)) + self.assertEqual(pr.outcomes, + [BooleanOutcome(guard=True, outcome=TransitionTo(statename='ptr.null')), + BooleanOutcome(guard=False, outcome=TransitionTo(statename='ptr.nonnull'))]) + pr = sc.patternrulelist[1] + self.assertEqual(pr.pattern, Comparison(lhs='ptr', op='!=', rhs=0)) + self.assertEqual(pr.outcomes, + [BooleanOutcome(guard=True, outcome=TransitionTo(statename='ptr.nonnull')), + BooleanOutcome(guard=False, outcome=TransitionTo(statename='ptr.null'))]) + + + #
Verify parsing of: + # ptr.unknown: + # { *ptr } => {{ error('use of possibly-NULL pointer %s' % ptr) }}; + sc = sm.clauses[3] + self.assertEqual(sc.statelist, + ['ptr.unknown']) + self.assertEqual(len(sc.patternrulelist), 1) + pr = sc.patternrulelist[0] + self.assertEqual(pr.pattern, VarDereference(var='ptr')) + self.assertEqual(pr.outcomes, + [PythonOutcome(src=(" error('use of possibly-NULL pointer %s' % ptr) "), + lineoffset=13)]) + + # Verify parsing of: + # ptr.null: + # { *ptr } => {{ error('use of NULL pointer %s' % ptr) }}; + sc = sm.clauses[4] + self.assertEqual(sc.statelist, + ['ptr.null']) + self.assertEqual(len(sc.patternrulelist), 1) + pr = sc.patternrulelist[0] + self.assertEqual(pr.pattern, VarDereference(var='ptr')) + self.assertEqual(pr.outcomes, + [PythonOutcome(src=(" error('use of NULL pointer %s' % ptr) "), + lineoffset=16)]) + + # Verify parsing of: + # ptr.all, ptr.unknown, ptr.null, ptr.nonnull: + # { free(ptr) } => ptr.free; + sc = sm.clauses[5] + self.assertEqual(sc.statelist, + ['ptr.all', 'ptr.unknown', 'ptr.null', 'ptr.nonnull']) + self.assertEqual(len(sc.patternrulelist), 1) + pr = sc.patternrulelist[0] + self.assertEqual(pr.pattern, ArgsOfFnCall(fnname='free', args=['ptr'])) + self.assertEqual(pr.outcomes, + [TransitionTo(statename='ptr.free')]) + + # Verify parsing of: + # ptr.free: + # { free(ptr) } => {{ error('double-free of %s' % ptr) }} + # | { ptr } => {{ error('use-after-free of %s' % ptr) }} + sc = sm.clauses[6] + self.assertEqual(sc.statelist, + ['ptr.free']) + self.assertEqual(len(sc.patternrulelist), 2) + pr = sc.patternrulelist[0] + self.assertEqual(pr.pattern, ArgsOfFnCall(fnname='free', args=['ptr'])) + self.assertEqual(pr.outcomes, + [PythonOutcome(src=" error('double-free of %s' % ptr) ", + lineoffset=22)]) + pr = sc.patternrulelist[1] + self.assertEqual(pr.pattern, VarUsage(var='ptr')) + self.assertEqual(pr.outcomes, + [PythonOutcome(src=" error('use-after-free of %s' % ptr) ", + lineoffset=23)]) + +import sys
+sys.argv = ['foo', '-v'] + +unittest.main() diff --git a/tests/sm/parser/malloc-checker/stderr.txt b/tests/sm/parser/malloc-checker/stderr.txt new file mode 100644 index 00000000..eb60ae5b --- /dev/null +++ b/tests/sm/parser/malloc-checker/stderr.txt @@ -0,0 +1,6 @@ +test_complex_example (__main__.ParserTests) ... ok + +---------------------------------------------------------------------- +Ran 1 test in #s + +OK diff --git a/tests/sm/parser/malloc-checker/stdout.txt b/tests/sm/parser/malloc-checker/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/arg-of-fn-call/input.c b/tests/sm/patterns/arg-of-fn-call/input.c new file mode 100644 index 00000000..04a68807 --- /dev/null +++ b/tests/sm/patterns/arg-of-fn-call/input.c @@ -0,0 +1,25 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +extern void test2(void *); + +void test1(void *ptr) +{ + test2(ptr); +} diff --git a/tests/sm/patterns/arg-of-fn-call/script.py b/tests/sm/patterns/arg-of-fn-call/script.py new file mode 100644 index 00000000..b9f41c21 --- /dev/null +++ b/tests/sm/patterns/arg-of-fn-call/script.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. 
+# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm arg_of_fn_call { + stateful decl any_pointer ptr; + + ptr.*: + { test2(ptr) } => {{ error('test2() was called with %s' % ptr) }}; +} +''' + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/arg-of-fn-call/stderr.txt b/tests/sm/patterns/arg-of-fn-call/stderr.txt new file mode 100644 index 00000000..1c2cba27 --- /dev/null +++ b/tests/sm/patterns/arg-of-fn-call/stderr.txt @@ -0,0 +1,2 @@ +tests/sm/patterns/arg-of-fn-call/input.c: In function 'test1': +tests/sm/patterns/arg-of-fn-call/input.c:24:nn: error: test2() was called with ptr diff --git a/tests/sm/patterns/arg-of-fn-call/stdout.txt b/tests/sm/patterns/arg-of-fn-call/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/arg_must_not_be_null/input.c b/tests/sm/patterns/arg_must_not_be_null/input.c new file mode 100644 index 00000000..fdef90c6 --- /dev/null +++ b/tests/sm/patterns/arg_must_not_be_null/input.c @@ -0,0 +1,44 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +#include + +extern void foo(void *ptrA, void *ptrB, void *ptrC) + __attribute__((nonnull (1, 3))); + +extern void bar(void *ptrA, void *ptrB, void *ptrC) + __attribute__((nonnull (1, 3))); + +void foo(void *ptrA, void *ptrB, void *ptrC) +{ +} + +void test(void) +{ + void *p; + void *q; + void *r; + + p = NULL; + q = NULL; + r = NULL; + + foo(p, q, r); + bar(p, q, r); +} diff --git a/tests/sm/patterns/arg_must_not_be_null/script.py b/tests/sm/patterns/arg_must_not_be_null/script.py new file mode 100644 index 00000000..a270eb0b --- /dev/null +++ b/tests/sm/patterns/arg_must_not_be_null/script.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm example_usage_of_nonnull_arg { + stateful decl any_pointer ptr; + + ptr.*: + { ptr = 0 } => ptr.null; + + ptr.null: + $arg_must_not_be_null$ + => {{ + error('%s() was called with NULL %s as argument %i/index %i: %r' + % (function, ptr, argnumber, argindex, parameter)) + }}; +} +''' + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/arg_must_not_be_null/stderr.txt b/tests/sm/patterns/arg_must_not_be_null/stderr.txt new file mode 100644 index 00000000..554a1aab --- /dev/null +++ b/tests/sm/patterns/arg_must_not_be_null/stderr.txt @@ -0,0 +1,21 @@ +tests/sm/patterns/arg_must_not_be_null/input.c: In function 'test': +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: error: foo() was called with NULL p as argument 1/index 0: gcc.ParmDecl('ptrA') +tests/sm/patterns/arg_must_not_be_null/input.c:38:nn: note: p assigned to 0 +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: call from test() to foo(): state of p ("ptr.null") propagated to ptrA +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: return from foo() to test(): state of ptrA ("ptr.null") propagated to p +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: foo() was called with NULL p as argument 1/index 0: gcc.ParmDecl('ptrA') +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: error: foo() was called with NULL r as argument 3/index 2: gcc.ParmDecl('ptrC') +tests/sm/patterns/arg_must_not_be_null/input.c:40:nn: note: r assigned to 0 +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: call from test() to foo(): state of p ("ptr.null") propagated to ptrC +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: return from foo() to test(): state of ptrC ("ptr.null") propagated to r +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: foo() was called with NULL r as argument 3/index 2: gcc.ParmDecl('ptrC') 
+tests/sm/patterns/arg_must_not_be_null/input.c:43:nn: error: bar() was called with NULL p as argument 1/index 0: None +tests/sm/patterns/arg_must_not_be_null/input.c:40:nn: note: r assigned to 0 +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: call from test() to foo(): state of p ("ptr.null") propagated to ptrA +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: return from foo() to test(): state of ptrA ("ptr.null") propagated to p +tests/sm/patterns/arg_must_not_be_null/input.c:43:nn: note: bar() was called with NULL p as argument 1/index 0: None +tests/sm/patterns/arg_must_not_be_null/input.c:43:nn: error: bar() was called with NULL r as argument 3/index 2: None +tests/sm/patterns/arg_must_not_be_null/input.c:39:nn: note: q assigned to 0 +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: call from test() to foo(): state of p ("ptr.null") propagated to ptrC +tests/sm/patterns/arg_must_not_be_null/input.c:42:nn: note: return from foo() to test(): state of ptrC ("ptr.null") propagated to r +tests/sm/patterns/arg_must_not_be_null/input.c:43:nn: note: bar() was called with NULL r as argument 3/index 2: None diff --git a/tests/sm/patterns/arg_must_not_be_null/stdout.txt b/tests/sm/patterns/arg_must_not_be_null/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/args-of-fn-call/input.c b/tests/sm/patterns/args-of-fn-call/input.c new file mode 100644 index 00000000..6c912b70 --- /dev/null +++ b/tests/sm/patterns/args-of-fn-call/input.c @@ -0,0 +1,25 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +extern void test2(void *param0, int param1, char param2); + +void test1(void *ptr, int i) +{ + test2(ptr, 0, 'A'); +} diff --git a/tests/sm/patterns/args-of-fn-call/script.py b/tests/sm/patterns/args-of-fn-call/script.py new file mode 100644 index 00000000..691e9a4f --- /dev/null +++ b/tests/sm/patterns/args-of-fn-call/script.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm arg_of_fn_call { + stateful decl any_pointer arg0; + decl any_expr arg1; + decl any_expr arg2; + + arg0.*: + { test2(arg0, arg1, arg2) } => {{ error('test2() was called with arg0:%s arg1:%s arg2:%s' % (arg0, arg1, arg2)) }}; +} +''' + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/args-of-fn-call/stderr.txt b/tests/sm/patterns/args-of-fn-call/stderr.txt new file mode 100644 index 00000000..798cb97b --- /dev/null +++ b/tests/sm/patterns/args-of-fn-call/stderr.txt @@ -0,0 +1,2 @@ +tests/sm/patterns/args-of-fn-call/input.c: In function 'test1': +tests/sm/patterns/args-of-fn-call/input.c:24:nn: error: test2() was called with arg0:ptr arg1:0 arg2:65 diff --git a/tests/sm/patterns/args-of-fn-call/stdout.txt b/tests/sm/patterns/args-of-fn-call/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/array/input.c b/tests/sm/patterns/array/input.c new file mode 100644 index 00000000..6ce27acd --- /dev/null +++ b/tests/sm/patterns/array/input.c @@ -0,0 +1,25 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +void test1(void) +{ + char buf[256]; + + buf[0] = 42; +} diff --git a/tests/sm/patterns/array/script.py b/tests/sm/patterns/array/script.py new file mode 100644 index 00000000..4f72951f --- /dev/null +++ b/tests/sm/patterns/array/script.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . + +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm array_access { + decl any_expr idx; + stateful decl any_expr arr; + + arr.*: + { arr[idx] } => {{ error('lookup within array "%s" at index %s' % (arr, idx)) }}; +} +''' + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/array/stderr.txt b/tests/sm/patterns/array/stderr.txt new file mode 100644 index 00000000..ff30854e --- /dev/null +++ b/tests/sm/patterns/array/stderr.txt @@ -0,0 +1,2 @@ +tests/sm/patterns/array/input.c: In function 'test1': +tests/sm/patterns/array/input.c:24:nn: error: lookup within array "buf" at index 0 diff --git a/tests/sm/patterns/array/stdout.txt b/tests/sm/patterns/array/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/conditional/input.c b/tests/sm/patterns/conditional/input.c new file mode 100644 index 00000000..a6f25ed5 --- /dev/null +++ b/tests/sm/patterns/conditional/input.c @@ -0,0 +1,30 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 
Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . +*/ + +extern void foo(void *); +extern void bar(void *); + +void test1(void *ptr) +{ + if (ptr) { + foo(ptr); + } else { + bar(ptr); + } +} diff --git a/tests/sm/patterns/conditional/script.py b/tests/sm/patterns/conditional/script.py new file mode 100644 index 00000000..696d51ad --- /dev/null +++ b/tests/sm/patterns/conditional/script.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm checked_against_null { + stateful decl any_pointer ptr; + + ptr.*: + { ptr != 0 } => {{ error('%s was checked against NULL' % ptr) }}; +} +''' + +# TODO: this currently emits the warning for both the true and the false path +# from the conditional, rather than just once. + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/conditional/stderr.txt b/tests/sm/patterns/conditional/stderr.txt new file mode 100644 index 00000000..546bf957 --- /dev/null +++ b/tests/sm/patterns/conditional/stderr.txt @@ -0,0 +1,2 @@ +tests/sm/patterns/conditional/input.c: In function 'test1': +tests/sm/patterns/conditional/input.c:25:nn: error: ptr was checked against NULL diff --git a/tests/sm/patterns/conditional/stdout.txt b/tests/sm/patterns/conditional/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/named-pattern/input.c b/tests/sm/patterns/named-pattern/input.c new file mode 100644 index 00000000..a6f25ed5 --- /dev/null +++ b/tests/sm/patterns/named-pattern/input.c @@ -0,0 +1,30 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +extern void foo(void *); +extern void bar(void *); + +void test1(void *ptr) +{ + if (ptr) { + foo(ptr); + } else { + bar(ptr); + } +} diff --git a/tests/sm/patterns/named-pattern/script.py b/tests/sm/patterns/named-pattern/script.py new file mode 100644 index 00000000..63b19b76 --- /dev/null +++ b/tests/sm/patterns/named-pattern/script.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm checked_against_null { + stateful decl any_pointer ptr; + + pat checked_against_null { ptr != 0 }; + + ptr.*: + checked_against_null => {{ error('%s was checked against NULL' % ptr) }}; +} +''' + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/named-pattern/stderr.txt b/tests/sm/patterns/named-pattern/stderr.txt new file mode 100644 index 00000000..48021514 --- /dev/null +++ b/tests/sm/patterns/named-pattern/stderr.txt @@ -0,0 +1,2 @@ +tests/sm/patterns/named-pattern/input.c: In function 'test1': +tests/sm/patterns/named-pattern/input.c:25:nn: error: ptr was checked against NULL diff --git a/tests/sm/patterns/named-pattern/stdout.txt b/tests/sm/patterns/named-pattern/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/or/input.c b/tests/sm/patterns/or/input.c new file mode 100644 index 00000000..bb9c38cd --- /dev/null +++ b/tests/sm/patterns/or/input.c @@ -0,0 +1,39 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +extern void foo(void *); +extern void bar(void *); + +void test1(void *ptr) +{ + if (ptr) { + foo(ptr); + } else { + bar(ptr); + } +} + +void test2(void *ptr) +{ + if (!ptr) { + foo(ptr); + } else { + bar(ptr); + } +} diff --git a/tests/sm/patterns/or/script.py b/tests/sm/patterns/or/script.py new file mode 100644 index 00000000..833ef78d --- /dev/null +++ b/tests/sm/patterns/or/script.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm checked_against_null { + stateful decl any_pointer ptr; + + ptr.*: + { ptr != 0 } | {ptr == 0 } => {{ error('%s was checked against NULL' % ptr) }}; +} +''' + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/or/stderr.txt b/tests/sm/patterns/or/stderr.txt new file mode 100644 index 00000000..2eb5619c --- /dev/null +++ b/tests/sm/patterns/or/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/patterns/or/input.c: In function 'test1': +tests/sm/patterns/or/input.c:25:nn: error: ptr was checked against NULL +tests/sm/patterns/or/input.c: In function 'test2': +tests/sm/patterns/or/input.c:34:nn: error: ptr was checked against NULL diff --git a/tests/sm/patterns/or/stdout.txt b/tests/sm/patterns/or/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/result-of-fn-call/input.c b/tests/sm/patterns/result-of-fn-call/input.c new file mode 100644 index 00000000..b39c8802 --- /dev/null +++ b/tests/sm/patterns/result-of-fn-call/input.c @@ -0,0 +1,30 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +extern void *never_call_this(void); + +void test1(void) +{ + void *ptr = never_call_this(); +} + +void test2(void) +{ + /* empty */ +} diff --git a/tests/sm/patterns/result-of-fn-call/script.py b/tests/sm/patterns/result-of-fn-call/script.py new file mode 100644 index 00000000..eea7ef86 --- /dev/null +++ b/tests/sm/patterns/result-of-fn-call/script.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm result_of_fn_call { + stateful decl any_pointer ptr; + + ptr.*: + { ptr = never_call_this() } => {{ error('never_call_this was called %s' % ptr) }}; +} +''' + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/result-of-fn-call/stderr.txt b/tests/sm/patterns/result-of-fn-call/stderr.txt new file mode 100644 index 00000000..ff498395 --- /dev/null +++ b/tests/sm/patterns/result-of-fn-call/stderr.txt @@ -0,0 +1,2 @@ +tests/sm/patterns/result-of-fn-call/input.c: In function 'test1': +tests/sm/patterns/result-of-fn-call/input.c:24:nn: error: never_call_this was called ptr diff --git a/tests/sm/patterns/result-of-fn-call/stdout.txt b/tests/sm/patterns/result-of-fn-call/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/var-dereference/input.c b/tests/sm/patterns/var-dereference/input.c new file mode 100644 index 00000000..346f6757 --- /dev/null +++ b/tests/sm/patterns/var-dereference/input.c @@ -0,0 +1,32 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +int test1(int *p) +{ + return *p; +} + +struct foo { + int i; +}; + +int test2(struct foo *f) +{ + return f->i; +} diff --git a/tests/sm/patterns/var-dereference/script.py b/tests/sm/patterns/var-dereference/script.py new file mode 100644 index 00000000..4e29c276 --- /dev/null +++ b/tests/sm/patterns/var-dereference/script.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# . 
+ +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm var_dereference { + stateful decl any_pointer ptr; + + ptr.*: + { *ptr } => {{ error('%s was dereferenced' % ptr) }}; +} +''' + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/var-dereference/stderr.txt b/tests/sm/patterns/var-dereference/stderr.txt new file mode 100644 index 00000000..65da25e2 --- /dev/null +++ b/tests/sm/patterns/var-dereference/stderr.txt @@ -0,0 +1,4 @@ +tests/sm/patterns/var-dereference/input.c: In function 'test1': +tests/sm/patterns/var-dereference/input.c:22:nn: error: p was dereferenced +tests/sm/patterns/var-dereference/input.c: In function 'test2': +tests/sm/patterns/var-dereference/input.c:31:nn: error: f was dereferenced diff --git a/tests/sm/patterns/var-dereference/stdout.txt b/tests/sm/patterns/var-dereference/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/patterns/var-usage/input.c b/tests/sm/patterns/var-usage/input.c new file mode 100644 index 00000000..35185308 --- /dev/null +++ b/tests/sm/patterns/var-usage/input.c @@ -0,0 +1,23 @@ +/* + Copyright 2012 David Malcolm + Copyright 2012 Red Hat, Inc. + + This is free software: you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see + . 
+*/ + +void *test1(void *ptr) +{ + return ptr; +} diff --git a/tests/sm/patterns/var-usage/script.py b/tests/sm/patterns/var-usage/script.py new file mode 100644 index 00000000..013cf5da --- /dev/null +++ b/tests/sm/patterns/var-usage/script.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Copyright 2012 David Malcolm +# Copyright 2012 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>. + +from sm import main +from sm.parser import parse_string + +SCRIPT = ''' +sm result_of_fn_call { + stateful decl any_pointer ptr; + + ptr.*: + { ptr } => {{ error('%s was used' % ptr) }}; +} +''' + +# TODO: for now this picks up on uses of temporaries + +checker = parse_string(SCRIPT) +main([checker]) diff --git a/tests/sm/patterns/var-usage/stderr.txt b/tests/sm/patterns/var-usage/stderr.txt new file mode 100644 index 00000000..f253b506 --- /dev/null +++ b/tests/sm/patterns/var-usage/stderr.txt @@ -0,0 +1,3 @@ +tests/sm/patterns/var-usage/input.c: In function 'test1': +tests/sm/patterns/var-usage/input.c:22:nn: error: ptr was used +tests/sm/patterns/var-usage/input.c:22:nn: error: return value was used diff --git a/tests/sm/patterns/var-usage/stdout.txt b/tests/sm/patterns/var-usage/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/regression/configure-with-exceptions/getopts.py b/tests/sm/regression/configure-with-exceptions/getopts.py new file mode 100644 index 00000000..68031806 ---
/dev/null +++ b/tests/sm/regression/configure-with-exceptions/getopts.py @@ -0,0 +1 @@ +print('-fexceptions') diff --git a/tests/sm/regression/configure-with-exceptions/input.c b/tests/sm/regression/configure-with-exceptions/input.c new file mode 100644 index 00000000..f5711bf7 --- /dev/null +++ b/tests/sm/regression/configure-with-exceptions/input.c @@ -0,0 +1,14 @@ +/* + Taken from autoconf test +*/ + +#include <stdio.h> +int +main () +{ + FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} diff --git a/tests/sm/regression/configure-with-exceptions/script.py b/tests/sm/regression/configure-with-exceptions/script.py new file mode 100644 index 00000000..f5439a23 --- /dev/null +++ b/tests/sm/regression/configure-with-exceptions/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>.
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/regression/configure-with-exceptions/stdout.txt b/tests/sm/regression/configure-with-exceptions/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/regression/configure-without-exceptions/getopts.py b/tests/sm/regression/configure-without-exceptions/getopts.py new file mode 100644 index 00000000..964caddc --- /dev/null +++ b/tests/sm/regression/configure-without-exceptions/getopts.py @@ -0,0 +1 @@ +print('-O2') diff --git a/tests/sm/regression/configure-without-exceptions/input.c b/tests/sm/regression/configure-without-exceptions/input.c new file mode 100644 index 00000000..f5711bf7 --- /dev/null +++ b/tests/sm/regression/configure-without-exceptions/input.c @@ -0,0 +1,14 @@ +/* + Taken from autoconf test +*/ + +#include <stdio.h> +int +main () +{ + FILE *f = fopen ("conftest.out", "w"); + return ferror (f) || fclose (f) != 0; + + ; + return 0; +} diff --git a/tests/sm/regression/configure-without-exceptions/script.py b/tests/sm/regression/configure-without-exceptions/script.py new file mode 100644 index 00000000..f5439a23 --- /dev/null +++ b/tests/sm/regression/configure-without-exceptions/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details.
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>. + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/regression/configure-without-exceptions/stdout.txt b/tests/sm/regression/configure-without-exceptions/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/regression/optimization/dead-code-elimination/getopts.py b/tests/sm/regression/optimization/dead-code-elimination/getopts.py new file mode 100644 index 00000000..1584aa48 --- /dev/null +++ b/tests/sm/regression/optimization/dead-code-elimination/getopts.py @@ -0,0 +1,17 @@ +print('-O2') + +# With optimization, the whole of test() goes away, and hence we get no error +# message +# +# Specifically, what's happening is dead-code elimination (the "cddce" pass +# in tree-ssa-dce.c) +# Invoking gcc with -fdump-tree-all-details shows this within input.c.031t.cddce1: +# Eliminating unnecessary statements: +# Deleting : free (ptr_1); +# +# Deleting : free (ptr_1); +# +# Deleting : ptr_1 = malloc (512); +# +# and the whole of the function has been optimized away before the analysis +# pass sees it diff --git a/tests/sm/regression/optimization/dead-code-elimination/input.c b/tests/sm/regression/optimization/dead-code-elimination/input.c new file mode 100644 index 00000000..384ae247 --- /dev/null +++ b/tests/sm/regression/optimization/dead-code-elimination/input.c @@ -0,0 +1,9 @@ +#include <stdlib.h> + +void test(void) +{ + void *ptr = malloc(512); + free(ptr); + /* BUG: this is a double-free: */ + free(ptr); +} diff --git a/tests/sm/regression/optimization/dead-code-elimination/script.py b/tests/sm/regression/optimization/dead-code-elimination/script.py new file mode 100644 index 00000000..f5439a23 --- /dev/null +++ b/tests/sm/regression/optimization/dead-code-elimination/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012,
2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>. + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/regression/optimization/dead-code-elimination/stdout.txt b/tests/sm/regression/optimization/dead-code-elimination/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/regression/return-0/getopts.py b/tests/sm/regression/return-0/getopts.py new file mode 100644 index 00000000..8dd3ec32 --- /dev/null +++ b/tests/sm/regression/return-0/getopts.py @@ -0,0 +1,23 @@ +""" +Seeing this error: + + tests/sm/regression/return-0/input.c:4:3: error: Unhandled Python exception raised calling 'execute' method + Traceback (most recent call last): + File "sm/__init__.py", line 53, in execute + solve(ctxt, 'solution') + File "sm/solver.py", line 586, in solve + solution = ctxt.solve(name) + File "sm/solver.py", line 544, in solve + find_leaks(self) + File "sm/leaks.py", line 59, in find_leaks + retval_aliases = get_retval_aliases(ctxt, edge.dstnode) + File "sm/leaks.py", line 33, in get_retval_aliases + retval = retval.var + AttributeError: 'gcc.IntegerCst' object has no attribute 'var' +with: + -O2 + +With optimization, the return statement directly returns a constant, rather +than a temporary.
+""" +print('-O2') diff --git a/tests/sm/regression/return-0/input.c b/tests/sm/regression/return-0/input.c new file mode 100644 index 00000000..115ae046 --- /dev/null +++ b/tests/sm/regression/return-0/input.c @@ -0,0 +1,4 @@ +int test(void) +{ + return 0; +} diff --git a/tests/sm/regression/return-0/script.py b/tests/sm/regression/return-0/script.py new file mode 100644 index 00000000..f5439a23 --- /dev/null +++ b/tests/sm/regression/return-0/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>.
+ +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/regression/return-0/stdout.txt b/tests/sm/regression/return-0/stdout.txt new file mode 100644 index 00000000..e69de29b diff --git a/tests/sm/regression/rhs-is-vardecl/input.c b/tests/sm/regression/rhs-is-vardecl/input.c new file mode 100644 index 00000000..37458de6 --- /dev/null +++ b/tests/sm/regression/rhs-is-vardecl/input.c @@ -0,0 +1,8 @@ +extern const char *char_ptr; + +int test(void) +{ + if (strcmp("literal", char_ptr)) + return 1; + return 0; +} diff --git a/tests/sm/regression/rhs-is-vardecl/script.py b/tests/sm/regression/rhs-is-vardecl/script.py new file mode 100644 index 00000000..f5439a23 --- /dev/null +++ b/tests/sm/regression/rhs-is-vardecl/script.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +# Copyright 2012, 2013 David Malcolm +# Copyright 2012, 2013 Red Hat, Inc. +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see +# <http://www.gnu.org/licenses/>. + +from sm import main +from sm.parser import parse_file + +checker = parse_file('sm/checkers/malloc_checker.sm') +main([checker]) diff --git a/tests/sm/regression/rhs-is-vardecl/stdout.txt b/tests/sm/regression/rhs-is-vardecl/stdout.txt new file mode 100644 index 00000000..e69de29b
Stageblock %i stmt:%i
Stage%r
Stage%s