Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions doc/cprover-manual/cbmc-unwinding.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,35 @@ As an example, consider a program with two loops in the function main:
This sets a bound of 10 for the first loop, and a bound of 20 for the
second loop.

### Hash-based Loop Identifiers

The ordinal loop IDs (such as `main.0`, `main.1`) depend on the
position of the loop within the function. Adding or removing code before
a loop — even unrelated code such as a `do { ... } while(0)` from a C
macro — can shift the ordinal numbers and break `--unwindset` settings.

To address this, `--show-loops` also displays a hash-based identifier
for each loop:

Loop main.0: (hash main.hash_706061793765627172)
file example.c line 10 function main

The hash is computed from the structural content of the loop body
(instruction types, expression shapes, and types) while ignoring
variable names. This makes it stable across unrelated code changes.

Hash-based identifiers can be used in `--unwindset` in place of ordinal
IDs:

--unwindset main.hash_706061793765627172:10

This is particularly useful in CI scripts where the verification
harness must survive code modifications in other parts of the file.

Note that two loops with identical structure will have the same hash.
In that case, the hash-based identifier refers to the first matching
loop in the function.

What if the number of unwindings specified is too small? In this case,
bugs that require paths that are deeper may be missed. In order to
address this problem, CBMC can optionally insert checks that the given
Expand Down
7 changes: 5 additions & 2 deletions doc/man/cbmc.1
Original file line number Diff line number Diff line change
Expand Up @@ -394,15 +394,18 @@ equivalent to setting the maximum field
sensitivity size for arrays to 0
.TP
\fB\-\-show\-loops\fR
show the loops in the program
show the loops in the program, including hash\-based loop identifiers
that are stable across unrelated code changes
.TP
\fB\-\-unwind\fR \fInr\fR
unwind all loops at most \fInr\fR times
.TP
\fB\-\-unwindset\fR [\fIT\fR:]\fIL\fR:\fIB\fR,...
unwind loop \fIL\fR with a bound of \fIB\fR, optionally restricted to thread
\fIT\fR, and overriding what may be set as default unwinding via
\fB\-\-unwind\fR (use \fB\-\-show\-loops\fR to get the loop IDs)
\fB\-\-unwind\fR (use \fB\-\-show\-loops\fR to get the loop IDs).
\fIL\fR may be an ordinal loop ID (e.g., \fBmain.0\fR) or a hash\-based
identifier (e.g., \fBmain.hash_706061793765627172\fR)
.TP
\fB\-\-incremental\-loop\fR L
check properties after each unwinding
Expand Down
7 changes: 5 additions & 2 deletions doc/man/goto-instrument.1
Original file line number Diff line number Diff line change
Expand Up @@ -856,15 +856,18 @@ empty the function pointer is removed using the standard
.SS "Loop information and transformations:"
.TP
\fB\-\-show\-loops\fR
show the loops in the program
show the loops in the program, including hash\-based loop identifiers
that are stable across unrelated code changes
.TP
\fB\-\-unwind\fR \fInr\fR
unwind all loops \fInr\fR times
.TP
\fB\-\-unwindset\fR [\fIT\fR:]\fIL\fR:\fIB\fR,...
unwind loop \fIL\fR with a bound of \fIB\fR
(optionally restricted to thread \fIT\fR)
(use \fB\-\-show\-loops\fR to get the loop IDs)
(use \fB\-\-show\-loops\fR to get the loop IDs).
\fIL\fR may be an ordinal loop ID (e.g., \fBmain.0\fR) or a hash\-based
identifier (e.g., \fBmain.hash_706061793765627172\fR)
.TP
\fB\-\-unwindset\-file\fR \fIfile\fR
read unwindset from file
Expand Down
1 change: 1 addition & 0 deletions regression/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ endif()
add_subdirectory(goto-cc-cbmc)
add_subdirectory(cbmc-cpp)
add_subdirectory(goto-cc-goto-analyzer)
add_subdirectory(goto-programs)
add_subdirectory(goto-analyzer-simplify)
add_subdirectory(statement-list)
add_subdirectory(systemc)
Expand Down
1 change: 1 addition & 0 deletions regression/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ DIRS = cbmc-shadow-memory \
validate-trace-xml-schema \
cbmc-primitives \
goto-interpreter \
goto-programs \
cbmc-sequentialization \
cpp-linter \
catch-framework \
Expand Down
19 changes: 19 additions & 0 deletions regression/cbmc/loop-hash-nested/test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Test program with nested loops
// Each loop should have a unique hash identifier

int main()
{
int sum = 0;

// Outer loop
for(int i = 0; i < 3; i++)
{
// Inner loop
for(int j = 0; j < 3; j++)
{
sum += i * j;
}
}

return sum;
}
11 changes: 11 additions & 0 deletions regression/cbmc/loop-hash-nested/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
CORE
test.c
--show-loops
^EXIT=0$
^SIGNAL=0$
^Loop main\.[0-9]+: \(hash main\.hash_[0-9]+\)$
^Loop main\.[0-9]+: \(hash main\.hash_[0-9]+\)$
--
^warning: ignoring
--
Nested loops should each have a unique hash identifier.
24 changes: 24 additions & 0 deletions regression/cbmc/loop-hash-stability/test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Test program to demonstrate hash-based loop identification
// The loop identifiers should remain stable even when we add
// unrelated code before or after the loops.

int main()
{
int sum = 0;

// Loop 1: Simple for loop
for(int i = 0; i < 10; i++)
{
sum += i;
}

// Loop 2: While loop
int j = 0;
while(j < 5)
{
sum += j * 2;
j++;
}

return sum;
}
10 changes: 10 additions & 0 deletions regression/cbmc/loop-hash-stability/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
test.c
--show-loops
^EXIT=0$
^SIGNAL=0$
^Loop main\.[0-9]+: \(hash main\.hash_[0-9]+\)$
--
^warning: ignoring
--
Loops should have hash-based identifiers in --show-loops output.
38 changes: 38 additions & 0 deletions regression/cbmc/loop-hash-stability/test_modified.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// Modified test program with pseudo-loops added before the real loops
// The real loops should maintain their hash identifiers

int main()
{
int sum = 0;

// Add some pseudo-loops (do-while-0 pattern) that should not affect
// the hash values of the real loops below
do
{
sum += 1;
} while(0);
do
{
sum += 2;
} while(0);
do
{
sum += 3;
} while(0);

// Loop 1: Simple for loop (same as before)
for(int i = 0; i < 10; i++)
{
sum += i;
}

// Loop 2: While loop (same as before)
int j = 0;
while(j < 5)
{
sum += j * 2;
j++;
}

return sum;
}
10 changes: 10 additions & 0 deletions regression/cbmc/loop-hash-stability/test_modified.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
test_modified.c
--show-loops
^EXIT=0$
^SIGNAL=0$
^Loop main\.[0-9]+: \(hash main\.hash_[0-9]+\)$
--
^warning: ignoring
--
Loops should have hash-based identifiers even with pseudo-loops present.
31 changes: 31 additions & 0 deletions regression/cbmc/loop-hash-types/test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Test program with different loop types
// Each loop should have a unique hash identifier

int main()
{
int sum = 0;

// For loop
for(int i = 0; i < 5; i++)
{
sum += i;
}

// While loop
int j = 0;
while(j < 5)
{
sum += j;
j++;
}

// Do-while loop
int k = 0;
do
{
sum += k;
k++;
} while(k < 5);

return sum;
}
10 changes: 10 additions & 0 deletions regression/cbmc/loop-hash-types/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
CORE
test.c
--show-loops
^EXIT=0$
^SIGNAL=0$
^Loop main\.[0-9]+: \(hash main\.hash_[0-9]+\)$
--
^warning: ignoring
--
Different loop types (for, while, do-while) should each have hash identifiers.
9 changes: 9 additions & 0 deletions regression/goto-programs/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
if(WIN32)
set(is_windows true)
else()
set(is_windows false)
endif()

add_test_pl_tests(
"${CMAKE_CURRENT_SOURCE_DIR}/chain.sh $<TARGET_FILE:goto-cc> $<TARGET_FILE:cbmc> ${is_windows}"
)
22 changes: 22 additions & 0 deletions regression/goto-programs/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# For goto-programs regression tests, see ../Makefile

default: test

include ../../src/config.inc
include ../../src/common

ifeq ($(BUILD_ENV_),MSVC)
exe=../../../src/goto-cc/goto-cl
is_windows=true
else
exe=../../../src/goto-cc/goto-cc
is_windows=false
endif

test:
@../test.pl -p -c "bash ./chain.sh $(exe) ../../../src/cbmc/cbmc $(is_windows)"

clean:
find . -name '*.out' -execdir $(RM) '{}' \;
find . -name '*.gb' -execdir $(RM) '{}' \;
$(RM) tests.log
17 changes: 17 additions & 0 deletions regression/goto-programs/chain.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
set -e
goto_cc=$1
cbmc=$2
is_windows=$3
shift 3

# Handle --chain option: run the specified chain script
if [[ "$1" == "--chain" ]]; then
shift
chain_script=$1
shift
exec bash "${chain_script}" "${goto_cc}" "${cbmc}" "${is_windows}" "$@"
fi

# Default: just run cbmc with the given options
exec "${cbmc}" "$@"
18 changes: 18 additions & 0 deletions regression/goto-programs/loop-hash-nested-01/chain.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

set -e

goto_cc=$1
cbmc=$2
is_windows=$3

name="main"

if [[ "${is_windows}" == "true" ]]; then
${goto_cc} ${name}.c "/Fe${name}.gb"
else
${goto_cc} ${name}.c -o ${name}.gb
fi

# Extract hash values from --show-loops output
${cbmc} --show-loops ${name}.gb | grep -o 'hash main\.hash_[0-9]*' | sed 's/hash main\.hash_//' | sort -n
25 changes: 25 additions & 0 deletions regression/goto-programs/loop-hash-nested-01/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
// Test program with nested loops for hash uniqueness verification
// Each loop should have a unique hash identifier

int main()
{
int sum = 0;

// Outer loop
for(int i = 0; i < 3; i++)
{
// Inner loop 1
for(int j = 0; j < 3; j++)
{
sum += i * j;
}

// Inner loop 2 (sibling to inner loop 1)
for(int k = 0; k < 2; k++)
{
sum += i + k;
}
}

return sum;
}
7 changes: 7 additions & 0 deletions regression/goto-programs/loop-hash-nested-01/test.desc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
CORE
main.c
--chain chain.sh
^EXIT=0$
^SIGNAL=0$
^[0-9]+$
^[0-9]+$
18 changes: 18 additions & 0 deletions regression/goto-programs/loop-hash-sensitivity-01/chain.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#!/usr/bin/env bash

set -e

goto_cc=$1
cbmc=$2
is_windows=$3

name="main"

if [[ "${is_windows}" == "true" ]]; then
${goto_cc} ${name}.c "/Fe${name}.gb"
else
${goto_cc} ${name}.c -o ${name}.gb
fi

# Extract hash values from --show-loops output
${cbmc} --show-loops ${name}.gb | grep -o 'hash main\.hash_[0-9]*' | sed 's/hash main\.hash_//' | sort -n
Loading
Loading