I apologize. I left a dangling space in the patch. I am guessing
that this is against whitespace policy. I had actually corrected
it but forgot to add the file to the commit. Please see the
attached.
commit a9b60f36b499463b49095f28ce8053f6387c506d
Author: Brian Sobulefsky <brian.sobulef...@protonmail.com>
Date: Tue Feb 9 16:25:43 2021 -0800
Address the bug found in test file gcc/testsuite/gcc.dg/analyzer/casts-1.c, as
recorded by the XFAIL, and some simpler and more complex versions found during
the investigation of it. PR analyzer/98797 was opened for these bugs.
The simplest manifestation of this bug can be replicated with:
char arr[] = {'A', 'B', 'C', 'D'};
char *pt = ar;
__analyzer_eval(*(pt + 0) == 'A');
__analyzer_eval(*(pt + 1) == 'B');
//etc
The result of the eval call is "UNKNOWN" because the analyzer is unable to
determine the value at the pointer. Note that array access (such as arr[0]) is
correctly handled. The code responsible for this is in file
region-model-manager.cc, function region_model_manager::maybe_fold_sub_svalue.
The relevant section is commented /* Handle getting individual chars from
STRING_CST */. This section only had a case for an element_region. A case
needed to be added for an offset_region.
Additionally, when the offset was 0, such as in *pt or *(pt + 0), the function
region_model_manager::get_offset_region was failing to make the needed offset
region at all. This was due to the test for a constant 0 pointer that was then
returning get_cast_region. A special case is needed for when PARENT is of type
array_type whose type matches TYPE. In this case, get_offset_region is allowed
to continue to normal conclusion.
The original bug noted in gcc/testsuite/gcc.dg/analyzer/casts-1.c was for the
case:
struct s1
{
char a;
char b;
char c;
char d;
};
struct s2
{
char arr[4];
};
struct s2 x = {{'A', 'B', 'C', 'D'}};
struct s1 *p = (struct s1 *)&x;
__analyzer_eval (p->a == 'A');
//etc
This requires a case added to region_model_manager::maybe_fold_sub_svalue in
the individual characters from string constant section for a field region.
Finally, the prior only works for the case where struct s2 was a single field
struct. The more general case is:
struct s2
{
char arr1[2];
char arr2[2];
};
struct s2 x = {{'A', 'B'}, {'C', 'D'}};
Here the array will never be found in the get_any_binding routines. A new
routine is added to class binding_cluster that checks if the region being
searched is a field_region of a cast_region, and if so, tries to find a field
of the original region that contains the region under examination. This new
function is named binding_cluster::get_parent_cast_binding. It is called from
get_binding_recursive.
gcc/analyzer/ChangeLog:
PR analyzer/98797
* region-model-manager.cc region_model_manager::get_offset_region: Add
check for a PARENT array whose type matches TYPE, and have this skip
returning get_cast_region and rather conclude the function normally.
* region-model-manager.cc region_model_manager::maybe_fold_sub_svalue
Update the get character from string_cst section to include cases for
an offset_region and a field_region.
* store.cc binding_cluster::get_binding_recursive: Add case for call
to new function get_parent_cast_binding.
* store.cc binding_cluster::get_parent_cast_binding: New function.
* store.h class binding_cluster: Add declaration for new member
function get_parent_class_binding and macros for bit to byte
conversion.
gcc/testsuite/ChangeLog:
PR analyzer/98797
* gcc.dg/analyzer/casts-1.c: Update file to no longer expect failures
and add test cases for additional bugs solved.
diff --git a/gcc/analyzer/region-model-manager.cc b/gcc/analyzer/region-model-manager.cc
index dfd2413e914..1fd6b94f20a 100644
--- a/gcc/analyzer/region-model-manager.cc
+++ b/gcc/analyzer/region-model-manager.cc
@@ -602,16 +602,46 @@ region_model_manager::maybe_fold_sub_svalue (tree type,
/* Handle getting individual chars from a STRING_CST. */
if (tree cst = parent_svalue->maybe_get_constant ())
if (TREE_CODE (cst) == STRING_CST)
- if (const element_region *element_reg
+ {
+ if (const element_region *element_reg
= subregion->dyn_cast_element_region ())
- {
- const svalue *idx_sval = element_reg->get_index ();
- if (tree cst_idx = idx_sval->maybe_get_constant ())
- if (const svalue *char_sval
- = maybe_get_char_from_string_cst (cst, cst_idx))
- return get_or_create_cast (type, char_sval);
- }
-
+ {
+ const svalue *idx_sval = element_reg->get_index ();
+ if (tree cst_idx = idx_sval->maybe_get_constant ())
+ if (const svalue *char_sval
+ = maybe_get_char_from_string_cst (cst, cst_idx))
+ return get_or_create_cast (type, char_sval);
+ }
+ else if (const offset_region *offset_reg
+ = subregion->dyn_cast_offset_region ())
+ {
+ const svalue *offset_sval = offset_reg->get_byte_offset();
+ if (tree cst_offset = offset_sval->maybe_get_constant ())
+ if (const svalue *char_sval
+ = maybe_get_char_from_string_cst (cst, cst_offset))
+ return get_or_create_cast (type, char_sval);
+ }
+ else if (const field_region *field_reg
+ = subregion->dyn_cast_field_region ())
+ {
+ region_offset field_offset = field_reg->get_offset ();
+ if (!field_offset.symbolic_p ())
+ {
+ bit_offset_t field_bit_offset = field_offset.get_bit_offset ();
+ HOST_WIDE_INT field_offset_bits
+ = field_bit_offset.get_val ()[0];
+ if(!(field_offset_bits & BYTE_BOUNDARY_MASK))
+ {
+ tree cst_offset
+ = build_int_cst (integer_type_node,
+ field_offset_bits >> BYTE_BIT_CONVERT);
+ if (const svalue *char_sval
+ = maybe_get_char_from_string_cst (cst, cst_offset))
+ return get_or_create_cast (type, char_sval);
+ }
+ }
+ }
+ }
/* SUB(INIT(r)).FIELD -> INIT(r.FIELD)
i.e.
Subvalue(InitialValue(R1), FieldRegion(R2, F))
@@ -867,8 +897,19 @@ region_model_manager::get_offset_region (const region *parent,
{
/* If BYTE_OFFSET is zero, return PARENT. */
if (tree cst_offset = byte_offset->maybe_get_constant ())
- if (zerop (cst_offset))
- return get_cast_region (parent, type);
+ {
+ /* Special case: PARENT type is array_type whose type matches TYPE */
+ if (parent->get_type ()
+ && TREE_CODE(parent->get_type ()) == ARRAY_TYPE
+ && TREE_TYPE(parent->get_type ()) == type)
+ {
+ tree offset_int
+ = build_int_cst (integer_type_node, cst_offset->int_cst.val[0]);
+ byte_offset = get_or_create_constant_svalue (offset_int);
+ }
+ else if (zerop (cst_offset))
+ return get_cast_region (parent, type);
+ }
/* Fold OFFSET_REGION(OFFSET_REGION(REG, X), Y)
to OFFSET_REGION(REG, (X + Y)). */
diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index abdb336da91..3afba55ce90 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -996,14 +996,129 @@ binding_cluster::get_binding_recursive (store_manager *mgr,
return sval;
if (reg != m_base_region)
if (const region *parent_reg = reg->get_parent_region ())
- if (const svalue *parent_sval
- = get_binding_recursive (mgr, parent_reg, kind))
+ {
+ if (const svalue *parent_sval
+ = get_binding_recursive (mgr, parent_reg, kind))
+ {
+ /* Extract child svalue from parent svalue. */
+ region_model_manager *rmm_mgr = mgr->get_svalue_manager ();
+ return rmm_mgr->get_or_create_sub_svalue (reg->get_type (),
+ parent_sval, reg);
+ }
+ else if (const svalue *parent_cast_field_sval
+ = get_parent_cast_binding (mgr, reg, kind))
+ {
+ /* PARENT_REG is a cast_region and we found a covering binding
+ in the original_region */
+ return parent_cast_field_sval;
+ }
+ }
+ return NULL;
+}
+
+/* Get a binding for access to a field of a cast_region.
+ Note the original motivation for this was access of the form:
+ struct s1 x = {{'A', 'B'}; {'C', 'D'}}; struct s2 p = (struct s2) &x;
+ __analyzer_eval (p->a == 'A');
+ et al. for the other fields. get_binding_recursive fails because it is
+ unidirectional (from the field_region p->a up the chain of parents). The
+ routine can probably be expanded, and even further broken out, as other cases
+ are discovered and understood. */
+const svalue *
+binding_cluster::get_parent_cast_binding (store_manager *mgr,
+ const region *reg,
+ enum binding_kind kind) const
+{
+
+ /* If we are sure this will never be called incorrectly, we can remove */
+ if (!(mgr && reg))
+ return NULL;
+
+ const region *parent_reg = reg->get_parent_region ();
+
+ /* If it is impossible for a region's parent to be NULL, remove guard */
+ if(!parent_reg)
+ return NULL;
+
+ const cast_region *parent_cast = parent_reg->dyn_cast_cast_region ();
+ const field_region *field_reg = reg->dyn_cast_field_region ();
+
+ /* The first two guards are just safety, this is the real condition */
+ if (!(parent_cast && field_reg))
+ return NULL;
+
+ const region *original_reg = parent_cast->get_original_region ();
+ region_offset reg_offset = field_reg->get_offset ();
+ bit_size_t reg_bit_size;
+ tree original_tree = original_reg->get_type ();
+ tree original_field;
+
+ /* Note assignment to ORIGINAL_FIELD in compound test to
+ save some indentation space (80 comes quickly) */
+ if (original_tree && TREE_CODE(original_tree) == RECORD_TYPE
+ && (original_field = TYPE_FIELDS(original_tree))
+ && !reg_offset.symbolic_p() && field_reg->get_bit_size(®_bit_size))
+ {
+
+ bit_offset_t reg_bit_offset = reg_offset.get_bit_offset ();
+ HOST_WIDE_INT start_offset = reg_bit_offset.get_val ()[0];
+ HOST_WIDE_INT end_offset = start_offset + reg_bit_size.get_val ()[0] - 1;
+ tree covering_field = NULL_TREE;
+
+ /* get_field_at_bit_offset in region.cc has a test for RECORD_TYPE, maybe
+ this should be here too? The test is probably moot, since we have to
+ fully cover and survive the get_binding_recursive below. Also
+ consdider making the function in region.cc usable outside the file? */
+ for (covering_field = original_field;
+ (original_field = TREE_CHAIN (original_field));)
+ {
+ if (original_field->field_decl.bit_offset->int_cst.val[0]
+ <= start_offset)
+ covering_field = original_field;
+ else
+ break; /* Is the list guaranteed to be in order? */
+ }
+
+ if (covering_field && end_offset
+ <= covering_field->field_decl.bit_offset->int_cst.val[0]
+ + covering_field->decl_common.size->int_cst.val[0] - 1)
{
- /* Extract child svalue from parent svalue. */
+ /* We found a field that entirely covers REG. The following code
+ should probably be generalized to more cases, as of now it will
+ basically handle the case where a char array has been initialized
+ into the original struct type. Further work might be to handle when
+ a field to a struct is itself a struct, which is likely recursive.*/
+
region_model_manager *rmm_mgr = mgr->get_svalue_manager ();
- return rmm_mgr->get_or_create_sub_svalue (reg->get_type (),
- parent_sval, reg);
+ const region *covering_field_reg = rmm_mgr
+ ->get_field_region (original_reg, covering_field);
+
+ if (const svalue *parent_sval = get_binding_recursive (mgr,
+ covering_field_reg, kind))
+ {
+
+ HOST_WIDE_INT covering_field_offset = covering_field
+ ->field_decl.bit_offset->int_cst.val[0];
+
+ if (TREE_CODE(covering_field_reg->get_type()) == ARRAY_TYPE
+ && !((start_offset - covering_field_offset)
+ & BYTE_BOUNDARY_MASK))
+ {
+ const svalue *arr_index = rmm_mgr
+ ->get_or_create_constant_svalue (
+ build_int_cst (integer_type_node,
+ (start_offset - covering_field_offset)
+ >> BYTE_BIT_CONVERT));
+ const region *elmt_reg = rmm_mgr
+ ->get_element_region (covering_field_reg,
+ TREE_TYPE (covering_field_reg->get_type ()),
+ arr_index);
+ return rmm_mgr->get_or_create_sub_svalue (
+ elmt_reg->get_type (), parent_sval, elmt_reg);
+ }
+ }
}
+ }
return NULL;
}
diff --git a/gcc/analyzer/store.h b/gcc/analyzer/store.h
index 2bcef6c398a..9ddc57fea01 100644
--- a/gcc/analyzer/store.h
+++ b/gcc/analyzer/store.h
@@ -416,6 +416,9 @@ public:
const svalue *get_binding_recursive (store_manager *mgr,
const region *reg,
enum binding_kind kind) const;
+ const svalue *get_parent_cast_binding (store_manager *mgr,
+ const region *reg,
+ enum binding_kind kind) const;
const svalue *get_any_binding (store_manager *mgr,
const region *reg) const;
const svalue *maybe_get_compound_binding (store_manager *mgr,
@@ -640,4 +643,7 @@ private:
} // namespace ana
+#define BYTE_BIT_CONVERT 3
+#define BYTE_BOUNDARY_MASK ((1 << BYTE_BIT_CONVERT) - 1)
+
#endif /* GCC_ANALYZER_STORE_H */
diff --git a/gcc/testsuite/gcc.dg/analyzer/casts-1.c b/gcc/testsuite/gcc.dg/analyzer/casts-1.c
index 15cd85f77cf..9340615b20f 100644
--- a/gcc/testsuite/gcc.dg/analyzer/casts-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/casts-1.c
@@ -13,6 +13,21 @@ struct s2
char arr[4];
};
+struct s3
+{
+ char arr1[2];
+ char arr2[2];
+};
+
+void test_0 ()
+{
+ char ar[] = {'A', 'B', 'C', 'D'};
+ char *pt = ar;
+ __analyzer_eval (*(pt + 0) == 'A'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (*(pt + 1) == 'B'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (*(pt + 2) == 'C'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (*(pt + 3) == 'D'); /* { dg-warning "TRUE" } */
+}
void test_1 ()
{
struct s1 x = {'A', 'B', 'C', 'D'};
@@ -38,12 +53,22 @@ void test_2 ()
__analyzer_eval (x.arr[2] == 'C'); /* { dg-warning "TRUE" } */
__analyzer_eval (x.arr[3] == 'D'); /* { dg-warning "TRUE" } */
struct s1 *p = (struct s1 *)&x;
- __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
- /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
- __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
- /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
- __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
- /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
- __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" { xfail *-*-* } } */
- /* { dg-bogus "UNKNOWN" "unknown" { xfail *-*-* } .-1 } */
+ __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" } */
+ __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" } */
+ __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" } */
+ __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" } */
+}
+
+void test_3 ()
+{
+ struct s3 x = {{'A', 'B'}, {'C', 'D'}};
+ __analyzer_eval (x.arr1[0] == 'A'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (x.arr1[1] == 'B'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (x.arr2[0] == 'C'); /* { dg-warning "TRUE" } */
+ __analyzer_eval (x.arr2[1] == 'D'); /* { dg-warning "TRUE" } */
+ struct s1 *p = (struct s1 *)&x;
+ __analyzer_eval (p->a == 'A'); /* { dg-warning "TRUE" "true" } */
+ __analyzer_eval (p->b == 'B'); /* { dg-warning "TRUE" "true" } */
+ __analyzer_eval (p->c == 'C'); /* { dg-warning "TRUE" "true" } */
+ __analyzer_eval (p->d == 'D'); /* { dg-warning "TRUE" "true" } */
}