forked from danmar/cppcheck
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataflow.cpp
More file actions
4152 lines (3861 loc) · 198 KB
/
dataflow.cpp
File metadata and controls
4152 lines (3861 loc) · 198 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/*
* Cppcheck - A tool for static C/C++ code analysis
* Copyright (C) 2007-2026 Cppcheck team.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* DataFlow analysis implementation.
*
* See dataflow.h for the public API.
*
* =========================================================================
* REQUIREMENTS (user perspective)
* =========================================================================
*
* Requirement 1 — Null-pointer dereference detection:
* Detect when a local pointer variable is assigned null (nullptr/NULL/0)
* and then dereferenced without a null check in between. Warn at the
* dereference site so that CheckNullPointer can report the bug to the user.
*
* Requirement 2 — Uninitialized variable detection:
* Detect when a local integer, pointer, or float variable is read before
* it has been assigned any value. Warn at the read site so that
* CheckUninitVar can report the bug to the user.
*
* Requirement 3 — Flow-sensitivity:
* Values must follow program execution order. An assignment changes the
* known value from that point forward. An if/else branch must be analyzed
* on each path independently; after the branch, only values present on all
* surviving paths are carried forward.
*
* Requirement 4 — No false positives:
* When the analysis encounters code that is too complex to reason about
* precisely (deeply nested branches, too many tracked variables,
* unrecognised patterns), it must stop and produce no output rather than
* risk an incorrect warning. False negatives (missed bugs) are acceptable;
* false positives are not.
*
* Requirement 5 — Scalar types only:
* Only integer, pointer, and floating-point scalar local variables are
* tracked. User-defined types, arrays, reference parameters, and global
* variables are not tracked.
*
* Requirement 6 — Linear performance:
* The analysis must complete in O(n) time per function, where n is the
* number of tokens in the function body. No iterative fixed-point
* computation is used.
*
* Requirement 7 — No non-const pointer alias analysis:
* When the address of a tracked variable is stored in a non-const pointer
* (e.g. "T* p = &var" or "(T*)(&var)"), the analysis cannot determine
* whether the variable is subsequently written through the alias. The
* variable is removed from the tracked state so that a stale UNINIT value
* is not propagated to later reads. If the address is stored only in a
* const pointer ("const T* p = &var"), the pointed-to value cannot be
* modified through the alias, so tracking continues normally.
*
* Requirement 8 — Partial initialization via non-const alias:
* If a variable is initialized through a non-const pointer alias (even
* partially, such as writing individual bytes of a scalar), the analysis
* treats the variable as fully initialized. This is implemented by
* Requirement 7: aborting tracking when a non-const alias is formed
* means the UNINIT value is cleared before any writes occur through
* the alias.
*
* Requirement 9 — Inline assembler aborts analysis:
* When an asm() statement is encountered, the forward analysis is aborted
* immediately for the remainder of the function body. Inline assembler can
* read and write any variable through output constraints, memory clobbers,
* or indirect effects that the analysis cannot model. Aborting prevents
* false positives (Requirement 4) — the cost is false negatives after the
* asm statement, which is acceptable per project policy.
*
* =========================================================================
* ALGORITHM
* =========================================================================
*
* The analysis runs two passes over each function body.
*
* PASS 1 — Forward analysis (implements Requirements 1, 2, 3, 5, 6):
* Walk tokens in program order, maintaining a state map (DFContext) from
* variable IDs to their current known/possible values. When an assignment
* is seen the state is updated; when a variable is read the current state
* values are written onto the token so that downstream checkers can inspect
* them (Requirement 2 — values visible to checkers without modification).
*
* Branches (Requirement 3): at every if/else the context is forked into two
* independent copies, each analyzed separately. The copies are merged at
* the join point: a Known value that is identical on both paths stays Known;
* differing values become Possible; a value present on only one path is
* dropped (Requirement 4 — conservative merge).
*
* Loops (Requirement 4): the loop body is scanned for assignments and any
* variable modified inside is removed from the state before continuing after
* the loop. Exception: if a variable has an unconditional top-level
* assignment in the loop body, it is treated as initialized after the loop
* because loops are assumed to execute at least once (Requirement 4
* trade-off: false negatives preferred over false positives).
* Constant-true loops (while(1)): Phase NW/NW3 null-exit constraints and
* UNINIT re-injection are skipped because the condition never becomes false
* — the loop only exits via break/return/throw, so "condition was false"
* constraints would be spurious (Requirement 4 — no false positives).
*
* Complexity limits (Requirement 4): the analysis aborts when branch nesting
* exceeds MAX_BRANCH_DEPTH, loop nesting exceeds MAX_LOOP_DEPTH, or the
* number of simultaneously tracked variables exceeds MAX_VARS.
*
* PASS 2 — Backward analysis (implements Requirements 1, 3):
* Walk tokens in reverse program order, collecting value constraints from
* condition checks (e.g. "if (x == 5)"). Those constraints are propagated
* backward and annotated onto earlier reads of the same variable. An
* assignment to a variable kills the backward constraint for it. Block
* boundaries are treated conservatively (Requirement 4): any variable
* assigned inside a block is dropped from the backward state when the block
* boundary is crossed.
*
* PHASES (implemented inside the two passes):
*
* Phase N — null pointer tracking (Requirement 1):
* Pointer variables assigned nullptr/NULL/0 receive a Known(0) value.
* Conditions such as "p==nullptr", "!p", "if(p)" update the state on each
* branch so that reads on the null path are annotated. CheckNullPointer
* finds null values via Token::getValue(0) without any change.
*
* Phase U — uninitialized variable tracking (Requirement 2):
* Local variables declared without an initializer receive a UNINIT value.
* Assignments clear it. Reads while UNINIT is in state are annotated so
* CheckUninitVar can detect the uninitialized use.
*
* Phase U2 — enhanced uninit tracking, survives function calls (Requirement 2):
* A separate set (DFUninitSet) remembers which variables were declared
* without an initializer. After a function call clears the main state,
* UNINIT(Possible) is re-injected for every variable still in that set.
* This prevents calls from silently hiding uninitialized variables. The
* set entry is removed when the variable is first definitely assigned. At
* branch merges the sets are unioned (a variable stays if it might be
* uninitialized on any surviving path).
*
* Phase U-CI — conditional initialization resolution (Requirement 2, 4):
* When a variable is initialized inside the then-block of an if-statement
* with a simple "condVar == const" condition (no else branch), but remains
* possibly-uninit on the else-path, a conditional initialization fact is
* recorded: condInits[varId] = {condVarId, condValue}. When a subsequent
* guard (e.g. "if (condVar != const) { return; }") is processed and the
* surviving path has condVar == Known(const), the fact is resolved and
* varId is removed from uninits. This prevents Phase U2 from falsely
* re-injecting UNINIT after function calls when the guard guarantees the
* initializing block was taken (Requirement 4 — no false positives).
* The fact is invalidated if condVar is reassigned between the two ifs.
*
* Phase F — float/double tracking (Requirement 2, 5):
* Float, double, and long double scalar variables are tracked in the same
* DFState as integers, using FLOAT-typed ValueFlow::Value entries.
* evalConstFloat() constant-folds float expressions.
*
* Phase S — string literal non-null (Requirement 1):
* Assigning a string literal to a pointer stores an Impossible(0) value,
* meaning the pointer is definitely not null. This suppresses spurious
* null-pointer warnings from CheckNullPointer.
*
* Phase M — struct/class member field tracking (Requirements 1, 2):
* "obj.field = value" and "obj.field" reads are tracked in a separate
* DFMemberState keyed by (objectVarId, fieldVarId). Forked at branches
* and merged at join points using the same rules as the main DFState.
* Function calls clear the member state conservatively.
*
* Phase C — container size tracking:
* Container variables (std::vector, std::string, …) are tracked in a
* separate DFContState using CONTAINER_SIZE values. push_back /
* emplace_back increment the known size; pop_back decrements; clear
* resets to 0.
*
* Phase UA-FC — for-loop condition annotation (Requirement 2, ticket #7614):
* Variable reads in the for-loop condition "for (init; cond; incr)" are
* annotated with the current state after the init clause, before the
* increment clause is processed. Without this, the increment-clause
* handler (dropWrittenVars / hasTopLevelAssignment) would erase the
* variable from ctx.state and ctx.uninits first, hiding the UNINIT value
* from the condition read and producing a false negative.
*
* Phase Cast — cast expression value propagation:
* evalConstInt and evalConstFloat look through C-style and C++ cast
* expressions so that assignments such as "int x = (int)5;" propagate
* the constant value correctly.
*
* Phase TC — try/catch block handling (Requirement 4):
* catch blocks are treated as alternative execution paths. When a catch
* block terminates unconditionally (return/throw), the surviving path is
* the post-try path and the catch-block state is discarded. When the
* catch block falls through, the post-try and post-catch states are merged
* at the join point. This prevents stale null assignments inside
* terminating catch blocks from producing false null-pointer positives on
* the surviving (non-exceptional) path.
*
* Phase U-ArgAssign — assignment inside function-call argument list
* (Requirement 4):
* When a function argument list contains an assignment inside a
* parenthesised group (e.g. "(x = get()) == NULL ? \":\" : x"), the
* argument annotation loop skips '(...)' groups to avoid double-processing.
* Without special handling, the assignment is missed and the subsequent
* read of x in the ternary false-branch is falsely annotated as UNINIT.
* Phase U-ArgAssign fixes this: before each '(...)' group is skipped, the
* tokens inside are scanned for plain '=' assignments. Any assigned
* variable is erased from ctx.uninits and ctx.state so that subsequent
* reads of that variable within the same argument list are not falsely
* flagged. This handles all value types (UNINIT, integer, float, pointer).
*
* Phase MC — macro-expanded if-condition suppression (Requirement 4):
* When the "if" keyword is expanded from a macro (tok->isExpandedMacro()),
* applyConditionConstraint is skipped entirely for that if-statement.
* A null-check inside a macro (e.g. "if (p != NULL) p->x = v;") is an
* internal safety guard, not a program-level assertion about the caller's
* variable. Applying the condition would merge a Possible(null) onto the
* post-macro path and trigger a false nullPointerRedundantCheck warning at
* the next dereference (Requirement 4 — no false positives).
*
* =========================================================================
* FILE LAYOUT
* =========================================================================
*
* 1. Core types
* DFValues — vector of values for one variable
* DFState — varId → DFValues (int, ptr, float, UNINIT)
* DFMemberKey — (objectVarId, fieldVarId) composite key (Phase M)
* DFMemberKeyHash — hash for DFMemberKey
* DFMemberState — DFMemberKey → DFValues (Phase M)
* DFContState — varId → CONTAINER_SIZE DFValues (Phase C)
* DFContext — bundles DFState + DFMemberState + DFContState +
* DFUninitSet; forked at branches and merged at joins
* 2. Complexity limits (MAX_BRANCH_DEPTH, MAX_LOOP_DEPTH, MAX_VARS)
* 3. Helper predicates
* isTrackedVar — integral non-pointer scalar (Requirement 5)
* isTrackedPtrVar — raw pointer (Phase N)
* isTrackedFloatVar — float/double/long double scalar (Phase F)
* isTrackedContainerVar — std::vector / std::string / … (Phase C)
* isLhsOfAssignment — token is written, not read
* isFunctionCallOpen — '(' opening a function call
* 4. evalConstInt / evalConstFloat — constant-fold expressions
* 5. annotateTok / annotateMemberTok / annotateContainerTok
* Write state values onto tokens (implements Requirement 2 output)
* Helpers: andLhsContainsVarGuard, orLhsContainsNullTest,
* isRhsOfGuardedAndBySameVar, isRhsOfGuardedOrByNullTest,
* isZeroValue, isNullTestCondition, isNonNullTestCondition,
* isInTrueBranchOfTernaryBySameVar,
* isInFalseBranchOfNegatedTernaryBySameVar
* 5b. Partial null-guard detection (Phase UA-PG, ticket #3054)
* subtreeContainsVar, isDirectOrOperandForVar, lhsContainsVarInOr,
* isInPartiallyGuardedContext:
* Detect "(p || q) && *p" where the OR guard does not exclusively
* protect p. Inject Possible(0) so CheckNullPointer can warn.
* 6. mergeStates / mergeMemberStates / mergeContexts
* Combine two branch states at join points (Requirement 3)
* 7. blockTerminates — detect unconditional exit from a block
* 8. applyConditionConstraint — derive state update from a condition
* 9. dropWrittenVars / hasTopLevelAssignment / collectWhileExitConstraints
* Loop body helpers (Requirement 4)
* 9d. suppressUninitForSentinelLoop — Phase NW3b helper (all loop types)
* 9e. resolveCondInits / recordConditionalInits — Phase U-CI helpers
* 10. forwardAnalyzeBlock — Pass 1 (forward walk, all phases)
* 11. backwardAnalyzeBlock — Pass 2 (backward walk, int/ptr constraints)
* 12. DataFlow::setValues — public entry point
*/
#include "dataflow.h"
#include "astutils.h"
#include "mathlib.h"
#include "settings.h"
#include "symboldatabase.h"
#include "token.h"
#include "tokenlist.h"
#include "vfvalue.h"
#include <algorithm>
#include <cmath>
#include <unordered_map>
#include <unordered_set>
#include <vector>
// Suppress unused-parameter warnings for parameters kept for API symmetry.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define DF_UNUSED(x) (void)(x)
namespace {
// Returns true if the floating-point value is exactly zero (positive or negative).
// Used instead of direct == 0.0 comparison to avoid -Wfloat-equal warnings.
static bool isFloatZero(double v) {
return std::fpclassify(v) == FP_ZERO;
}
// ===========================================================================
// 1. Core types
// ===========================================================================
/// The list of known/possible values associated with one variable.
/// Multiple entries represent multiple possible values on different paths
/// (e.g. after merging an if/else where each branch assigns a different value).
using DFValues = std::vector<ValueFlow::Value>;
/// The analysis state for one point in the program.
/// Maps each tracked variable ID to its current set of values.
///
/// Used for integer, pointer, float, and UNINIT values — all in one map
/// distinguished by ValueFlow::Value::valueType.
using DFState = std::unordered_map<nonneg int, DFValues>;
/// Phase M: composite key identifying one struct/class member field.
/// First element is the variable ID of the object; second is the variable ID
/// of the field within that object's type.
using DFMemberKey = std::pair<nonneg int, nonneg int>;
/// Hash for DFMemberKey. Uses a multiplicative combine to reduce collisions
/// from closely-spaced integer pairs.
struct DFMemberKeyHash {
std::size_t operator()(const DFMemberKey& k) const noexcept {
// Knuth multiplicative hash combine
return std::hash<nonneg int>()(k.first) * 2654435761UL ^
std::hash<nonneg int>()(k.second);
}
};
/// Phase M: per-member-field state. Maps (objectVarId, fieldVarId) to the
/// current set of values that field is known/possibly equal to.
using DFMemberState = std::unordered_map<DFMemberKey, DFValues, DFMemberKeyHash>;
/// Phase C: per-container state. Maps container varId to CONTAINER_SIZE
/// values representing the current known/possible size.
/// Re-uses the same DFValues type; all stored values have
/// valueType == ValueFlow::Value::ValueType::CONTAINER_SIZE.
using DFContState = std::unordered_map<nonneg int, DFValues>;
/// Phase U2: set of variable IDs that were declared without an initializer
/// in the current function body. Survives function calls (unlike DFState)
/// so UNINIT can be re-injected after calls. Removed when the variable
/// receives its first definite assignment.
using DFUninitSet = std::unordered_set<nonneg int>;
/// Bundle of per-branch analysis state for the forward pass.
///
/// All four fields are copied when the analysis forks at an if/else branch,
/// and merged back at the join point. For DFUninitSet, the merge takes the
/// union: a variable stays in the set if it might be uninitialized on any
/// surviving path.
struct DFContext {
DFState state; ///< varId → values (int, ptr, float, UNINIT)
DFMemberState members; ///< (objVarId, fieldVarId) → values (Phase M)
DFContState containers; ///< container varId → CONTAINER_SIZE (Phase C)
DFUninitSet uninits; ///< declared-without-init varIds (Phase U2)
/// Requirement (Phase L): varIds of local, non-pointer, non-reference,
/// non-static scalar variables. These cannot be modified by a called
/// function unless their address has been taken. Populated when a
/// variable declaration is first encountered; never cleared.
std::unordered_set<nonneg int> localScalars;
/// Requirement (Phase L): varIds whose address has been observed taken
/// via the unary '&' operator. A local scalar in this set is excluded
/// from the "safe to preserve" set at call sites, because a called
/// function could reach it through a pointer that holds &var.
std::unordered_set<nonneg int> addressTaken;
/// Phase U-CI: conditional initialization facts.
/// condInits[varId] = {condVarId, condValue}: varId was initialized
/// in the then-block of an if-statement whose condition was
/// "condVarId == condValue". When condVarId later becomes Known
/// (condValue) in state (e.g. after a guard "if (condVarId != condValue)
/// { return; }"), the fact is resolved and varId is removed from uninits.
/// Invalidated when condVarId is assigned in the outer scope.
std::unordered_map<nonneg int, std::pair<nonneg int, MathLib::bigint>> condInits;
};
// ===========================================================================
// 2. Complexity limits
// ===========================================================================
//
// Requirement 4: the analysis must abort on complex paths rather than risk
// producing a false positive. These constants define the abort thresholds.
/// Maximum nesting depth of if/else branches before aborting.
/// Deeper nesting makes it hard to merge states correctly without FP risk.
static constexpr int MAX_BRANCH_DEPTH = 4;
/// Maximum nesting depth of loops before aborting.
static constexpr int MAX_LOOP_DEPTH = 2;
/// Maximum number of simultaneously tracked variables before aborting.
/// Keeps memory usage and merge cost bounded.
static constexpr std::size_t MAX_VARS = 64;
// ===========================================================================
// 3. Helper predicates
// ===========================================================================
/// Returns true when tok refers to an integer (non-pointer) variable that the
/// analysis should track.
///
/// Requirement 5: only track integral, non-pointer types initially.
/// Pointers, floats, and user-defined types are excluded.
static bool isTrackedVar(const Token* tok) {
if (!tok || tok->varId() == 0)
return false;
const Variable* var = tok->variable();
if (!var)
return false;
const ValueType* vt = var->valueType();
return vt && vt->isIntegral() && vt->pointer == 0;
}
/// Returns true when tok refers to a pointer variable that the analysis
/// should track for null-pointer and uninitialized-pointer bugs.
///
/// Phase N: track pointers assigned nullptr so that CheckNullPointer can
/// detect null dereferences. Only raw (non-function, non-array) pointers
/// with exactly one level of indirection are tracked to keep the
/// implementation simple and conservative.
static bool isTrackedPtrVar(const Token* tok) {
if (!tok || tok->varId() == 0)
return false;
const Variable* var = tok->variable();
if (!var || var->isArray())
return false;
const ValueType* vt = var->valueType();
// pointer > 0: one or more levels of pointer indirection.
// Exclude function pointers (they have a function type base).
return vt && vt->pointer > 0;
}
/// Returns true when tok is a '.' AST node representing a member pointer
/// field access that the analysis should track for null conditions.
///
/// Requirements:
/// - tok is the '.' AST operator node (member-access expression)
/// - astOperand1() (the object) has a non-zero varId
/// - astOperand2() (the field) has a non-zero varId
/// - the field's variable type is a raw pointer (pointer > 0)
///
/// Phase MN: used by applyConditionConstraint to recognise conditions of the
/// form "!s.x", "if(s.x)", "s.x == nullptr", "s.x != nullptr" where s.x is
/// a pointer-typed struct/class member field.
static bool isMemberPtrAccess(const Token* tok) {
if (!tok || tok->str() != ".")
return false;
const Token* objTok = tok->astOperand1();
const Token* fieldTok = tok->astOperand2();
if (!objTok || objTok->varId() == 0)
return false;
if (!fieldTok || fieldTok->varId() == 0)
return false;
const Variable* fieldVar = fieldTok->variable();
if (!fieldVar)
return false;
const ValueType* vt = fieldVar->valueType();
// Only track simple pointer fields (pointer > 0), not arrays or function pointers.
return vt && vt->pointer > 0;
}
/// Returns true when tok refers to a floating-point scalar variable.
///
/// Phase F: track float/double/long double non-pointer variables so that
/// the analysis can propagate constant float values through assignments and
/// arithmetic, enabling checkers to detect float-related bugs.
static bool isTrackedFloatVar(const Token* tok) {
if (!tok || tok->varId() == 0)
return false;
const Variable* var = tok->variable();
if (!var)
return false;
const ValueType* vt = var->valueType();
// isFloat() returns true for FLOAT, DOUBLE, and LONGDOUBLE.
return vt && vt->isFloat() && vt->pointer == 0;
}
/// Returns true when tok refers to a container variable (std::vector,
/// std::string, etc.) that the analysis should track for size information.
///
/// Phase C: requires library configuration to be loaded so that ValueType
/// is populated with CONTAINER type information.
static bool isTrackedContainerVar(const Token* tok) {
if (!tok || tok->varId() == 0)
return false;
const Variable* var = tok->variable();
if (!var)
return false;
const ValueType* vt = var->valueType();
return vt &&
vt->type == ValueType::Type::CONTAINER &&
vt->container != nullptr;
}
/// Returns true when tok is the direct left-hand-side operand of an
/// assignment operator (=, +=, -=, …).
/// Such tokens are being written to, not read, so they must not be annotated
/// with the pre-assignment state value.
static bool isLhsOfAssignment(const Token* tok) {
return tok->astParent() &&
tok->astParent()->isAssignmentOp() &&
tok->astParent()->astOperand1() == tok;
}
/// Returns true when tok is the opening '(' of a function call.
/// Keywords that use '(' (if, for, while, …) are excluded.
/// This is used to conservatively clear state on any call site because a
/// called function might modify variables through pointers or global state.
///
/// Two call forms are recognised:
/// name(args) — ordinary call: prev is a name token
/// (*expr)(args) — function-pointer call: prev is ')' closing the
/// dereferenced function-pointer expression
/// For the second form only: the opening '(' of the preceding expression
/// must immediately be followed by '*', so that "(type)(expr)" cast
/// expressions (where prev->link()->next() is a type keyword, not '*')
/// are not mistaken for function calls.
static bool isFunctionCallOpen(const Token* tok) {
if (!tok || tok->str() != "(")
return false;
const Token* prev = tok->previous();
if (!prev)
return false;
if (prev->isName()) {
// Exclude control-flow keywords
if (Token::Match(prev, "if|for|while|do|switch|return|catch|throw"))
return false;
// Exclude type-cast expressions: "(int)x" — prev is a type keyword
if (prev->isStandardType())
return false;
return true;
}
// Function-pointer call: (*f)(args), (*obj->fn)(args), etc.
// The preceding ')' closes the parenthesised function-pointer expression,
// whose first token after '(' is '*' (the dereference operator).
if (prev->str() == ")" && prev->link()) {
const Token* afterOpen = prev->link()->next();
if (afterOpen && afterOpen->str() == "*")
return true;
}
// Template function call: name<args>(args), e.g. "Calc<true>(x, y)".
// The tokenizer sets Token::link() on the closing '>' of a template
// argument list so that '>' with a link means a template close, not a
// comparison operator. This handles calls of the form:
// func<T>(args)
// Scope::func<T>(args)
// obj.func<T>(args)
if (prev->str() == ">" && prev->link())
return true;
return false;
}
// ===========================================================================
// 3b. isZeroValue — moved here so evalConstInt can use it
// ===========================================================================
/// Returns true when tok carries a Known integer value of zero.
/// Used by evalConstInt (to recognise null pointer constants that have been
/// annotated with Known(0) in the literal pass) and by isNullTestCondition /
/// isNonNullTestCondition to recognise null literal operands in comparisons
/// such as "p == 0" or "p != 0".
static bool isZeroValue(const Token* t) {
return t && t->hasKnownIntValue() && t->getKnownIntValue() == 0;
}
// ===========================================================================
// 4. evalConstInt
// ===========================================================================
/// Try to evaluate the expression rooted at `expr` as a compile-time integer
/// constant. `state` is consulted for variables that already have a single
/// known value. Returns true and sets `result` on success, false otherwise.
///
/// Handles:
/// - Integer literals (42, 0xFF, …)
/// - Character literals ('a', …)
/// - Null pointer constants → 0 (via isZeroValue; Phase N)
/// - Variables with a Known value in state (copy propagation)
/// - Unary minus (-x)
/// - Binary arithmetic (+, -, *, /, %)
///
/// Does NOT handle:
/// - Variables with multiple Possible values (ambiguous → return false)
/// - Pointer arithmetic, floating-point, bitwise ops (not needed for MVP)
/// - Division / modulo by zero (returns false — no value, no FP)
static bool evalConstInt(const Token* expr, const DFState& state, ValueFlow::Value& result) {
if (!expr)
return false;
// --- Cast expression: look through to the operand ---
// Phase Cast: "(T)expr" has the same integer value as expr.
// For a C-style cast the token structure is:
// '(' (isCast==true)
// astOperand1() → the expression being cast (no astOperand2)
// For a C++ functional cast, e.g. int(expr):
// '(' (isCast==true)
// astOperand1() → type keyword
// astOperand2() → the expression being cast
// We follow the same pattern used in programmemory.cpp (execute, cast branch).
if (expr->str() == "(" && expr->isCast()) {
if (expr->astOperand2())
return evalConstInt(expr->astOperand2(), state, result);
if (expr->astOperand1())
return evalConstInt(expr->astOperand1(), state, result);
return false;
}
// --- null pointer constant ---
// Phase N: any non-variable token that already carries a Known(0) integer
// value is treated as a null pointer constant. This covers:
// - C++11 "nullptr" (annotated in the setValues literal pass)
// - C macro "NULL" after preprocessing (expanded to the literal "0")
// - Any other zero-valued constant resolved by an earlier pass
// Using isZeroValue() avoids hardcoding specific token names here.
if (expr->varId() == 0 && isZeroValue(expr)) {
result = ValueFlow::Value(0LL);
result.setKnown();
return true;
}
// --- Integer literal ---
if (expr->isNumber() && MathLib::isInt(expr->str())) {
result = ValueFlow::Value(MathLib::toBigNumber(expr->str()));
result.setKnown();
return true;
}
// --- Character literal ---
if (expr->tokType() == Token::eChar) {
try {
result = ValueFlow::Value(MathLib::toBigNumber(expr->str()));
result.setKnown();
return true;
} catch (...) {
return false;
}
}
// --- Variable: look up in state ---
// Only propagate when there is a single Known integer value (unambiguous).
if (expr->varId() > 0) {
const auto it = state.find(expr->varId());
if (it != state.end() &&
it->second.size() == 1 &&
it->second[0].isKnown() &&
it->second[0].isIntValue()) {
result = it->second[0];
return true;
}
}
// --- Unary minus ---
if (expr->str() == "-" && expr->astOperand1() && !expr->astOperand2()) {
ValueFlow::Value inner;
if (!evalConstInt(expr->astOperand1(), state, inner))
return false;
inner.intvalue = -inner.intvalue;
inner.varvalue = inner.intvalue;
inner.wideintvalue = inner.intvalue;
result = inner;
return true;
}
// --- Binary arithmetic ---
if (expr->astOperand1() && expr->astOperand2()) {
ValueFlow::Value lhs, rhs;
if (!evalConstInt(expr->astOperand1(), state, lhs) ||
!evalConstInt(expr->astOperand2(), state, rhs))
return false;
if (!lhs.isIntValue() || !rhs.isIntValue())
return false;
result = ValueFlow::Value(0);
result.setKnown();
const std::string& op = expr->str();
if (op == "+")
result.intvalue = lhs.intvalue + rhs.intvalue;
else if (op == "-")
result.intvalue = lhs.intvalue - rhs.intvalue;
else if (op == "*")
result.intvalue = lhs.intvalue * rhs.intvalue;
else if (op == "/" && rhs.intvalue != 0)
result.intvalue = lhs.intvalue / rhs.intvalue;
else if (op == "%" && rhs.intvalue != 0)
result.intvalue = lhs.intvalue % rhs.intvalue;
else
return false; // division/modulo by zero, or unsupported operator
result.varvalue = result.intvalue;
result.wideintvalue = result.intvalue;
return true;
}
return false;
}
// ===========================================================================
// 4b. evalConstFloat (Phase F)
// ===========================================================================
/// Try to evaluate the expression rooted at `expr` as a compile-time
/// floating-point constant. Returns true and sets `result` (with
/// valueType==FLOAT and floatValue set) on success, false otherwise.
///
/// Phase F1: constant-fold float/double expressions. Reuses the same
/// recursive structure as evalConstInt().
///
/// Handles:
/// - Float literals (3.14, 1.0e-5, …)
/// - Integer literals (promoted to double for mixed arithmetic)
/// - Variables with a single Known FLOAT value in state
/// - Unary minus (-x)
/// - Binary arithmetic (+, -, *, /)
/// - Division by zero (returns false — no value, no FP)
static bool evalConstFloat(const Token* expr, const DFState& state, ValueFlow::Value& result) {
if (!expr)
return false;
// --- Cast expression: look through to the operand ---
// Phase Cast: "(T)expr" has the same float value as expr.
// Mirrors the cast branch in evalConstInt.
if (expr->str() == "(" && expr->isCast()) {
if (expr->astOperand2())
return evalConstFloat(expr->astOperand2(), state, result);
if (expr->astOperand1())
return evalConstFloat(expr->astOperand1(), state, result);
return false;
}
// --- Float literal ---
if (expr->isNumber() && MathLib::isFloat(expr->str())) {
result = ValueFlow::Value();
result.valueType = ValueFlow::Value::ValueType::FLOAT;
result.floatValue = MathLib::toDoubleNumber(expr->str());
result.setKnown();
return true;
}
// --- Integer literal (promoted to float context) ---
if (expr->isNumber() && MathLib::isInt(expr->str())) {
result = ValueFlow::Value();
result.valueType = ValueFlow::Value::ValueType::FLOAT;
result.floatValue = static_cast<double>(MathLib::toBigNumber(expr->str()));
result.setKnown();
return true;
}
// --- Variable: look up Known FLOAT value in state ---
if (expr->varId() > 0) {
const auto it = state.find(expr->varId());
if (it != state.end() &&
it->second.size() == 1 &&
it->second[0].isKnown() &&
it->second[0].isFloatValue()) {
result = it->second[0];
return true;
}
}
// --- Unary minus ---
if (expr->isUnaryOp("-")) {
ValueFlow::Value inner;
if (!evalConstFloat(expr->astOperand1(), state, inner))
return false;
inner.floatValue = -inner.floatValue;
result = inner;
return true;
}
// --- Binary arithmetic ---
if (expr->isBinaryOp()) {
ValueFlow::Value lhs, rhs;
if (!evalConstFloat(expr->astOperand1(), state, lhs) ||
!evalConstFloat(expr->astOperand2(), state, rhs))
return false;
if (!lhs.isFloatValue() || !rhs.isFloatValue())
return false;
result = ValueFlow::Value();
result.valueType = ValueFlow::Value::ValueType::FLOAT;
result.setKnown();
const std::string& op = expr->str();
if (op == "+")
result.floatValue = lhs.floatValue + rhs.floatValue;
else if (op == "-")
result.floatValue = lhs.floatValue - rhs.floatValue;
else if (op == "*")
result.floatValue = lhs.floatValue * rhs.floatValue;
else if (op == "/" && !isFloatZero(rhs.floatValue))
result.floatValue = lhs.floatValue / rhs.floatValue;
else
return false; // division by zero or unsupported operator
return true;
}
return false;
}
// ===========================================================================
// 5. annotateTok / annotateMemberTok / annotateContainerTok
// ===========================================================================
//
// These functions write the current analysis state values onto token nodes so
// that downstream checkers (CheckNullPointer, CheckUninitVar, …) can read
// them via Token::getValue() / Token::addValue() without any modification.
// This is the output mechanism of the analysis (Requirement 2).
//
// Several helper predicates are defined first because annotateTok needs them
// to decide whether a particular use of a pointer is already guarded by a
// null check in the surrounding expression (e.g. "p && p->x" — do not
// annotate p->x with a possible-null value because p is guarded by the &&).
/// Returns true when the LHS subtree of a '&&' chain contains a direct
/// non-null guard for varId (the pointer variable itself).
/// Handles chained && expressions such as "s && foo" appearing as the
/// LHS of a later "&&", e.g. "(s && foo) && s->x" — s is guarded even
/// though it is not the immediate LHS of the outer &&.
/// Conservative: only recognises the simple "variable itself" form of guard.
static bool andLhsContainsVarGuard(const Token* lhs, nonneg int varId) {
if (!lhs)
return false;
// Direct non-null guard: the pointer variable itself.
if (lhs->varId() == varId && isTrackedPtrVar(lhs))
return true;
// Chained &&: recurse into both operands.
if (lhs->str() == "&&")
return andLhsContainsVarGuard(lhs->astOperand1(), varId) ||
andLhsContainsVarGuard(lhs->astOperand2(), varId);
return false;
}
// Forward declaration needed by orLhsContainsNullTest (defined below).
static bool isNullTestCondition(const Token* cond, nonneg int varId);
// Requirement: when the LHS subtree of a '||' chain contains a null-test for
// varId (e.g. "!s", "s == 0", "s == nullptr"), then the RHS of the '||' is
// only evaluated when the null-test is false, i.e. when the pointer is
// non-null. This handles chained || such as "!s || foo" appearing as the LHS
// of a later "||", e.g. "(!s || foo) || s->x".
// Conservative: delegates to isNullTestCondition for the leaf check.
static bool orLhsContainsNullTest(const Token* lhs, nonneg int varId) {
if (!lhs)
return false;
// Direct null-test in the LHS: !s, s==0, etc.
if (isNullTestCondition(lhs, varId))
return true;
// Chained ||: recurse into both operands.
if (lhs->str() == "||")
return orLhsContainsNullTest(lhs->astOperand1(), varId) ||
orLhsContainsNullTest(lhs->astOperand2(), varId);
return false;
}
/// Walk up the AST parent chain looking for a binary operator node `op`
/// ("&&" or "||") where tok is on the right-hand side and the left-hand side
/// satisfies `lhsCheck(lhs, varId)`. Returns true if such a node is found.
///
/// Used to suppress false-positive nullable annotations when the pointer is
/// already guarded by a preceding short-circuit check in the same expression
/// (Requirement 4). The `LhsCheck` template parameter avoids std::function
/// overhead; it is instantiated with andLhsContainsVarGuard or
/// orLhsContainsNullTest below.
template<typename LhsCheck>
static bool isRhsOfChainedOp(const Token* tok, const char* op, LhsCheck lhsCheck) {
if (!tok || tok->varId() == 0)
return false;
const nonneg int varId = tok->varId();
const Token* child = tok;
for (const Token* parent = tok->astParent(); parent; child = parent, parent = parent->astParent()) {
if (parent->str() != op)
continue;
if (parent->astOperand2() != child)
continue;
if (lhsCheck(parent->astOperand1(), varId))
return true;
}
return false;
}
/// Returns true when tok is on the RHS of a "&&" chain whose LHS guards
/// the same pointer variable (e.g. "s && foo && s->x" — s->x is safe).
static bool isRhsOfGuardedAndBySameVar(const Token* tok) {
return isRhsOfChainedOp(tok, "&&", andLhsContainsVarGuard);
}
/// Returns true when tok is on the RHS of a "||" chain whose LHS contains
/// a null-test for the same variable (e.g. "!s || foo || s->x" — s->x is safe).
static bool isRhsOfGuardedOrByNullTest(const Token* tok) {
return isRhsOfChainedOp(tok, "||", orLhsContainsNullTest);
}
// Forward declaration needed because isNullTestCondition and
// isNonNullTestCondition are mutually recursive.
static bool isNonNullTestCondition(const Token* cond, nonneg int varId);
/// Returns true when `cond` is a null-test of `varId` — i.e. the condition
/// is true exactly when the variable is null. Covers:
/// !p, p == 0, 0 == p, p == nullptr, nullptr == p, !(non-null-test)
static bool isNullTestCondition(const Token* cond, nonneg int varId) {
if (!cond)
return false;
if (cond->str() == "!" && cond->astOperand1() &&
cond->astOperand1()->varId() == varId && isTrackedPtrVar(cond->astOperand1()))
return true;
if (cond->str() == "==") {
const Token* lhs = cond->astOperand1();
const Token* rhs = cond->astOperand2();
if (lhs && lhs->varId() == varId && isTrackedPtrVar(lhs) && isZeroValue(rhs))
return true;
if (rhs && rhs->varId() == varId && isTrackedPtrVar(rhs) && isZeroValue(lhs))
return true;
}
// !(non-null-test) is a null-test, e.g. !(s!=0)
if (cond->str() == "!" && isNonNullTestCondition(cond->astOperand1(), varId))
return true;
return false;
}
/// Returns true when `cond` is a non-null-test of `varId` — i.e. the condition
/// is true exactly when the variable is non-null. Covers:
/// p, p != 0, 0 != p, p != nullptr, nullptr != p, !(null-test)
static bool isNonNullTestCondition(const Token* cond, nonneg int varId) {
if (!cond)
return false;
if (cond->varId() == varId && isTrackedPtrVar(cond))
return true;
if (cond->str() == "!=") {
const Token* lhs = cond->astOperand1();
const Token* rhs = cond->astOperand2();
if (lhs && lhs->varId() == varId && isTrackedPtrVar(lhs) && isZeroValue(rhs))
return true;
if (rhs && rhs->varId() == varId && isTrackedPtrVar(rhs) && isZeroValue(lhs))
return true;
}
// !(null-test) is a non-null-test, e.g. !(s==0)
if (cond->str() == "!" && isNullTestCondition(cond->astOperand1(), varId))
return true;
return false;
}
/// Returns true when tok is in the true branch of a ternary where the
/// condition is a non-null-test of the same pointer variable as tok.
/// Covers: "p ? p->x : 0", "p!=0 ? p->x : 0", "0!=p ? p->x : 0", etc.
/// In that context the pointer is known non-null, so nullable-zero annotations
/// must not be attached (otherwise CheckNullPointer gets a false positive).
static bool isInTrueBranchOfTernaryBySameVar(const Token* tok) {
if (!tok || tok->varId() == 0)
return false;
const nonneg int varId = tok->varId();
const Token* child = tok;
for (const Token* parent = tok->astParent(); parent; child = parent, parent = parent->astParent()) {
if (parent->str() != ":")
continue;
// child must be the true branch (astOperand1 of ':'), not the false branch
if (parent->astOperand1() != child)
continue;
// ':' must be the rhs of a '?'
const Token* question = parent->astParent();
if (!question || question->str() != "?")
continue;
if (isNonNullTestCondition(question->astOperand1(), varId))
return true;
}
return false;
}
/// Returns true when tok is in the false branch of a ternary where the
/// condition is a null-test of the same pointer variable as tok.
/// Covers: "!p ? 0 : p->x", "p==0 ? 0 : p->x", "0==p ? 0 : p->x", etc.
/// In that context the pointer is known non-null, so nullable-zero annotations
/// must not be attached (otherwise CheckNullPointer gets a false positive).
static bool isInFalseBranchOfNegatedTernaryBySameVar(const Token* tok) {
if (!tok || tok->varId() == 0)
return false;
const nonneg int varId = tok->varId();
const Token* child = tok;
for (const Token* parent = tok->astParent(); parent; child = parent, parent = parent->astParent()) {
if (parent->str() != ":")
continue;
// child must be the false branch (astOperand2 of ':'), not the true branch
if (parent->astOperand2() != child)
continue;
// ':' must be the rhs of a '?'
const Token* question = parent->astParent();