cppcheck/lib/dataflow.cpp at dataflow · cppchecksolutions/cppcheck

History

4152 lines (3861 loc) · 198 KB

Raw

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

263

264

265

266

267

268

269

270

271

272

273

274

275

276

277

278

279

280

281

282

283

284

285

286

287

288

289

290

291

292

293

294

295

296

297

298

299

300

301

302

303

304

305

306

307

308

309

310

311

312

313

314

315

316

317

318

319

320

321

322

323

324

325

326

327

328

329

330

331

332

333

334

335

336

337

338

339

340

341

342

343

344

345

346

347

348

349

350

351

352

353

354

355

356

357

358

359

360

361

362

363

364

365

366

367

368

369

370

371

372

373

374

375

376

377

378

379

380

381

382

383

384

385

386

387

388

389

390

391

392

393

394

395

396

397

398

399

400

401

402

403

404

405

406

407

408

409

410

411

412

413

414

415

416

417

418

419

420

421

422

423

424

425

426

427

428

429

430

431

432

433

434

435

436

437

438

439

440

441

442

443

444

445

446

447

448

449

450

451

452

453

454

455

456

457

458

459

460

461

462

463

464

465

466

467

468

469

470

471

472

473

474

475

476

477

478

479

480

481

482

483

484

485

486

487

488

489

490

491

492

493

494

495

496

497

498

499

500

501

502

503

504

505

506

507

508

509

510

511

512

513

514

515

516

517

518

519

520

521

522

523

524

525

526

527

528

529

530

531

532

533

534

535

536

537

538

539

540

541

542

543

544

545

546

547

548

549

550

551

552

553

554

555

556

557

558

559

560

561

562

563

564

565

566

567

568

569

570

571

572

573

574

575

576

577

578

579

580

581

582

583

584

585

586

587

588

589

590

591

592

593

594

595

596

597

598

599

600

601

602

603

604

605

606

607

608

609

610

611

612

613

614

615

616

617

618

619

620

621

622

623

624

625

626

627

628

629

630

631

632

633

634

635

636

637

638

639

640

641

642

643

644

645

646

647

648

649

650

651

652

653

654

655

656

657

658

659

660

661

662

663

664

665

666

667

668

669

670

671

672

673

674

675

676

677

678

679

680

681

682

683

684

685

686

687

688

689

690

691

692

693

694

695

696

697

698

699

700

701

702

703

704

705

706

707

708

709

710

711

712

713

714

715

716

717

718

719

720

721

722

723

724

725

726

727

728

729

730

731

732

733

734

735

736

737

738

739

740

741

742

743

744

745

746

747

748

749

750

751

752

753

754

755

756

757

758

759

760

761

762

763

764

765

766

767

768

769

770

771

772

773

774

775

776

777

778

779

780

781

782

783

784

785

786

787

788

789

790

791

792

793

794

795

796

797

798

799

800

801

802

803

804

805

806

807

808

809

810

811

812

813

814

815

816

817

818

819

820

821

822

823

824

825

826

827

828

829

830

831

832

833

834

835

836

837

838

839

840

841

842

843

844

845

846

847

848

849

850

851

852

853

854

855

856

857

858

859

860

861

862

863

864

865

866

867

868

869

870

871

872

873

874

875

876

877

878

879

880

881

882

883

884

885

886

887

888

889

890

891

892

893

894

895

896

897

898

899

900

901

902

903

904

905

906

907

908

909

910

911

912

913

914

915

916

917

918

919

920

921

922

923

924

925

926

927

928

929

930

931

932

933

934

935

936

937

938

939

940

941

942

943

944

945

946

947

948

949

950

951

952

953

954

955

956

957

958

959

960

961

962

963

964

965

966

967

968

969

970

971

972

973

974

975

976

977

978

979

980

981

982

983

984

985

986

987

988

989

990

991

992

993

994

995

996

997

998

999

1000

* Cppcheck - A tool for static C/C++ code analysis

* This program is free software: you can redistribute it and/or modify

* it under the terms of the GNU General Public License as published by

* the Free Software Foundation, either version 3 of the License, or

* (at your option) any later version.

* This program is distributed in the hope that it will be useful,

* but WITHOUT ANY WARRANTY; without even the implied warranty of

* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the

* GNU General Public License for more details.

* You should have received a copy of the GNU General Public License

* along with this program. If not, see <http://www.gnu.org/licenses/>.

* DataFlow analysis implementation.

* See dataflow.h for the public API.

* =========================================================================

* REQUIREMENTS (user perspective)

* =========================================================================

* Requirement 1 — Null-pointer dereference detection:

* Detect when a local pointer variable is assigned null (nullptr/NULL/0)

* and then dereferenced without a null check in between. Warn at the

* dereference site so that CheckNullPointer can report the bug to the user.

* Requirement 2 — Uninitialized variable detection:

* Detect when a local integer, pointer, or float variable is read before

* it has been assigned any value. Warn at the read site so that

* CheckUninitVar can report the bug to the user.

* Requirement 3 — Flow-sensitivity:

* Values must follow program execution order. An assignment changes the

* known value from that point forward. An if/else branch must be analyzed

* on each path independently; after the branch, only values present on all

* surviving paths are carried forward.

* Requirement 4 — No false positives:

* When the analysis encounters code that is too complex to reason about

* precisely (deeply nested branches, too many tracked variables,

* unrecognised patterns), it must stop and produce no output rather than

* risk an incorrect warning. False negatives (missed bugs) are acceptable;

* false positives are not.

* Requirement 5 — Scalar types only:

* Only integer, pointer, and floating-point scalar local variables are

* tracked. User-defined types, arrays, reference parameters, and global

* variables are not tracked.

* Requirement 6 — Linear performance:

* The analysis must complete in O(n) time per function, where n is the

* number of tokens in the function body. No iterative fixed-point

* computation is used.

* Requirement 7 — No non-const pointer alias analysis:

* When the address of a tracked variable is stored in a non-const pointer

* (e.g. "T* p = &var" or "(T*)(&var)"), the analysis cannot determine

* whether the variable is subsequently written through the alias. The

* variable is removed from the tracked state so that a stale UNINIT value

* is not propagated to later reads. If the address is stored only in a

* const pointer ("const T* p = &var"), the pointed-to value cannot be

* modified through the alias, so tracking continues normally.

* Requirement 8 — Partial initialization via non-const alias:

* If a variable is initialized through a non-const pointer alias (even

* partially, such as writing individual bytes of a scalar), the analysis

* treats the variable as fully initialized. This is implemented by

* Requirement 7: aborting tracking when a non-const alias is formed

* means the UNINIT value is cleared before any writes occur through

* the alias.

* Requirement 9 — Inline assembler aborts analysis:

* When an asm() statement is encountered, the forward analysis is aborted

* immediately for the remainder of the function body. Inline assembler can

* read and write any variable through output constraints, memory clobbers,

* or indirect effects that the analysis cannot model. Aborting prevents

* false positives (Requirement 4) — the cost is false negatives after the

* asm statement, which is acceptable per project policy.

* =========================================================================

* ALGORITHM

* =========================================================================

* The analysis runs two passes over each function body.

* PASS 1 — Forward analysis (implements Requirements 1, 2, 3, 5, 6):

* Walk tokens in program order, maintaining a state map (DFContext) from

* variable IDs to their current known/possible values. When an assignment

* is seen the state is updated; when a variable is read the current state

* values are written onto the token so that downstream checkers can inspect

* them (Requirement 2 — values visible to checkers without modification).

* Branches (Requirement 3): at every if/else the context is forked into two

* independent copies, each analyzed separately. The copies are merged at

* the join point: a Known value that is identical on both paths stays Known;

* differing values become Possible; a value present on only one path is

* dropped (Requirement 4 — conservative merge).

* Loops (Requirement 4): the loop body is scanned for assignments and any

* variable modified inside is removed from the state before continuing after

* the loop. Exception: if a variable has an unconditional top-level

* assignment in the loop body, it is treated as initialized after the loop

* because loops are assumed to execute at least once (Requirement 4

* trade-off: false negatives preferred over false positives).

* Constant-true loops (while(1)): Phase NW/NW3 null-exit constraints and

* UNINIT re-injection are skipped because the condition never becomes false

* — the loop only exits via break/return/throw, so "condition was false"

* constraints would be spurious (Requirement 4 — no false positives).

* Complexity limits (Requirement 4): the analysis aborts when branch nesting

* exceeds MAX_BRANCH_DEPTH, loop nesting exceeds MAX_LOOP_DEPTH, or the

* number of simultaneously tracked variables exceeds MAX_VARS.

* PASS 2 — Backward analysis (implements Requirements 1, 3):

* Walk tokens in reverse program order, collecting value constraints from

* condition checks (e.g. "if (x == 5)"). Those constraints are propagated

* backward and annotated onto earlier reads of the same variable. An

* assignment to a variable kills the backward constraint for it. Block

* boundaries are treated conservatively (Requirement 4): any variable

* assigned inside a block is dropped from the backward state when the block

* boundary is crossed.

* PHASES (implemented inside the two passes):

* Phase N — null pointer tracking (Requirement 1):

* Pointer variables assigned nullptr/NULL/0 receive a Known(0) value.

* Conditions such as "p==nullptr", "!p", "if(p)" update the state on each

* branch so that reads on the null path are annotated. CheckNullPointer

* finds null values via Token::getValue(0) without any change.

* Phase U — uninitialized variable tracking (Requirement 2):

* Local variables declared without an initializer receive a UNINIT value.

* Assignments clear it. Reads while UNINIT is in state are annotated so

* CheckUninitVar can detect the uninitialized use.

* Phase U2 — enhanced uninit tracking, survives function calls (Requirement 2):

* A separate set (DFUninitSet) remembers which variables were declared

* without an initializer. After a function call clears the main state,

* UNINIT(Possible) is re-injected for every variable still in that set.

* This prevents calls from silently hiding uninitialized variables. The

* set entry is removed when the variable is first definitely assigned. At

* branch merges the sets are unioned (a variable stays if it might be

* uninitialized on any surviving path).

* Phase U-CI — conditional initialization resolution (Requirement 2, 4):

* When a variable is initialized inside the then-block of an if-statement

* with a simple "condVar == const" condition (no else branch), but remains

* possibly-uninit on the else-path, a conditional initialization fact is

* recorded: condInits[varId] = {condVarId, condValue}. When a subsequent

* guard (e.g. "if (condVar != const) { return; }") is processed and the

* surviving path has condVar == Known(const), the fact is resolved and

* varId is removed from uninits. This prevents Phase U2 from falsely

* re-injecting UNINIT after function calls when the guard guarantees the

* initializing block was taken (Requirement 4 — no false positives).

* The fact is invalidated if condVar is reassigned between the two ifs.

* Phase F — float/double tracking (Requirement 2, 5):

* Float, double, and long double scalar variables are tracked in the same

* DFState as integers, using FLOAT-typed ValueFlow::Value entries.

* evalConstFloat() constant-folds float expressions.

* Phase S — string literal non-null (Requirement 1):

* Assigning a string literal to a pointer stores an Impossible(0) value,

* meaning the pointer is definitely not null. This suppresses spurious

* null-pointer warnings from CheckNullPointer.

* Phase M — struct/class member field tracking (Requirements 1, 2):

* "obj.field = value" and "obj.field" reads are tracked in a separate

* DFMemberState keyed by (objectVarId, fieldVarId). Forked at branches

* and merged at join points using the same rules as the main DFState.

* Function calls clear the member state conservatively.

* Phase C — container size tracking:

* Container variables (std::vector, std::string, …) are tracked in a

* separate DFContState using CONTAINER_SIZE values. push_back /

* emplace_back increment the known size; pop_back decrements; clear

* resets to 0.

* Phase UA-FC — for-loop condition annotation (Requirement 2, ticket #7614):

* Variable reads in the for-loop condition "for (init; cond; incr)" are

* annotated with the current state after the init clause, before the

* increment clause is processed. Without this, the increment-clause

* handler (dropWrittenVars / hasTopLevelAssignment) would erase the

* variable from ctx.state and ctx.uninits first, hiding the UNINIT value

* from the condition read and producing a false negative.

* Phase Cast — cast expression value propagation:

* evalConstInt and evalConstFloat look through C-style and C++ cast

* expressions so that assignments such as "int x = (int)5;" propagate

* the constant value correctly.

* Phase TC — try/catch block handling (Requirement 4):

* catch blocks are treated as alternative execution paths. When a catch

* block terminates unconditionally (return/throw), the surviving path is

* the post-try path and the catch-block state is discarded. When the

* catch block falls through, the post-try and post-catch states are merged

* at the join point. This prevents stale null assignments inside

* terminating catch blocks from producing false null-pointer positives on

* the surviving (non-exceptional) path.

* Phase U-ArgAssign — assignment inside function-call argument list

* (Requirement 4):

* When a function argument list contains an assignment inside a

* parenthesised group (e.g. "(x = get()) == NULL ? \":\" : x"), the

* argument annotation loop skips '(...)' groups to avoid double-processing.

* Without special handling, the assignment is missed and the subsequent

* read of x in the ternary false-branch is falsely annotated as UNINIT.

* Phase U-ArgAssign fixes this: before each '(...)' group is skipped, the

* tokens inside are scanned for plain '=' assignments. Any assigned

* variable is erased from ctx.uninits and ctx.state so that subsequent

* reads of that variable within the same argument list are not falsely

* flagged. This handles all value types (UNINIT, integer, float, pointer).

* Phase MC — macro-expanded if-condition suppression (Requirement 4):

* When the "if" keyword is expanded from a macro (tok->isExpandedMacro()),

* applyConditionConstraint is skipped entirely for that if-statement.

* A null-check inside a macro (e.g. "if (p != NULL) p->x = v;") is an

* internal safety guard, not a program-level assertion about the caller's

* variable. Applying the condition would merge a Possible(null) onto the

* post-macro path and trigger a false nullPointerRedundantCheck warning at

* the next dereference (Requirement 4 — no false positives).

* =========================================================================

* FILE LAYOUT

* =========================================================================

* 1. Core types

* DFValues — vector of values for one variable

* DFState — varId → DFValues (int, ptr, float, UNINIT)

* DFMemberKey — (objectVarId, fieldVarId) composite key (Phase M)

* DFMemberKeyHash — hash for DFMemberKey

* DFMemberState — DFMemberKey → DFValues (Phase M)

* DFContState — varId → CONTAINER_SIZE DFValues (Phase C)

* DFContext — bundles DFState + DFMemberState + DFContState +

* DFUninitSet; forked at branches and merged at joins

* 2. Complexity limits (MAX_BRANCH_DEPTH, MAX_LOOP_DEPTH, MAX_VARS)

* 3. Helper predicates

* isTrackedVar — integral non-pointer scalar (Requirement 5)

* isTrackedPtrVar — raw pointer (Phase N)

* isTrackedFloatVar — float/double/long double scalar (Phase F)

* isTrackedContainerVar — std::vector / std::string / … (Phase C)

* isLhsOfAssignment — token is written, not read

* isFunctionCallOpen — '(' opening a function call

* 4. evalConstInt / evalConstFloat — constant-fold expressions

* 5. annotateTok / annotateMemberTok / annotateContainerTok

* Write state values onto tokens (implements Requirement 2 output)

* Helpers: andLhsContainsVarGuard, orLhsContainsNullTest,

* isRhsOfGuardedAndBySameVar, isRhsOfGuardedOrByNullTest,

* isZeroValue, isNullTestCondition, isNonNullTestCondition,

* isInTrueBranchOfTernaryBySameVar,

* isInFalseBranchOfNegatedTernaryBySameVar

* 5b. Partial null-guard detection (Phase UA-PG, ticket #3054)

* subtreeContainsVar, isDirectOrOperandForVar, lhsContainsVarInOr,

* isInPartiallyGuardedContext:

* Detect "(p || q) && *p" where the OR guard does not exclusively

* protect p. Inject Possible(0) so CheckNullPointer can warn.

* 6. mergeStates / mergeMemberStates / mergeContexts

* Combine two branch states at join points (Requirement 3)

* 7. blockTerminates — detect unconditional exit from a block

* 8. applyConditionConstraint — derive state update from a condition

* 9. dropWrittenVars / hasTopLevelAssignment / collectWhileExitConstraints

* Loop body helpers (Requirement 4)

* 9d. suppressUninitForSentinelLoop — Phase NW3b helper (all loop types)

* 9e. resolveCondInits / recordConditionalInits — Phase U-CI helpers

* 10. forwardAnalyzeBlock — Pass 1 (forward walk, all phases)

* 11. backwardAnalyzeBlock — Pass 2 (backward walk, int/ptr constraints)

* 12. DataFlow::setValues — public entry point

#include "dataflow.h"

#include "astutils.h"

#include "mathlib.h"

#include "settings.h"

#include "symboldatabase.h"

#include "token.h"

#include "tokenlist.h"

#include "vfvalue.h"

#include <algorithm>

#include <cmath>

#include <unordered_map>

#include <unordered_set>

#include <vector>

// Suppress unused-parameter warnings for parameters kept for API symmetry.

// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)

#define DF_UNUSED(x) (void)(x)

namespace {

// Returns true if the floating-point value is exactly zero (positive or negative).

// Used instead of direct == 0.0 comparison to avoid -Wfloat-equal warnings.

static bool isFloatZero(double v) {

return std::fpclassify(v) == FP_ZERO;

}

// ===========================================================================

// 1. Core types

// ===========================================================================

/// The list of known/possible values associated with one variable.

/// Multiple entries represent multiple possible values on different paths

/// (e.g. after merging an if/else where each branch assigns a different value).

using DFValues = std::vector<ValueFlow::Value>;

/// The analysis state for one point in the program.

/// Maps each tracked variable ID to its current set of values.

///

/// Used for integer, pointer, float, and UNINIT values — all in one map

/// distinguished by ValueFlow::Value::valueType.

using DFState = std::unordered_map<nonneg int, DFValues>;

/// Phase M: composite key identifying one struct/class member field.

/// First element is the variable ID of the object; second is the variable ID

/// of the field within that object's type.

using DFMemberKey = std::pair<nonneg int, nonneg int>;

/// Hash for DFMemberKey. Uses a multiplicative combine to reduce collisions

/// from closely-spaced integer pairs.

struct DFMemberKeyHash {

std::size_t operator()(const DFMemberKey& k) const noexcept {

// Knuth multiplicative hash combine

return std::hash<nonneg int>()(k.first) * 2654435761UL ^

std::hash<nonneg int>()(k.second);

}

};

/// Phase M: per-member-field state. Maps (objectVarId, fieldVarId) to the

/// current set of values that field is known/possibly equal to.

using DFMemberState = std::unordered_map<DFMemberKey, DFValues, DFMemberKeyHash>;

/// Phase C: per-container state. Maps container varId to CONTAINER_SIZE

/// values representing the current known/possible size.

/// Re-uses the same DFValues type; all stored values have

/// valueType == ValueFlow::Value::ValueType::CONTAINER_SIZE.

using DFContState = std::unordered_map<nonneg int, DFValues>;

/// Phase U2: set of variable IDs that were declared without an initializer

/// in the current function body. Survives function calls (unlike DFState)

/// so UNINIT can be re-injected after calls. Removed when the variable

/// receives its first definite assignment.

using DFUninitSet = std::unordered_set<nonneg int>;

/// Bundle of per-branch analysis state for the forward pass.

///

/// All four fields are copied when the analysis forks at an if/else branch,

/// and merged back at the join point. For DFUninitSet, the merge takes the

/// union: a variable stays in the set if it might be uninitialized on any

/// surviving path.

struct DFContext {

DFState state; ///< varId → values (int, ptr, float, UNINIT)

DFMemberState members; ///< (objVarId, fieldVarId) → values (Phase M)

DFContState containers; ///< container varId → CONTAINER_SIZE (Phase C)

DFUninitSet uninits; ///< declared-without-init varIds (Phase U2)

/// Requirement (Phase L): varIds of local, non-pointer, non-reference,

/// non-static scalar variables. These cannot be modified by a called

/// function unless their address has been taken. Populated when a

/// variable declaration is first encountered; never cleared.

std::unordered_set<nonneg int> localScalars;

/// Requirement (Phase L): varIds whose address has been observed taken

/// via the unary '&' operator. A local scalar in this set is excluded

/// from the "safe to preserve" set at call sites, because a called

/// function could reach it through a pointer that holds &var.

std::unordered_set<nonneg int> addressTaken;

/// Phase U-CI: conditional initialization facts.

/// condInits[varId] = {condVarId, condValue}: varId was initialized

/// in the then-block of an if-statement whose condition was

/// "condVarId == condValue". When condVarId later becomes Known

/// (condValue) in state (e.g. after a guard "if (condVarId != condValue)

/// { return; }"), the fact is resolved and varId is removed from uninits.

/// Invalidated when condVarId is assigned in the outer scope.

std::unordered_map<nonneg int, std::pair<nonneg int, MathLib::bigint>> condInits;

};

// ===========================================================================

// 2. Complexity limits

// ===========================================================================

// Requirement 4: the analysis must abort on complex paths rather than risk

// producing a false positive. These constants define the abort thresholds.

/// Maximum nesting depth of if/else branches before aborting.

/// Deeper nesting makes it hard to merge states correctly without FP risk.

static constexpr int MAX_BRANCH_DEPTH = 4;

/// Maximum nesting depth of loops before aborting.

static constexpr int MAX_LOOP_DEPTH = 2;

/// Maximum number of simultaneously tracked variables before aborting.

/// Keeps memory usage and merge cost bounded.

static constexpr std::size_t MAX_VARS = 64;

// ===========================================================================

// 3. Helper predicates

// ===========================================================================

/// Returns true when tok refers to an integer (non-pointer) variable that the

/// analysis should track.

///

/// Requirement 5: only track integral, non-pointer types initially.

/// Pointers, floats, and user-defined types are excluded.

static bool isTrackedVar(const Token* tok) {

if (!tok || tok->varId() == 0)

return false;

const Variable* var = tok->variable();

if (!var)

return false;

const ValueType* vt = var->valueType();

return vt && vt->isIntegral() && vt->pointer == 0;

}

/// Returns true when tok refers to a pointer variable that the analysis

/// should track for null-pointer and uninitialized-pointer bugs.

///

/// Phase N: track pointers assigned nullptr so that CheckNullPointer can

/// detect null dereferences. Only raw (non-function, non-array) pointers

/// with exactly one level of indirection are tracked to keep the

/// implementation simple and conservative.

static bool isTrackedPtrVar(const Token* tok) {

if (!tok || tok->varId() == 0)

return false;

const Variable* var = tok->variable();

if (!var || var->isArray())

return false;

const ValueType* vt = var->valueType();

// pointer > 0: one or more levels of pointer indirection.

// Exclude function pointers (they have a function type base).

return vt && vt->pointer > 0;

}

/// Returns true when tok is a '.' AST node representing a member pointer

/// field access that the analysis should track for null conditions.

///

/// Requirements:

/// - tok is the '.' AST operator node (member-access expression)

/// - astOperand1() (the object) has a non-zero varId

/// - astOperand2() (the field) has a non-zero varId

/// - the field's variable type is a raw pointer (pointer > 0)

///

/// Phase MN: used by applyConditionConstraint to recognise conditions of the

/// form "!s.x", "if(s.x)", "s.x == nullptr", "s.x != nullptr" where s.x is

/// a pointer-typed struct/class member field.

static bool isMemberPtrAccess(const Token* tok) {

if (!tok || tok->str() != ".")

return false;

const Token* objTok = tok->astOperand1();

const Token* fieldTok = tok->astOperand2();

if (!objTok || objTok->varId() == 0)

return false;

if (!fieldTok || fieldTok->varId() == 0)

return false;

const Variable* fieldVar = fieldTok->variable();

if (!fieldVar)

return false;

const ValueType* vt = fieldVar->valueType();

// Only track simple pointer fields (pointer > 0), not arrays or function pointers.

return vt && vt->pointer > 0;

}

/// Returns true when tok refers to a floating-point scalar variable.

///

/// Phase F: track float/double/long double non-pointer variables so that

/// the analysis can propagate constant float values through assignments and

/// arithmetic, enabling checkers to detect float-related bugs.

static bool isTrackedFloatVar(const Token* tok) {

if (!tok || tok->varId() == 0)

return false;

const Variable* var = tok->variable();

if (!var)

return false;

const ValueType* vt = var->valueType();

// isFloat() returns true for FLOAT, DOUBLE, and LONGDOUBLE.

return vt && vt->isFloat() && vt->pointer == 0;

}

/// Returns true when tok refers to a container variable (std::vector,

/// std::string, etc.) that the analysis should track for size information.

///

/// Phase C: requires library configuration to be loaded so that ValueType

/// is populated with CONTAINER type information.

static bool isTrackedContainerVar(const Token* tok) {

if (!tok || tok->varId() == 0)

return false;

const Variable* var = tok->variable();

if (!var)

return false;

const ValueType* vt = var->valueType();

return vt &&

vt->type == ValueType::Type::CONTAINER &&

vt->container != nullptr;

}

/// Returns true when tok is the direct left-hand-side operand of an

/// assignment operator (=, +=, -=, …).

/// Such tokens are being written to, not read, so they must not be annotated

/// with the pre-assignment state value.

static bool isLhsOfAssignment(const Token* tok) {

return tok->astParent() &&

tok->astParent()->isAssignmentOp() &&

tok->astParent()->astOperand1() == tok;

}

/// Returns true when tok is the opening '(' of a function call.

/// Keywords that use '(' (if, for, while, …) are excluded.

/// This is used to conservatively clear state on any call site because a

/// called function might modify variables through pointers or global state.

///

/// Two call forms are recognised:

/// name(args) — ordinary call: prev is a name token

/// (*expr)(args) — function-pointer call: prev is ')' closing the

/// dereferenced function-pointer expression

/// For the second form only: the opening '(' of the preceding expression

/// must immediately be followed by '*', so that "(type)(expr)" cast

/// expressions (where prev->link()->next() is a type keyword, not '*')

/// are not mistaken for function calls.

static bool isFunctionCallOpen(const Token* tok) {

if (!tok || tok->str() != "(")

return false;

const Token* prev = tok->previous();

if (!prev)

return false;

if (prev->isName()) {

// Exclude control-flow keywords

return false;

// Exclude type-cast expressions: "(int)x" — prev is a type keyword

if (prev->isStandardType())

return false;

return true;

}

// Function-pointer call: (*f)(args), (*obj->fn)(args), etc.

// The preceding ')' closes the parenthesised function-pointer expression,

// whose first token after '(' is '*' (the dereference operator).

if (prev->str() == ")" && prev->link()) {

const Token* afterOpen = prev->link()->next();

if (afterOpen && afterOpen->str() == "*")

return true;

}

// Template function call: name<args>(args), e.g. "Calc<true>(x, y)".

// The tokenizer sets Token::link() on the closing '>' of a template

// argument list so that '>' with a link means a template close, not a

// comparison operator. This handles calls of the form:

// func<T>(args)

// Scope::func<T>(args)

// obj.func<T>(args)

if (prev->str() == ">" && prev->link())

return true;

return false;

}

// ===========================================================================

// 3b. isZeroValue — moved here so evalConstInt can use it

// ===========================================================================

/// Returns true when tok carries a Known integer value of zero.

/// Used by evalConstInt (to recognise null pointer constants that have been

/// annotated with Known(0) in the literal pass) and by isNullTestCondition /

/// isNonNullTestCondition to recognise null literal operands in comparisons

/// such as "p == 0" or "p != 0".

static bool isZeroValue(const Token* t) {

return t && t->hasKnownIntValue() && t->getKnownIntValue() == 0;

}

// ===========================================================================

// 4. evalConstInt

// ===========================================================================

/// Try to evaluate the expression rooted at `expr` as a compile-time integer

/// constant. `state` is consulted for variables that already have a single

/// known value. Returns true and sets `result` on success, false otherwise.

///

/// Handles:

/// - Integer literals (42, 0xFF, …)

/// - Character literals ('a', …)

/// - Null pointer constants → 0 (via isZeroValue; Phase N)

/// - Variables with a Known value in state (copy propagation)

/// - Unary minus (-x)

/// - Binary arithmetic (+, -, *, /, %)

///

/// Does NOT handle:

/// - Variables with multiple Possible values (ambiguous → return false)

/// - Pointer arithmetic, floating-point, bitwise ops (not needed for MVP)

/// - Division / modulo by zero (returns false — no value, no FP)

static bool evalConstInt(const Token* expr, const DFState& state, ValueFlow::Value& result) {

if (!expr)

return false;

// --- Cast expression: look through to the operand ---

// Phase Cast: "(T)expr" has the same integer value as expr.

// For a C-style cast the token structure is:

// '(' (isCast==true)

// astOperand1() → the expression being cast (no astOperand2)

// For a C++ functional cast, e.g. int(expr):

// '(' (isCast==true)

// astOperand1() → type keyword

// astOperand2() → the expression being cast

// We follow the same pattern used in programmemory.cpp (execute, cast branch).

if (expr->str() == "(" && expr->isCast()) {

if (expr->astOperand2())

return evalConstInt(expr->astOperand2(), state, result);

if (expr->astOperand1())

return evalConstInt(expr->astOperand1(), state, result);

return false;

}

// --- null pointer constant ---

// Phase N: any non-variable token that already carries a Known(0) integer

// value is treated as a null pointer constant. This covers:

// - C++11 "nullptr" (annotated in the setValues literal pass)

// - C macro "NULL" after preprocessing (expanded to the literal "0")

// - Any other zero-valued constant resolved by an earlier pass

// Using isZeroValue() avoids hardcoding specific token names here.

if (expr->varId() == 0 && isZeroValue(expr)) {

result = ValueFlow::Value(0LL);

result.setKnown();

return true;

}

// --- Integer literal ---

if (expr->isNumber() && MathLib::isInt(expr->str())) {

result = ValueFlow::Value(MathLib::toBigNumber(expr->str()));

result.setKnown();

return true;

}

// --- Character literal ---

if (expr->tokType() == Token::eChar) {

try {

result = ValueFlow::Value(MathLib::toBigNumber(expr->str()));

result.setKnown();

return true;

} catch (...) {

return false;

}

// --- Variable: look up in state ---

// Only propagate when there is a single Known integer value (unambiguous).

if (expr->varId() > 0) {

const auto it = state.find(expr->varId());

if (it != state.end() &&

it->second.size() == 1 &&

it->second[0].isKnown() &&

it->second[0].isIntValue()) {

result = it->second[0];

return true;

}

// --- Unary minus ---

if (expr->str() == "-" && expr->astOperand1() && !expr->astOperand2()) {

ValueFlow::Value inner;

if (!evalConstInt(expr->astOperand1(), state, inner))

return false;

inner.intvalue = -inner.intvalue;

inner.varvalue = inner.intvalue;

inner.wideintvalue = inner.intvalue;

result = inner;

return true;

}

// --- Binary arithmetic ---

if (expr->astOperand1() && expr->astOperand2()) {

ValueFlow::Value lhs, rhs;

if (!evalConstInt(expr->astOperand1(), state, lhs) ||

!evalConstInt(expr->astOperand2(), state, rhs))

return false;

if (!lhs.isIntValue() || !rhs.isIntValue())

return false;

result = ValueFlow::Value(0);

result.setKnown();

const std::string& op = expr->str();

if (op == "+")

result.intvalue = lhs.intvalue + rhs.intvalue;

else if (op == "-")

result.intvalue = lhs.intvalue - rhs.intvalue;

else if (op == "*")

result.intvalue = lhs.intvalue * rhs.intvalue;

else if (op == "/" && rhs.intvalue != 0)

result.intvalue = lhs.intvalue / rhs.intvalue;

else if (op == "%" && rhs.intvalue != 0)

result.intvalue = lhs.intvalue % rhs.intvalue;

else

return false; // division/modulo by zero, or unsupported operator

result.varvalue = result.intvalue;

result.wideintvalue = result.intvalue;

return true;

}

return false;

}

// ===========================================================================

// 4b. evalConstFloat (Phase F)

// ===========================================================================

/// Try to evaluate the expression rooted at `expr` as a compile-time

/// floating-point constant. Returns true and sets `result` (with

/// valueType==FLOAT and floatValue set) on success, false otherwise.

///

/// Phase F1: constant-fold float/double expressions. Reuses the same

/// recursive structure as evalConstInt().

///

/// Handles:

/// - Float literals (3.14, 1.0e-5, …)

/// - Integer literals (promoted to double for mixed arithmetic)

/// - Variables with a single Known FLOAT value in state

/// - Unary minus (-x)

/// - Binary arithmetic (+, -, *, /)

/// - Division by zero (returns false — no value, no FP)

static bool evalConstFloat(const Token* expr, const DFState& state, ValueFlow::Value& result) {

if (!expr)

return false;

// --- Cast expression: look through to the operand ---

// Phase Cast: "(T)expr" has the same float value as expr.

// Mirrors the cast branch in evalConstInt.

if (expr->str() == "(" && expr->isCast()) {

if (expr->astOperand2())

return evalConstFloat(expr->astOperand2(), state, result);

if (expr->astOperand1())

return evalConstFloat(expr->astOperand1(), state, result);

return false;

}

// --- Float literal ---

if (expr->isNumber() && MathLib::isFloat(expr->str())) {

result = ValueFlow::Value();

result.valueType = ValueFlow::Value::ValueType::FLOAT;

result.floatValue = MathLib::toDoubleNumber(expr->str());

result.setKnown();

return true;

}

// --- Integer literal (promoted to float context) ---

if (expr->isNumber() && MathLib::isInt(expr->str())) {

result = ValueFlow::Value();

result.valueType = ValueFlow::Value::ValueType::FLOAT;

result.floatValue = static_cast<double>(MathLib::toBigNumber(expr->str()));

result.setKnown();

return true;

}

// --- Variable: look up Known FLOAT value in state ---

if (expr->varId() > 0) {

const auto it = state.find(expr->varId());

if (it != state.end() &&

it->second.size() == 1 &&

it->second[0].isKnown() &&

it->second[0].isFloatValue()) {

result = it->second[0];

return true;

}

// --- Unary minus ---

if (expr->isUnaryOp("-")) {

ValueFlow::Value inner;

if (!evalConstFloat(expr->astOperand1(), state, inner))

return false;

inner.floatValue = -inner.floatValue;

result = inner;

return true;

}

// --- Binary arithmetic ---

if (expr->isBinaryOp()) {

ValueFlow::Value lhs, rhs;

if (!evalConstFloat(expr->astOperand1(), state, lhs) ||

!evalConstFloat(expr->astOperand2(), state, rhs))

return false;

if (!lhs.isFloatValue() || !rhs.isFloatValue())

return false;

result = ValueFlow::Value();

result.valueType = ValueFlow::Value::ValueType::FLOAT;

result.setKnown();

const std::string& op = expr->str();

if (op == "+")

result.floatValue = lhs.floatValue + rhs.floatValue;

else if (op == "-")

result.floatValue = lhs.floatValue - rhs.floatValue;

else if (op == "*")

result.floatValue = lhs.floatValue * rhs.floatValue;

else if (op == "/" && !isFloatZero(rhs.floatValue))

result.floatValue = lhs.floatValue / rhs.floatValue;

else

return false; // division by zero or unsupported operator

return true;

}

return false;

}

// ===========================================================================

// 5. annotateTok / annotateMemberTok / annotateContainerTok

// ===========================================================================

// These functions write the current analysis state values onto token nodes so

// that downstream checkers (CheckNullPointer, CheckUninitVar, …) can read

// them via Token::getValue() / Token::addValue() without any modification.

// This is the output mechanism of the analysis (Requirement 2).

// Several helper predicates are defined first because annotateTok needs them

// to decide whether a particular use of a pointer is already guarded by a

// null check in the surrounding expression (e.g. "p && p->x" — do not

// annotate p->x with a possible-null value because p is guarded by the &&).

/// Returns true when the LHS subtree of a '&&' chain contains a direct

/// non-null guard for varId (the pointer variable itself).

/// Handles chained && expressions such as "s && foo" appearing as the

/// LHS of a later "&&", e.g. "(s && foo) && s->x" — s is guarded even

/// though it is not the immediate LHS of the outer &&.

/// Conservative: only recognises the simple "variable itself" form of guard.

static bool andLhsContainsVarGuard(const Token* lhs, nonneg int varId) {

if (!lhs)

return false;

// Direct non-null guard: the pointer variable itself.

if (lhs->varId() == varId && isTrackedPtrVar(lhs))

return true;

// Chained &&: recurse into both operands.

if (lhs->str() == "&&")

return andLhsContainsVarGuard(lhs->astOperand1(), varId) ||

andLhsContainsVarGuard(lhs->astOperand2(), varId);

return false;

}

// Forward declaration needed by orLhsContainsNullTest (defined below).

static bool isNullTestCondition(const Token* cond, nonneg int varId);

// Requirement: when the LHS subtree of a '||' chain contains a null-test for

// varId (e.g. "!s", "s == 0", "s == nullptr"), then the RHS of the '||' is

// only evaluated when the null-test is false, i.e. when the pointer is

// non-null. This handles chained || such as "!s || foo" appearing as the LHS

// of a later "||", e.g. "(!s || foo) || s->x".

// Conservative: delegates to isNullTestCondition for the leaf check.

static bool orLhsContainsNullTest(const Token* lhs, nonneg int varId) {

if (!lhs)

return false;

// Direct null-test in the LHS: !s, s==0, etc.

if (isNullTestCondition(lhs, varId))

return true;

// Chained ||: recurse into both operands.

if (lhs->str() == "||")

return orLhsContainsNullTest(lhs->astOperand1(), varId) ||

orLhsContainsNullTest(lhs->astOperand2(), varId);

return false;

}

/// Walk up the AST parent chain looking for a binary operator node `op`

/// ("&&" or "||") where tok is on the right-hand side and the left-hand side

/// satisfies `lhsCheck(lhs, varId)`. Returns true if such a node is found.

///

/// Used to suppress false-positive nullable annotations when the pointer is

/// already guarded by a preceding short-circuit check in the same expression

/// (Requirement 4). The `LhsCheck` template parameter avoids std::function

/// overhead; it is instantiated with andLhsContainsVarGuard or

/// orLhsContainsNullTest below.

template<typename LhsCheck>

static bool isRhsOfChainedOp(const Token* tok, const char* op, LhsCheck lhsCheck) {

if (!tok || tok->varId() == 0)

return false;

const nonneg int varId = tok->varId();

const Token* child = tok;

for (const Token* parent = tok->astParent(); parent; child = parent, parent = parent->astParent()) {

if (parent->str() != op)

continue;

if (parent->astOperand2() != child)

continue;

if (lhsCheck(parent->astOperand1(), varId))

return true;

}

return false;

}

/// Returns true when tok is on the RHS of a "&&" chain whose LHS guards

/// the same pointer variable (e.g. "s && foo && s->x" — s->x is safe).

static bool isRhsOfGuardedAndBySameVar(const Token* tok) {

return isRhsOfChainedOp(tok, "&&", andLhsContainsVarGuard);

}

/// Returns true when tok is on the RHS of a "||" chain whose LHS contains

/// a null-test for the same variable (e.g. "!s || foo || s->x" — s->x is safe).

static bool isRhsOfGuardedOrByNullTest(const Token* tok) {

return isRhsOfChainedOp(tok, "||", orLhsContainsNullTest);

}

// Forward declaration needed because isNullTestCondition and

// isNonNullTestCondition are mutually recursive.

static bool isNonNullTestCondition(const Token* cond, nonneg int varId);

/// Returns true when `cond` is a null-test of `varId` — i.e. the condition

/// is true exactly when the variable is null. Covers:

/// !p, p == 0, 0 == p, p == nullptr, nullptr == p, !(non-null-test)

static bool isNullTestCondition(const Token* cond, nonneg int varId) {

if (!cond)

return false;

if (cond->str() == "!" && cond->astOperand1() &&

cond->astOperand1()->varId() == varId && isTrackedPtrVar(cond->astOperand1()))

return true;

if (cond->str() == "==") {

const Token* lhs = cond->astOperand1();

const Token* rhs = cond->astOperand2();

if (lhs && lhs->varId() == varId && isTrackedPtrVar(lhs) && isZeroValue(rhs))

return true;

if (rhs && rhs->varId() == varId && isTrackedPtrVar(rhs) && isZeroValue(lhs))

return true;

}

// !(non-null-test) is a null-test, e.g. !(s!=0)

if (cond->str() == "!" && isNonNullTestCondition(cond->astOperand1(), varId))

return true;

return false;

}

/// Returns true when `cond` is a non-null-test of `varId` — i.e. the condition

/// is true exactly when the variable is non-null. Covers:

/// p, p != 0, 0 != p, p != nullptr, nullptr != p, !(null-test)

static bool isNonNullTestCondition(const Token* cond, nonneg int varId) {

if (!cond)

return false;

if (cond->varId() == varId && isTrackedPtrVar(cond))

return true;

if (cond->str() == "!=") {

const Token* lhs = cond->astOperand1();

const Token* rhs = cond->astOperand2();

if (lhs && lhs->varId() == varId && isTrackedPtrVar(lhs) && isZeroValue(rhs))

return true;

if (rhs && rhs->varId() == varId && isTrackedPtrVar(rhs) && isZeroValue(lhs))

return true;

}

// !(null-test) is a non-null-test, e.g. !(s==0)

if (cond->str() == "!" && isNullTestCondition(cond->astOperand1(), varId))

return true;

return false;

}

/// Returns true when tok is in the true branch of a ternary where the

/// condition is a non-null-test of the same pointer variable as tok.

/// Covers: "p ? p->x : 0", "p!=0 ? p->x : 0", "0!=p ? p->x : 0", etc.

/// In that context the pointer is known non-null, so nullable-zero annotations

/// must not be attached (otherwise CheckNullPointer gets a false positive).

static bool isInTrueBranchOfTernaryBySameVar(const Token* tok) {

if (!tok || tok->varId() == 0)

return false;

const nonneg int varId = tok->varId();

const Token* child = tok;

for (const Token* parent = tok->astParent(); parent; child = parent, parent = parent->astParent()) {

if (parent->str() != ":")

continue;

// child must be the true branch (astOperand1 of ':'), not the false branch

if (parent->astOperand1() != child)

continue;

// ':' must be the rhs of a '?'

const Token* question = parent->astParent();

if (!question || question->str() != "?")

continue;

if (isNonNullTestCondition(question->astOperand1(), varId))

return true;

}

return false;

}

/// Returns true when tok is in the false branch of a ternary where the

/// condition is a null-test of the same pointer variable as tok.

/// Covers: "!p ? 0 : p->x", "p==0 ? 0 : p->x", "0==p ? 0 : p->x", etc.

/// In that context the pointer is known non-null, so nullable-zero annotations

/// must not be attached (otherwise CheckNullPointer gets a false positive).

static bool isInFalseBranchOfNegatedTernaryBySameVar(const Token* tok) {

if (!tok || tok->varId() == 0)

return false;

const nonneg int varId = tok->varId();

const Token* child = tok;

for (const Token* parent = tok->astParent(); parent; child = parent, parent = parent->astParent()) {

if (parent->str() != ":")

continue;

// child must be the false branch (astOperand2 of ':'), not the true branch

if (parent->astOperand2() != child)

continue;

// ':' must be the rhs of a '?'

const Token* question = parent->astParent();

View remainder of file in raw view

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

FilesExpand file tree

dataflow.cpp

Latest commit

History

dataflow.cpp

File metadata and controls