diff --git a/README.md b/README.md index 2cffc8c5..4dfe17ba 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,11 @@ The `codemodder` package is available [on PyPI](https://pypi.org/project/codemod $ pip install codemodder ``` +> **WARNING:** You should also install the semgrep optional dependency for now as it isn't deprecated yet: + ``` + $ pip install codemodder[semgrep] + ``` + To install the package from source, use `pip`: ``` diff --git a/pyproject.toml b/pyproject.toml index 091b5d98..95a82ad3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,6 @@ dependencies = [ "pylint>=3.3,<3.4", "python-json-logger~=3.3.0", "PyYAML~=6.0.0", - "semgrep>=1.118,<1.119", "toml~=0.10.2", "tomlkit~=0.13.0", "wrapt~=1.17.0", @@ -50,6 +49,9 @@ generate-docs = 'codemodder.scripts.generate_docs:main' get-hashes = 'codemodder.scripts.get_hashes:main' [project.optional-dependencies] +semgrep = [ + "semgrep>=1.119,<1.120", +] test = [ "azure-ai-inference>=1.0.0b1,<2.0", "coverage>=7.8,<7.9", @@ -81,6 +83,8 @@ test = [ "flask_wtf~=1.2.0", "fickling~=0.1.0,>=0.1.3", "graphql-server~=3.0.0b7", + "unidiff>=0.7.5", + "semgrep>=1.119,<1.120", ] complexity = [ "radon==6.0.*", diff --git a/src/codemodder/codetf/common.py b/src/codemodder/codetf/common.py index 011809c0..fa1086ec 100644 --- a/src/codemodder/codetf/common.py +++ b/src/codemodder/codetf/common.py @@ -1,8 +1,9 @@ from abc import ABCMeta from enum import Enum from pathlib import Path +from typing import Optional -from pydantic import BaseModel +from pydantic import BaseModel, ConfigDict, model_validator from codemodder.logging import logger @@ -26,3 +27,60 @@ def write_report(self, outfile: Path | str) -> int: return 2 logger.debug("wrote report to %s", outfile) return 0 + + +class Rule(BaseModel): + id: str + name: str + url: Optional[str] = None + + model_config = ConfigDict(frozen=True) + + +class Finding(BaseModel): + id: str + rule: Rule + + model_config = ConfigDict(frozen=True) + + +class Action(CaseInsensitiveEnum): + ADD = "add" + REMOVE = "remove" + + +class PackageResult(CaseInsensitiveEnum): + COMPLETED = "completed" + FAILED = "failed" + SKIPPED = "skipped" + + +class DiffSide(CaseInsensitiveEnum): + LEFT = "left" + RIGHT = "right" + + +class PackageAction(BaseModel): + action: Action + result: PackageResult + package: str + + +class Change(BaseModel): + lineNumber: int + description: Optional[str] + diffSide: DiffSide = DiffSide.RIGHT + properties: Optional[dict] = None + packageActions: Optional[list[PackageAction]] = None + + @model_validator(mode="after") + def validate_lineNumber(self): + if self.lineNumber < 1: + raise ValueError("lineNumber must be greater than 0") + return self + + @model_validator(mode="after") + def validate_description(self): + if self.description is not None and not self.description: + raise ValueError("description must not be empty") + return self diff --git a/src/codemodder/codetf/v2/codetf.py b/src/codemodder/codetf/v2/codetf.py index fefb9f5d..863fe527 100644 --- a/src/codemodder/codetf/v2/codetf.py +++ b/src/codemodder/codetf/v2/codetf.py @@ -11,11 +11,21 @@ from enum import Enum from typing import TYPE_CHECKING, Optional -from pydantic import BaseModel, ConfigDict, model_validator +from pydantic import BaseModel, model_validator from codemodder import __version__ -from ..common import CaseInsensitiveEnum, CodeTFWriter +from ..common import ( + CaseInsensitiveEnum, +) +from ..common import Change as CommonChange +from ..common import ( + CodeTFWriter, +) +from ..common import Finding as CommonFinding +from ..common import ( + Rule, +) if TYPE_CHECKING: from codemodder.context import CodemodExecutionContext @@ -46,10 +56,6 @@ class PackageAction(BaseModel): class Change(BaseModel): lineNumber: int description: Optional[str] - # All of our changes are currently treated as additive, so it makes sense - # for the comments to appear on the RIGHT side of the split diff. Eventually we - # may want to differentiate between LEFT and RIGHT, but for now we'll just - # default to RIGHT. diffSide: DiffSide = DiffSide.RIGHT properties: Optional[dict] = None packageActions: Optional[list[PackageAction]] = None @@ -77,6 +83,15 @@ def with_findings(self, findings: list[Finding] | None) -> Change: fixedFindings=findings, ) + def to_common(self) -> CommonChange: + return CommonChange( + lineNumber=self.lineNumber, + description=self.description, + diffSide=self.diffSide, + properties=self.properties, + packageActions=self.packageActions, + ) + class AIMetadata(BaseModel): provider: Optional[str] = None @@ -138,20 +153,7 @@ def validate_description(self): return self -class Rule(BaseModel): - id: str - name: str - url: Optional[str] = None - - model_config = ConfigDict(frozen=True) - - -class Finding(BaseModel): - id: Optional[str] = None - rule: Rule - - model_config = ConfigDict(frozen=True) - +class Finding(CommonFinding): def to_unfixed_finding( self, *, @@ -170,7 +172,7 @@ def to_unfixed_finding( def with_rule(self, name: str, url: Optional[str]) -> Finding: return Finding( id=self.id, - rule=Rule(id=self.rule.id, name=name, url=url), + rule=Rule(id=self.rule.id, name=name, url=url) if self.rule else None, ) diff --git a/src/codemodder/codetf/v3/codetf.py b/src/codemodder/codetf/v3/codetf.py index 1ed7e50d..255b8cec 100644 --- a/src/codemodder/codetf/v3/codetf.py +++ b/src/codemodder/codetf/v3/codetf.py @@ -5,7 +5,8 @@ from pydantic import BaseModel, model_validator -from ..common import CaseInsensitiveEnum, CodeTFWriter +from ..common import Change, CodeTFWriter, Finding +from ..v2.codetf import Finding as V2Finding class Run(BaseModel): @@ -44,59 +45,6 @@ class FixStatus(BaseModel): details: Optional[str] -class Rule(BaseModel): - id: str - name: str - url: Optional[str] = None - - -class Finding(BaseModel): - id: str - rule: Optional[Rule] = None - - -class Action(CaseInsensitiveEnum): - ADD = "add" - REMOVE = "remove" - - -class PackageResult(CaseInsensitiveEnum): - COMPLETED = "completed" - FAILED = "failed" - SKIPPED = "skipped" - - -class DiffSide(CaseInsensitiveEnum): - LEFT = "left" - RIGHT = "right" - - -class PackageAction(BaseModel): - action: Action - result: PackageResult - package: str - - -class Change(BaseModel): - lineNumber: int - description: Optional[str] - diffSide: DiffSide = DiffSide.RIGHT - properties: Optional[dict] = None - packageActions: Optional[list[PackageAction]] = None - - @model_validator(mode="after") - def validate_lineNumber(self): - if self.lineNumber < 1: - raise ValueError("lineNumber must be greater than 0") - return self - - @model_validator(mode="after") - def validate_description(self): - if self.description is not None and not self.description: - raise ValueError("description must not be empty") - return self - - class ChangeSet(BaseModel): path: str diff: str @@ -158,7 +106,7 @@ class FixQuality(BaseModel): class FixResult(BaseModel): """Result corresponding to a single finding""" - finding: Finding + finding: Finding | V2Finding fixStatus: FixStatus changeSets: list[ChangeSet] fixMetadata: Optional[FixMetadata] = None diff --git a/src/codemodder/result.py b/src/codemodder/result.py index e6b473de..b5882e19 100644 --- a/src/codemodder/result.py +++ b/src/codemodder/result.py @@ -130,13 +130,19 @@ def from_sarif( id=finding_id, rule=Rule( id=rule_id, - name=rule_id, + name=cls.extract_finding_rule_name(sarif_result, sarif_run), url=cls.rule_url_from_id(sarif_result, sarif_run, rule_id), ), ), finding_msg=cls.extract_finding_message(sarif_result, sarif_run), ) + @classmethod + def extract_finding_rule_name( + cls, sarif_result: ResultModel, sarif_run: Run + ) -> str: + return cls.extract_rule_id(sarif_result, sarif_run) + @classmethod def extract_finding_message( cls, sarif_result: ResultModel, sarif_run: Run @@ -278,6 +284,18 @@ def results_for_rules(self, rule_ids: list[str]) -> list[ResultType]: ) ) + def result_by_finding_id(self, finding_id: str) -> ResultType | None: + """Returns first result matching the given finding ID.""" + return next( + ( + result + for results in self.results_for_rule.values() + for result in results + if result.finding and result.finding.id == finding_id + ), + None, + ) + def files_for_rule(self, rule_id: str) -> list[Path]: return list(self.get(rule_id, {}).keys()) diff --git a/tests/test_results.py b/tests/test_results.py index 04a18d8d..97990596 100644 --- a/tests/test_results.py +++ b/tests/test_results.py @@ -1,6 +1,8 @@ import json from pathlib import Path +from uuid import uuid4 +from codemodder.semgrep import SemgrepResultSet from core_codemods.sonar.results import SonarResultSet @@ -298,3 +300,72 @@ def test_sonar_robustness(self, tmpdir): result = SonarResultSet.from_json(sonar_json) # did not crash and returned an empty ResultSet assert not result + + def test_sonar_result_by_finding_id(self, tmpdir): + issues = { + "issues": [ + { + "rule": "python:S5659", + "status": "OPEN", + "component": "code.py", + "textRange": { + "startLine": 2, + "endLine": 2, + "startOffset": 2, + "endOffset": 2, + }, + "key": "1234", + } + ] + } + sonar_json = Path(tmpdir) / "sonar1.json" + sonar_json.write_text(json.dumps(issues)) + + result_set = SonarResultSet.from_json(sonar_json) + result = result_set.result_by_finding_id("1234") + assert result is not None + assert result.finding.rule.id == "python:S5659" + + def test_semgrep_sarif_result_by_finding_id(self, tmpdir): + uuid = str(uuid4()) + issues = { + "runs": [ + { + "tool": { + "driver": { + "name": "Semgrep", + "version": "0.100.0", + } + }, + "results": [ + { + "message": { + "text": "Found a potential issue", + }, + "guid": uuid, + "ruleId": "python:fake.rule.name", + "locations": [ + { + "physicalLocation": { + "artifactLocation": { + "uri": str(Path(tmpdir) / "code.py"), + }, + "region": { + "startLine": 2, + "startColumn": 2, + }, + } + } + ], + } + ], + } + ] + } + sarif_json = Path(tmpdir) / "semgrep.sarif" + sarif_json.write_text(json.dumps(issues)) + + result_set = SemgrepResultSet.from_sarif(sarif_json) + result = result_set.result_by_finding_id(uuid) + assert result is not None + assert result.finding.rule.id == "python:fake.rule.name"