From 060db71ca6094991b56e064520d83047d51fa1d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20Silva?= Date: Wed, 26 Feb 2025 16:47:33 +0100 Subject: [PATCH] feat: userspace option --- collect_bugs.py | 6 +++++- collect_repos.py | 7 ++++++- export_bugs.py | 22 ++++++++++++++++++---- filter_bugs.py | 10 +++++++--- gitbugactions/actions/actions.py | 23 +++++++++++++++++------ gitbugactions/test_executor.py | 3 +++ 6 files changed, 56 insertions(+), 15 deletions(-) diff --git a/collect_bugs.py b/collect_bugs.py index 5628e4b2f3..3d00bf704b 100644 --- a/collect_bugs.py +++ b/collect_bugs.py @@ -439,6 +439,7 @@ def collect_bugs( strategies: Tuple[str] = ("PASS_PASS", "FAIL_PASS"), pull_requests: bool = False, base_image: str | None = None, + user_mapping: bool = True, ): """Collects bug-fixes from the repos listed in `data_path`. The result is saved on `results_path`. A file `data.json` is also created with information about @@ -459,11 +460,14 @@ def collect_bugs( The available strategies are: "PASS_PASS", "FAIL_PASS", "FAIL_FAIL", "FAIL_PASS_BUILD". pull_requests (bool, optional): If True, the commits in pull requests will be considered. Defaults to False. base_image (str, optional): Base image to use for building the runner image. If None, uses default. + user_mapping (bool, optional): Whether to include user/group ID mapping in the Dockerfile. Defaults to True. """ set_test_config(normalize_non_code_patch, strategies) Act.set_memory_limit(memory_limit) - Act(base_image=base_image) # Initialize Act with base_image + Act( + base_image=base_image, user_mapping=user_mapping + ) # Initialize Act with base_image and user_mapping github: GithubAPI = GithubAPI( per_page=100, pool_size=n_workers, diff --git a/collect_repos.py b/collect_repos.py index f5d3b0670e..4faaeb6f92 100644 --- a/collect_repos.py +++ b/collect_repos.py @@ -202,6 +202,7 @@ def collect_repos( n_workers: int = 1, out_path: str = "./out/", base_image: str | None = None, + user_mapping: bool = True, ): """Collect the repositories from GitHub that match the query and have executable GitHub Actions workflows with parsable tests. @@ -213,11 +214,15 @@ def collect_repos( n_workers (int, optional): Number of parallel workers. Defaults to 1. out_path (str, optional): Folder on which the results will be saved. Defaults to "./out/". base_image (str, optional): Base image to use for building the runner image. If None, uses default. + user_mapping (bool, optional): Whether to include user/group ID mapping in the Dockerfile. Defaults to True. """ if not Path(out_path).exists(): os.makedirs(out_path, exist_ok=True) - Act(base_image=base_image) # Initialize Act with base_image + Act( + base_image=base_image, user_mapping=user_mapping + ) # Initialize Act with base_image and user_mapping + crawler = RepoCrawler(query, pagination_freq=pagination_freq, n_workers=n_workers) crawler.get_repos(CollectReposStrategy(out_path)) diff --git a/export_bugs.py b/export_bugs.py index 586e12afb6..07bf246ed7 100644 --- a/export_bugs.py +++ b/export_bugs.py @@ -77,7 +77,12 @@ def create_exported_containers( break -def export_bug_containers(bug: Dict, export_path: str, base_image: str | None = None): +def export_bug_containers( + bug: Dict, + export_path: str, + base_image: str | None = None, + user_mapping: bool = True, +): TestExecutor.toggle_cleanup(False) repo_full_name = bug["repository"] commit_hash = bug["commit_hash"] @@ -99,6 +104,7 @@ def export_bug_containers(bug: Dict, export_path: str, base_image: str | None = act_cache_dir, default_actions, base_image=base_image, + user_mapping=user_mapping, ) runs = bug_patch.test_current_commit(executor, keep_containers=True) create_exported_containers( @@ -110,7 +116,10 @@ def export_bug_containers(bug: Dict, export_path: str, base_image: str | None = def export_bugs( - dataset_path: str, output_folder_path: str, base_image: str | None = None + dataset_path: str, + output_folder_path: str, + base_image: str | None = None, + user_mapping: bool = True, ): """Export the containers (reproducible environment) for the bug-fixes collected by collect_bugs. @@ -118,6 +127,7 @@ def export_bugs( dataset_path (str): Folder where the result of collect_bugs is. output_folder_path (str): Folder on which the results will be saved. base_image (str, optional): Base image to use for building the runner image. If None, uses default. + user_mapping (bool, optional): Whether to include user/group ID mapping in the Dockerfile. Defaults to True. """ # FIXME: export_bugs is not working with multiple workers n_workers = 1 @@ -126,7 +136,7 @@ def export_bugs( futures = [] futures_to_bug = {} - Act(base_image=base_image) + Act(base_image=base_image, user_mapping=user_mapping) for jsonl_path in os.listdir(dataset_path): if jsonl_path == "log.out" or jsonl_path == "data.json": @@ -138,7 +148,11 @@ def export_bugs( bug = json.loads(line) futures.append( executor.submit( - export_bug_containers, bug, output_folder_path, base_image + export_bug_containers, + bug, + output_folder_path, + base_image, + user_mapping, ) ) futures_to_bug[futures[-1]] = bug diff --git a/filter_bugs.py b/filter_bugs.py index 3ecffe6a1d..3215844eaa 100644 --- a/filter_bugs.py +++ b/filter_bugs.py @@ -93,19 +93,20 @@ def filter_bug( offline: bool, n_executions: int, base_image: str | None = None, + user_mapping: bool = True, ) -> str: try: repo_name = bug["repository"].replace("/", "-") bug_patch: BugPatch = BugPatch.from_dict(bug, repo_clone) diff_folder_path = os.path.join(export_path, repo_name, bug_patch.commit) - - Act(base_image=base_image) # Pass base_image to Act initialization + Act( + base_image=base_image, user_mapping=user_mapping + ) # Initialize Act with user_mapping option image_name = f"gitbugactions-run-bug:{str(uuid.uuid4())}" docker_client = DockerClient.getInstance() create_diff_image( "gitbugactions:latest", image_name, get_diff_path(diff_folder_path) ) - previous_commit_runs = [] previous_commit_with_diff_runs = [] current_commit_runs = [] @@ -198,6 +199,7 @@ def filter_bugs( offline: bool = True, n_executions: int = 5, base_image: str | None = None, + user_mapping: bool = True, ): """Creates the list of non-flaky bug-fixes that are able to be reproduced. @@ -209,6 +211,7 @@ def filter_bugs( offline (bool, optional): If the containers must be isolated from the internet. Defaults to True. n_executions (int, optional): Number of times to execute each test. Defaults to 5. base_image (str, optional): Base image to use for building the runner image. If None, uses default. + user_mapping (bool, optional): Whether to include user/group ID mapping in the Dockerfile. Defaults to True. """ ActCacheDirManager.init_act_cache_dirs(n_dirs=n_workers) executor = ThreadPoolExecutor(max_workers=n_workers) @@ -247,6 +250,7 @@ def filter_bugs( offline, n_executions, base_image, + user_mapping, ) future_to_bug[future] = bug finally: diff --git a/gitbugactions/actions/actions.py b/gitbugactions/actions/actions.py index 9c3925e703..7959b83771 100644 --- a/gitbugactions/actions/actions.py +++ b/gitbugactions/actions/actions.py @@ -244,6 +244,7 @@ def __init__( offline: bool = False, fail_strategy: ActFailureStrategy = ActTestsFailureStrategy(), base_image: str | None = None, + user_mapping: bool = True, ): """ Args: @@ -252,9 +253,10 @@ def __init__( offline (bool): Whether to run in offline mode fail_strategy (ActFailureStrategy): Strategy to determine test failures base_image (str): Base image to use for building the runner image. If None, uses default. + user_mapping (bool): Whether to include user/group ID mapping in the Dockerfile. Default is True. """ Act.__check_act() - Act.__setup_image(runner_image, base_image) + Act.__setup_image(runner_image, base_image, user_mapping) if reuse: self.flags = "--reuse" else: @@ -284,7 +286,9 @@ def __check_act(): Act.__ACT_CHECK = True @staticmethod - def __setup_image(runner_image: str, base_image: str | None = None): + def __setup_image( + runner_image: str, base_image: str | None = None, user_mapping: bool = True + ): with Act.__SETUP_LOCK: client = DockerClient.getInstance() if Act.__IMAGE_SETUP: @@ -304,10 +308,14 @@ def __setup_image(runner_image: str, base_image: str | None = None): with open("Dockerfile", "w") as f: client = DockerClient.getInstance() dockerfile = f"FROM {base}\n" - dockerfile += f"RUN sudo usermod -u 4000000 runneradmin\n" - dockerfile += f"RUN sudo groupadd -o -g {os.getgid()} {grp.getgrgid(os.getgid()).gr_name}\n" - dockerfile += f"RUN sudo usermod -G {os.getgid()} runner\n" - dockerfile += f"RUN sudo usermod -o -u {os.getuid()} runner\n" + + # Only add user mapping if requested + if user_mapping: + dockerfile += f"RUN sudo usermod -u 4000000 runneradmin\n" + dockerfile += f"RUN sudo groupadd -o -g {os.getgid()} {grp.getgrgid(os.getgid()).gr_name}\n" + dockerfile += f"RUN sudo usermod -G {os.getgid()} runner\n" + dockerfile += f"RUN sudo usermod -o -u {os.getuid()} runner\n" + f.write(dockerfile) client.images.build(path="./", tag="gitbugactions", forcerm=True) @@ -385,6 +393,7 @@ def __init__( runner_image: str = "gitbugactions:latest", offline: bool = False, base_image: str | None = None, + user_mapping: bool = True, ): self.repo_path = repo_path self.keep_containers = keep_containers @@ -394,6 +403,7 @@ def __init__( self.runner_image = runner_image self.offline = offline self.base_image = base_image + self.user_mapping = user_mapping workflows_path = os.path.join(repo_path, ".github", "workflows") for dirpath, dirnames, filenames in os.walk(workflows_path): @@ -476,6 +486,7 @@ def run_workflow( offline=self.offline, fail_strategy=act_fail_strategy, base_image=self.base_image, + user_mapping=self.user_mapping, ) return act.run_act(self.repo_path, workflow, act_cache_dir=act_cache_dir) diff --git a/gitbugactions/test_executor.py b/gitbugactions/test_executor.py index fd1d4c3ba9..51ffbfcf52 100644 --- a/gitbugactions/test_executor.py +++ b/gitbugactions/test_executor.py @@ -27,6 +27,7 @@ def __init__( default_actions: GitHubActions, runner_image: str = "gitbugactions:latest", base_image: str | None = None, + user_mapping: bool = True, ): TestExecutor.__schedule_cleanup(runner_image) self.act_cache_dir = act_cache_dir @@ -34,6 +35,7 @@ def __init__( self.runner_image = runner_image self.base_image = base_image self.language = language + self.user_mapping = user_mapping # Note: these default actions may have different configuration options # such as paths, runners, etc. self.default_actions = default_actions @@ -98,6 +100,7 @@ def run_tests( runner_image=self.runner_image, offline=offline, base_image=self.base_image, + user_mapping=self.user_mapping, ) if len(test_actions.test_workflows) == 0 and self.default_actions is not None: default_actions = True