From 060db71ca6094991b56e064520d83047d51fa1d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andr=C3=A9=20Silva?= <andre15andre@hotmail.com>
Date: Wed, 26 Feb 2025 16:47:33 +0100
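Subject: [PATCH] feat: add user_mapping option for runner image UID/GID mapping

Add a `user_mapping` flag (default True, preserving the previous behaviour)
to collect_bugs, collect_repos, export_bugs, filter_bugs, Act, and
TestExecutor. When it is False, the generated Dockerfile omits the
`usermod`/`groupadd` steps that map the host user and group IDs onto the
runner user.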
---
 collect_bugs.py                  |  6 +++++-
 collect_repos.py                 |  7 ++++++-
 export_bugs.py                   | 22 ++++++++++++++++++----
 filter_bugs.py                   | 10 +++++++---
 gitbugactions/actions/actions.py | 23 +++++++++++++++++------
 gitbugactions/test_executor.py   |  3 +++
 6 files changed, 56 insertions(+), 15 deletions(-)

diff --git a/collect_bugs.py b/collect_bugs.py
index 5628e4b2f3..3d00bf704b 100644
--- a/collect_bugs.py
+++ b/collect_bugs.py
@@ -439,6 +439,7 @@ def collect_bugs(
     strategies: Tuple[str] = ("PASS_PASS", "FAIL_PASS"),
     pull_requests: bool = False,
     base_image: str | None = None,
+    user_mapping: bool = True,
 ):
     """Collects bug-fixes from the repos listed in `data_path`. The result is saved
     on `results_path`. A file `data.json` is also created with information about
@@ -459,11 +460,14 @@ def collect_bugs(
                                            The available strategies are: "PASS_PASS", "FAIL_PASS", "FAIL_FAIL", "FAIL_PASS_BUILD".
         pull_requests (bool, optional): If True, the commits in pull requests will be considered. Defaults to False.
         base_image (str, optional): Base image to use for building the runner image. If None, uses default.
+        user_mapping (bool, optional): Whether to include user/group ID mapping in the Dockerfile. Defaults to True.
     """
     set_test_config(normalize_non_code_patch, strategies)
 
     Act.set_memory_limit(memory_limit)
-    Act(base_image=base_image)  # Initialize Act with base_image
+    Act(
+        base_image=base_image, user_mapping=user_mapping
+    )  # Initialize Act with base_image and user_mapping
     github: GithubAPI = GithubAPI(
         per_page=100,
         pool_size=n_workers,
diff --git a/collect_repos.py b/collect_repos.py
index f5d3b0670e..4faaeb6f92 100644
--- a/collect_repos.py
+++ b/collect_repos.py
@@ -202,6 +202,7 @@ def collect_repos(
     n_workers: int = 1,
     out_path: str = "./out/",
     base_image: str | None = None,
+    user_mapping: bool = True,
 ):
     """Collect the repositories from GitHub that match the query and have executable
     GitHub Actions workflows with parsable tests.
@@ -213,11 +214,15 @@ def collect_repos(
         n_workers (int, optional): Number of parallel workers. Defaults to 1.
         out_path (str, optional): Folder on which the results will be saved. Defaults to "./out/".
         base_image (str, optional): Base image to use for building the runner image. If None, uses default.
+        user_mapping (bool, optional): Whether to include user/group ID mapping in the Dockerfile. Defaults to True.
     """
     if not Path(out_path).exists():
         os.makedirs(out_path, exist_ok=True)
 
-    Act(base_image=base_image)  # Initialize Act with base_image
+    Act(
+        base_image=base_image, user_mapping=user_mapping
+    )  # Initialize Act with base_image and user_mapping
+
     crawler = RepoCrawler(query, pagination_freq=pagination_freq, n_workers=n_workers)
     crawler.get_repos(CollectReposStrategy(out_path))
 
diff --git a/export_bugs.py b/export_bugs.py
index 586e12afb6..07bf246ed7 100644
--- a/export_bugs.py
+++ b/export_bugs.py
@@ -77,7 +77,12 @@ def create_exported_containers(
         break
 
 
-def export_bug_containers(bug: Dict, export_path: str, base_image: str | None = None):
+def export_bug_containers(
+    bug: Dict,
+    export_path: str,
+    base_image: str | None = None,
+    user_mapping: bool = True,
+):
     TestExecutor.toggle_cleanup(False)
     repo_full_name = bug["repository"]
     commit_hash = bug["commit_hash"]
@@ -99,6 +104,7 @@ def export_bug_containers(bug: Dict, export_path: str, base_image: str | None =
             act_cache_dir,
             default_actions,
             base_image=base_image,
+            user_mapping=user_mapping,
         )
         runs = bug_patch.test_current_commit(executor, keep_containers=True)
         create_exported_containers(
@@ -110,7 +116,10 @@ def export_bug_containers(bug: Dict, export_path: str, base_image: str | None =
 
 
 def export_bugs(
-    dataset_path: str, output_folder_path: str, base_image: str | None = None
+    dataset_path: str,
+    output_folder_path: str,
+    base_image: str | None = None,
+    user_mapping: bool = True,
 ):
     """Export the containers (reproducible environment) for the bug-fixes collected by collect_bugs.
 
@@ -118,6 +127,7 @@ def export_bugs(
         dataset_path (str): Folder where the result of collect_bugs is.
         output_folder_path (str): Folder on which the results will be saved.
         base_image (str, optional): Base image to use for building the runner image. If None, uses default.
+        user_mapping (bool, optional): Whether to include user/group ID mapping in the Dockerfile. Defaults to True.
     """
     # FIXME: export_bugs is not working with multiple workers
     n_workers = 1
@@ -126,7 +136,7 @@ def export_bugs(
     futures = []
     futures_to_bug = {}
 
-    Act(base_image=base_image)
+    Act(base_image=base_image, user_mapping=user_mapping)
 
     for jsonl_path in os.listdir(dataset_path):
         if jsonl_path == "log.out" or jsonl_path == "data.json":
@@ -138,7 +148,11 @@ def export_bugs(
                 bug = json.loads(line)
                 futures.append(
                     executor.submit(
-                        export_bug_containers, bug, output_folder_path, base_image
+                        export_bug_containers,
+                        bug,
+                        output_folder_path,
+                        base_image,
+                        user_mapping,
                     )
                 )
                 futures_to_bug[futures[-1]] = bug
diff --git a/filter_bugs.py b/filter_bugs.py
index 3ecffe6a1d..3215844eaa 100644
--- a/filter_bugs.py
+++ b/filter_bugs.py
@@ -93,19 +93,20 @@ def filter_bug(
     offline: bool,
     n_executions: int,
     base_image: str | None = None,
+    user_mapping: bool = True,
 ) -> str:
     try:
         repo_name = bug["repository"].replace("/", "-")
         bug_patch: BugPatch = BugPatch.from_dict(bug, repo_clone)
         diff_folder_path = os.path.join(export_path, repo_name, bug_patch.commit)
-
-        Act(base_image=base_image)  # Pass base_image to Act initialization
+        Act(
+            base_image=base_image, user_mapping=user_mapping
+        )  # Initialize Act with base_image and user_mapping
         image_name = f"gitbugactions-run-bug:{str(uuid.uuid4())}"
         docker_client = DockerClient.getInstance()
         create_diff_image(
             "gitbugactions:latest", image_name, get_diff_path(diff_folder_path)
         )
-
         previous_commit_runs = []
         previous_commit_with_diff_runs = []
         current_commit_runs = []
@@ -198,6 +199,7 @@ def filter_bugs(
     offline: bool = True,
     n_executions: int = 5,
     base_image: str | None = None,
+    user_mapping: bool = True,
 ):
     """Creates the list of non-flaky bug-fixes that are able to be reproduced.
 
@@ -209,6 +211,7 @@ def filter_bugs(
         offline (bool, optional): If the containers must be isolated from the internet. Defaults to True.
         n_executions (int, optional): Number of times to execute each test. Defaults to 5.
         base_image (str, optional): Base image to use for building the runner image. If None, uses default.
+        user_mapping (bool, optional): Whether to include user/group ID mapping in the Dockerfile. Defaults to True.
     """
     ActCacheDirManager.init_act_cache_dirs(n_dirs=n_workers)
     executor = ThreadPoolExecutor(max_workers=n_workers)
@@ -247,6 +250,7 @@ def filter_bugs(
                         offline,
                         n_executions,
                         base_image,
+                        user_mapping,
                     )
                     future_to_bug[future] = bug
             finally:
diff --git a/gitbugactions/actions/actions.py b/gitbugactions/actions/actions.py
index 9c3925e703..7959b83771 100644
--- a/gitbugactions/actions/actions.py
+++ b/gitbugactions/actions/actions.py
@@ -244,6 +244,7 @@ def __init__(
         offline: bool = False,
         fail_strategy: ActFailureStrategy = ActTestsFailureStrategy(),
         base_image: str | None = None,
+        user_mapping: bool = True,
     ):
         """
         Args:
@@ -252,9 +253,10 @@ def __init__(
             offline (bool): Whether to run in offline mode
             fail_strategy (ActFailureStrategy): Strategy to determine test failures
             base_image (str): Base image to use for building the runner image. If None, uses default.
+            user_mapping (bool): Whether to include user/group ID mapping in the Dockerfile. Default is True.
         """
         Act.__check_act()
-        Act.__setup_image(runner_image, base_image)
+        Act.__setup_image(runner_image, base_image, user_mapping)
         if reuse:
             self.flags = "--reuse"
         else:
@@ -284,7 +286,9 @@ def __check_act():
         Act.__ACT_CHECK = True
 
     @staticmethod
-    def __setup_image(runner_image: str, base_image: str | None = None):
+    def __setup_image(
+        runner_image: str, base_image: str | None = None, user_mapping: bool = True
+    ):
         with Act.__SETUP_LOCK:
             client = DockerClient.getInstance()
             if Act.__IMAGE_SETUP:
@@ -304,10 +308,14 @@ def __setup_image(runner_image: str, base_image: str | None = None):
             with open("Dockerfile", "w") as f:
                 client = DockerClient.getInstance()
                 dockerfile = f"FROM {base}\n"
-                dockerfile += f"RUN sudo usermod -u 4000000 runneradmin\n"
-                dockerfile += f"RUN sudo groupadd -o -g {os.getgid()} {grp.getgrgid(os.getgid()).gr_name}\n"
-                dockerfile += f"RUN sudo usermod -G {os.getgid()} runner\n"
-                dockerfile += f"RUN sudo usermod -o -u {os.getuid()} runner\n"
+
+                # Only add user mapping if requested
+                if user_mapping:
+                    dockerfile += f"RUN sudo usermod -u 4000000 runneradmin\n"
+                    dockerfile += f"RUN sudo groupadd -o -g {os.getgid()} {grp.getgrgid(os.getgid()).gr_name}\n"
+                    dockerfile += f"RUN sudo usermod -G {os.getgid()} runner\n"
+                    dockerfile += f"RUN sudo usermod -o -u {os.getuid()} runner\n"
+
                 f.write(dockerfile)
 
             client.images.build(path="./", tag="gitbugactions", forcerm=True)
@@ -385,6 +393,7 @@ def __init__(
         runner_image: str = "gitbugactions:latest",
         offline: bool = False,
         base_image: str | None = None,
+        user_mapping: bool = True,
     ):
         self.repo_path = repo_path
         self.keep_containers = keep_containers
@@ -394,6 +403,7 @@ def __init__(
         self.runner_image = runner_image
         self.offline = offline
         self.base_image = base_image
+        self.user_mapping = user_mapping
 
         workflows_path = os.path.join(repo_path, ".github", "workflows")
         for dirpath, dirnames, filenames in os.walk(workflows_path):
@@ -476,6 +486,7 @@ def run_workflow(
             offline=self.offline,
             fail_strategy=act_fail_strategy,
             base_image=self.base_image,
+            user_mapping=self.user_mapping,
         )
         return act.run_act(self.repo_path, workflow, act_cache_dir=act_cache_dir)
 
diff --git a/gitbugactions/test_executor.py b/gitbugactions/test_executor.py
index fd1d4c3ba9..51ffbfcf52 100644
--- a/gitbugactions/test_executor.py
+++ b/gitbugactions/test_executor.py
@@ -27,6 +27,7 @@ def __init__(
         default_actions: GitHubActions,
         runner_image: str = "gitbugactions:latest",
         base_image: str | None = None,
+        user_mapping: bool = True,
     ):
         TestExecutor.__schedule_cleanup(runner_image)
         self.act_cache_dir = act_cache_dir
@@ -34,6 +35,7 @@ def __init__(
         self.runner_image = runner_image
         self.base_image = base_image
         self.language = language
+        self.user_mapping = user_mapping
         # Note: these default actions may have different configuration options
         # such as paths, runners, etc.
         self.default_actions = default_actions
@@ -98,6 +100,7 @@ def run_tests(
             runner_image=self.runner_image,
             offline=offline,
             base_image=self.base_image,
+            user_mapping=self.user_mapping,
         )
         if len(test_actions.test_workflows) == 0 and self.default_actions is not None:
             default_actions = True