diff --git a/ADVANCED_USAGE.md b/ADVANCED_USAGE.md
index 4f48eca..c0905ba 100755
--- a/ADVANCED_USAGE.md
+++ b/ADVANCED_USAGE.md
@@ -50,6 +50,7 @@ Below are all the arguments for `bigcodebench.evaluate` for the remote evaluatio
- `--n_samples`: The number of samples, default to `1`
- `--temperature`: The temperature, default to `0.0`
- `--max_new_tokens`: The length of max new tokens, default to `1280`
+- `--max_model_len`: The maximum model context length (in tokens) for vLLM, default to `12800`
- `--greedy`: Whether to use greedy decoding, default to `False`
- `--strip_newlines`: Whether to strip newlines, default to `False`, set to `True` to strip newlines for some model series like StarCoder2
- `--direct_completion`: Whether to use direct completion, default to `False`
@@ -69,7 +70,8 @@ Below are all the arguments for `bigcodebench.evaluate` for the remote evaluatio
- `--tokenizer_legacy`: Whether to use the legacy tokenizer, default to `False`
- `--samples`: The path to the generated samples file, default to `None`
- `--no_execute`: Whether to not execute the samples, default to `False`
-- `--remote_execute_api`: The API endpoint for remote execution, default to `https://bigcode-bigcodebench-evaluator.hf.space/`, you can also use your own Gradio API endpoint by cloning the [bigcodebench-evaluator](https://huggingface.co/spaces/bigcode/bigcodebench-evaluator) repo and check `Use via API` at the bottom of the HF space page
+- `--e2b_endpoint`: The E2B sandbox template used for remote execution, default to `bigcodebench_evaluator`
+- `--gradio_endpoint`: The API endpoint for remote execution, default to `https://bigcode-bigcodebench-evaluator.hf.space/`; you can also use your own Gradio API endpoint by cloning the [bigcodebench-evaluator](https://huggingface.co/spaces/bigcode/bigcodebench-evaluator) repo and checking `Use via API` at the bottom of the HF space page (see the example below)
- `--pass_k`: The `k` in `Pass@k`, default to `[1, 5, 10]`, e.g. `--pass_k 1,5,10` will evaluate `Pass@1`, `Pass@5` and `Pass@10`
- `--calibrated`: Whether to use the calibrated samples, default to `True`
- `--save_pass_rate`: Whether to save the pass rate to a file, default to `True`
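+
+For example, a minimal sketch of pointing the evaluation at your own endpoints (the space URL and template name below are illustrative placeholders, and the remaining generation arguments are elided):
+
+```bash
+# Self-hosted Gradio evaluator space
+bigcodebench.evaluate --execution gradio --gradio_endpoint https://<your-username>-bigcodebench-evaluator.hf.space/ ...
+
+# Custom E2B sandbox template
+bigcodebench.evaluate --execution e2b --e2b_endpoint <your-template-name> ...
+```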
diff --git a/Docker/Evaluate.Dockerfile b/Docker/Evaluate.Dockerfile
index 90e7f40..8b2cdcd 100755
--- a/Docker/Evaluate.Dockerfile
+++ b/Docker/Evaluate.Dockerfile
@@ -54,7 +54,7 @@ RUN pip install \
rich \
accelerate \
anthropic \
- google-generativeai \
+ google-genai \
mistralai \
openai \
e2b
diff --git a/README.md b/README.md
index 4bcea25..b5a38af 100755
--- a/README.md
+++ b/README.md
@@ -27,8 +27,8 @@
🎉 Check out our latest work!
- 🌟 SWE Arena 🌟
- 🚀 Open Evaluation Platform on AI for Software Engineering 🚀
+ 🌟 BigCodeArena 🌟
+ 🚀 Open Evaluation Platform on AI for Vibe Coding 🚀
✨ 100% free to use the latest frontier models! ✨
@@ -127,7 +127,7 @@ bigcodebench.evaluate \
--execution [e2b|gradio|local] \
--split [complete|instruct] \
--subset [full|hard] \
- --backend [vllm|openai|anthropic|google|mistral|hf]
+ --backend [vllm|openai|anthropic|google|mistral|hf|hf-inference]
```
- All the resulting files will be stored in a folder named `bcb_results`.
@@ -177,10 +177,17 @@ Access Gemini APIs from [Google AI Studio](https://aistudio.google.com/)
export GOOGLE_API_KEY=
```
+Access the [Hugging Face Serverless Inference API](https://huggingface.co/docs/api-inference/en/index)
+```bash
+export HF_INFERENCE_API_KEY=
+```
+
+Please make sure your HF access token has the `Make calls to inference providers` permission.
+
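+A minimal sketch of running generation through this backend (the model name is an illustrative placeholder):
+
+```bash
+bigcodebench.evaluate \
+  --model <org>/<model> \
+  --execution gradio \
+  --split complete \
+  --subset full \
+  --backend hf-inference
+```
+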
## 💻 LLM-generated Code
We share pre-generated code samples from LLMs we have [evaluated](https://huggingface.co/spaces/bigcode/bigcodebench-leaderboard) on the full set:
-* See the attachment of our [v0.2.1.post7](https://github.com/bigcode-project/bigcodebench/releases/tag/v0.2.1.post7). We include `sanitized_samples_calibrated.zip` for your convenience.
+* See the attachment of our [v0.2.4](https://github.com/bigcode-project/bigcodebench/releases/tag/v0.2.4). We include `sanitized_samples_calibrated.zip` for your convenience.
## 🧑 Advanced Usage
diff --git a/analysis/get_results.py b/analysis/get_results.py
index fc5aa17..641c43b 100755
--- a/analysis/get_results.py
+++ b/analysis/get_results.py
@@ -4,7 +4,7 @@
import numpy as np
from numpy import mean
from glob import glob
-from utils import *
+from utils import model_info
from tqdm import tqdm
import pandas as pd
import itertools
@@ -48,6 +48,8 @@ def get_results(tids):
"moe": info["moe"],
"size": info["size"],
"act_param": info["act_param"],
+ "date": info.get("date", None),
+ "prefill": info.get("prefill", False),
# "direct_complete": info["direct_complete"],
}
@@ -118,12 +120,12 @@ def check_valid(results):
def split_gen():
- shutil.rmtree("sanitized_samples", ignore_errors=True)
shutil.rmtree("sanitized_calibrated_samples", ignore_errors=True)
- os.makedirs("sanitized_samples/complete", exist_ok=True)
- os.makedirs("sanitized_samples/instruct", exist_ok=True)
- os.makedirs("sanitized_calibrated_samples/complete", exist_ok=True)
- os.makedirs("sanitized_calibrated_samples/instruct", exist_ok=True)
+ os.makedirs("sanitized_calibrated_samples/hard/complete", exist_ok=True)
+ os.makedirs("sanitized_calibrated_samples/hard/instruct", exist_ok=True)
+ os.makedirs("sanitized_calibrated_samples/full/complete", exist_ok=True)
+ os.makedirs("sanitized_calibrated_samples/full/instruct", exist_ok=True)
+
for model, info in model_info.items():
model = model.replace("/", "--")
files = glob(f"results/{model}--bigcodebench-*.jsonl")
@@ -131,27 +133,21 @@ def split_gen():
model = info["link"].split("https://huggingface.co/")[-1].replace("/", "--")
for file in files:
+ if "-sanitized" not in file or "calibrated" not in file:
+ continue
+
_, suffix = os.path.basename(file).split("--bigcodebench-")
with open(file, "r") as f:
data = f.readlines()
- if "-sanitized" in file:
- if "calibrated" in file:
- if info["prompted"]:
- if suffix.startswith("complete"):
- with open(f"sanitized_calibrated_samples/complete/{model}--bigcodebench-{suffix}", "w") as f:
- f.writelines(data)
- else:
- with open(f"sanitized_calibrated_samples/instruct/{model}--bigcodebench-{suffix}", "w") as f:
- f.writelines(data)
+ split_type = "hard" if "-hard-" in file else "full"
+ if info["prompted"]:
+ if suffix.startswith("complete") or suffix.startswith("hard-complete"):
+ with open(f"sanitized_calibrated_samples/{split_type}/complete/{model}--bigcodebench-{suffix}", "w") as f:
+ f.writelines(data)
else:
- if suffix.startswith("complete"):
- with open(f"sanitized_samples/complete/{model}--bigcodebench-{suffix}", "w") as f:
- f.writelines(data)
- else:
- with open(f"sanitized_samples/instruct/{model}--bigcodebench-{suffix}", "w") as f:
- f.writelines(data)
-
+ with open(f"sanitized_calibrated_samples/{split_type}/instruct/{model}--bigcodebench-{suffix}", "w") as f:
+ f.writelines(data)
def read_task_perf(tids, task="complete"):
model_results = dict()
@@ -255,7 +251,7 @@ def get_solve_rate(data_dict, task="complete"):
def get_hf_ds(results):
hf_dataset = {"model": [], "link": [], "moe": [], "size": [], "act_param": [], "type": [], #"lazy": [],# "direct_complete": [],
- "complete": [], "instruct": []}
+ "complete": [], "instruct": [], "date": [], "prefill": []}
for model, result in results.items():
hf_dataset["model"].append(model)
@@ -267,6 +263,8 @@ def get_hf_ds(results):
# hf_dataset["lazy"].append(result["lazy"])
hf_dataset["complete"].append(result["pass@1"]["complete"])
hf_dataset["instruct"].append(result["pass@1"]["instruct"])
+ hf_dataset["date"].append(result["date"])
+ hf_dataset["prefill"].append(result["prefill"])
# hf_dataset["direct_complete"].append(result["direct_complete"])
return Dataset.from_dict(hf_dataset)
@@ -302,7 +300,7 @@ def get_perf_df(data_dict):
if __name__ == "__main__":
- # split_gen()
+ split_gen()
bcb_orig = load_dataset("bigcode/bigcodebench", split="v0.1.1")
bcb_hard = load_dataset("bigcode/bigcodebench-hard", split="v0.1.1")
bcb_config = {
diff --git a/analysis/utils.py b/analysis/utils.py
index 430e113..20ecbf5 100755
--- a/analysis/utils.py
+++ b/analysis/utils.py
@@ -7,7 +7,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-12-04",
},
"bigcode/starcoder2-15b-instruct-v0.1": {
"name": "StarCoder2-15B-Instruct-v0.1",
@@ -17,7 +18,8 @@
"size": 15,
"act_param": 15,
"open-data": "Full",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-30"
},
"bigcode/starcoder2-3b": {
"name": "StarCoder2-3B",
@@ -27,7 +29,8 @@
"size": 3,
"act_param": 3,
"open-data": "Full",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-29"
},
"bigcode/starcoder2-7b": {
"name": "StarCoder2-7B",
@@ -37,7 +40,8 @@
"size": 7,
"act_param": 7,
"open-data": "Full",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-29"
},
"bigcode/starcoder2-15b": {
"name": "StarCoder2-15B",
@@ -47,7 +51,8 @@
"size": 15,
"act_param": 15,
"open-data": "Full",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-29"
},
"Qwen/CodeQwen1.5-7B": {
"name": "CodeQwen1.5-7B",
@@ -57,7 +62,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-16"
},
"google/codegemma-2b": {
"name": "CodeGemma-2B",
@@ -67,7 +73,8 @@
"size": 2,
"act_param": 2,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-10"
},
"google/codegemma-7b": {
"name": "CodeGemma-7B",
@@ -77,7 +84,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-10"
},
"google/codegemma-7b-it": {
"name": "CodeGemma-7B-Instruct",
@@ -87,7 +95,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-10"
},
"gpt-3.5-turbo-0125": {
"name": "GPT-3.5-Turbo-0125",
@@ -97,7 +106,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-01-25"
},
"gpt-4o": {
"name": "GPT-4o-2024-05-13",
@@ -107,7 +117,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-13"
},
"gpt-4-turbo-2024-04-09": {
"name": "GPT-4-Turbo-2024-04-09",
@@ -117,7 +128,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-09"
},
"gpt-4-0613": {
"name": "GPT-4-0613",
@@ -127,7 +139,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-13"
},
"codellama/CodeLlama-7b-hf": {
"name": "CodeLlama-7B-Base",
@@ -137,7 +150,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"codellama/CodeLlama-13b-hf": {
"name": "CodeLlama-13B-Base",
@@ -147,7 +161,8 @@
"size": 13,
"act_param": 13,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"codellama/CodeLlama-7b-Instruct-hf": {
"name": "CodeLlama-7B-Instruct",
@@ -157,7 +172,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"codellama/CodeLlama-13b-Instruct-hf": {
"name": "CodeLlama-13B-Instruct",
@@ -167,7 +183,8 @@
"size": 13,
"act_param": 13,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"mistral-large-2402": {
"name": "Mistral-Large-2402",
@@ -177,7 +194,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-26"
},
"mistral-small-2402": {
"name": "Mistral-Small-2402",
@@ -187,7 +205,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-26"
},
"mistralai/Mixtral-8x22B-v0.1": {
"name": "Mixtral-8x22B-Base",
@@ -197,7 +216,8 @@
"size": 176,
"act_param": 44,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-17"
},
"mistralai/Mixtral-8x22B-Instruct-v0.1": {
"name": "Mixtral-8x22B-Instruct",
@@ -207,7 +227,8 @@
"size": 176,
"act_param": 44,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-17"
},
"codellama/CodeLlama-34b-hf": {
"name": "CodeLlama-34B-Base",
@@ -217,7 +238,8 @@
"size": 34,
"act_param": 34,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"codellama/CodeLlama-34b-Instruct-hf": {
"name": "CodeLlama-34B-Instruct",
@@ -227,7 +249,8 @@
"size": 34,
"act_param": 34,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"codellama/CodeLlama-70b-hf": {
"name": "CodeLlama-70B-Base",
@@ -237,7 +260,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"codellama/CodeLlama-70b-Instruct-hf": {
"name": "CodeLlama-70B-Instruct",
@@ -247,7 +271,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"Qwen/CodeQwen1.5-7B-Chat": {
"name": "CodeQwen1.5-7B-Chat",
@@ -257,7 +282,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-16"
},
"Qwen/Qwen1.5-110B-Chat": {
"name": "Qwen1.5-110B-Chat",
@@ -267,7 +293,8 @@
"size": 110,
"act_param": 110,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-26"
},
"Qwen/Qwen1.5-72B-Chat": {
"name": "Qwen1.5-72B-Chat",
@@ -277,7 +304,8 @@
"size": 72,
"act_param": 72,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-26"
},
"Qwen/Qwen1.5-32B-Chat": {
"name": "Qwen1.5-32B-Chat",
@@ -287,7 +315,8 @@
"size": 32,
"act_param": 32,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-26"
},
"deepseek-ai/DeepSeek-V2-Chat": {
"name": "DeepSeek-V2-Chat",
@@ -297,7 +326,8 @@
"size": 236,
"act_param": 21,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-06"
},
"deepseek-ai/deepseek-coder-1.3b-base": {
"name": "DeepSeek-Coder-1.3B-Base",
@@ -307,7 +337,8 @@
"size": 1.3,
"act_param": 1.3,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-10-28"
},
"deepseek-ai/deepseek-coder-1.3b-instruct": {
"name": "DeepSeek-Coder-1.3B-Instruct",
@@ -317,7 +348,8 @@
"size": 1.3,
"act_param": 1.3,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-10-28"
},
"deepseek-ai/deepseek-coder-33b-base": {
"name": "DeepSeek-Coder-33B-Base",
@@ -327,7 +359,8 @@
"size": 33,
"act_param": 33,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-10-28"
},
"deepseek-ai/deepseek-coder-33b-instruct": {
"name": "DeepSeek-Coder-33B-Instruct",
@@ -337,7 +370,8 @@
"size": 33,
"act_param": 33,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-10-28"
},
"deepseek-ai/deepseek-coder-6.7b-base": {
"name": "DeepSeek-Coder-6.7B-Base",
@@ -347,7 +381,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-10-28"
},
"deepseek-ai/deepseek-coder-6.7b-instruct": {
"name": "DeepSeek-Coder-6.7B-Instruct",
@@ -357,7 +392,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-10-28"
},
"meta-llama/Meta-Llama-3-70B": {
"name": "Llama-3-70B-Base",
@@ -367,7 +403,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-18"
},
"meta-llama/Meta-Llama-3-70B-Instruct": {
"name": "Llama-3-70B-Instruct",
@@ -377,7 +414,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-18"
},
"meta-llama/Meta-Llama-3-8B": {
"name": "Llama-3-8B-Base",
@@ -387,7 +425,8 @@
"size": 8,
"act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-18"
},
"meta-llama/Meta-Llama-3-8B-Instruct": {
"name": "Llama-3-8B-Instruct",
@@ -397,7 +436,8 @@
"size": 8,
"act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-18"
},
"ibm-granite/granite-3b-code-instruct": {
"name": "Granite-Code-3B-Instruct",
@@ -407,7 +447,8 @@
"size": 3,
"act_param": 3,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-06"
},
"ibm-granite/granite-8b-code-instruct": {
"name": "Granite-Code-8B-Instruct",
@@ -417,7 +458,8 @@
"size": 8,
"act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-06"
},
"ibm-granite/granite-20b-code-instruct": {
"name": "Granite-Code-20B-Instruct",
@@ -427,7 +469,8 @@
"size": 20,
"act_param": 20,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-06"
},
"ibm-granite/granite-34b-code-instruct": {
"name": "Granite-Code-34B-Instruct",
@@ -437,7 +480,8 @@
"size": 34,
"act_param": 34,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-06"
},
"ibm-granite/granite-3b-code-base": {
"name": "Granite-Code-3B-Base",
@@ -447,7 +491,8 @@
"size": 3,
"act_param": 3,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-06"
},
"ibm-granite/granite-8b-code-base": {
"name": "Granite-Code-8B-Base",
@@ -457,7 +502,8 @@
"size": 8,
"act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-06"
},
"ibm-granite/granite-20b-code-base": {
"name": "Granite-Code-20B-Base",
@@ -467,7 +513,8 @@
"size": 20,
"act_param": 20,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-06"
},
"ibm-granite/granite-34b-code-base": {
"name": "Granite-Code-34B-Base",
@@ -477,7 +524,8 @@
"size": 34,
"act_param": 34,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-06"
},
"claude-3-haiku-20240307": {
"name": "Claude-3-Haiku-20240307",
@@ -487,7 +535,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-03-07"
},
"claude-3-sonnet-20240229": {
"name": "Claude-3-Sonnet-20240229",
@@ -497,7 +546,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-02-29"
},
"claude-3-opus-20240229": {
"name": "Claude-3-Opus-20240229",
@@ -507,7 +557,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-02-29"
},
"01-ai/Yi-1.5-34B-Chat": {
"name": "Yi-1.5-34B-Chat",
@@ -517,7 +568,8 @@
"size": 34,
"act_param": 34,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-20"
},
"01-ai/Yi-1.5-34B": {
"name": "Yi-1.5-34B",
@@ -527,7 +579,8 @@
"size": 34,
"act_param": 34,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-20"
},
"01-ai/Yi-1.5-9B-Chat": {
"name": "Yi-1.5-9B-Chat",
@@ -537,7 +590,8 @@
"size": 9,
"act_param": 9,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-20"
},
"01-ai/Yi-1.5-9B": {
"name": "Yi-1.5-9B",
@@ -547,7 +601,8 @@
"size": 9,
"act_param": 9,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-20"
},
"01-ai/Yi-1.5-6B-Chat": {
"name": "Yi-1.5-6B-Chat",
@@ -557,7 +612,8 @@
"size": 6,
"act_param": 6,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-20"
},
"01-ai/Yi-1.5-6B": {
"name": "Yi-1.5-6B",
@@ -567,7 +623,8 @@
"size": 6,
"act_param": 6,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-20"
},
"Qwen/Qwen2-57B-A14B": {
"name": "Qwen2-57B-A14B",
@@ -577,7 +634,8 @@
"size": 57,
"act_param": 14,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-07"
},
"Qwen/Qwen2-7B-Instruct": {
"name": "Qwen2-7B-Instruct",
@@ -587,7 +645,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-07"
},
"Qwen/Qwen2-72B-Chat": {
"name": "Qwen2-72B-Chat",
@@ -597,7 +656,8 @@
"size": 72,
"act_param": 72,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-07"
},
"gemini-1.5-pro": {
"name": "Gemini-1.5-Pro-API-0514",
@@ -607,7 +667,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-14"
},
"gemini-1.5-flash": {
"name": "Gemini-1.5-Flash-API-0514",
@@ -617,7 +678,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-14"
},
"m-a-p/OpenCodeInterpreter-DS-33B": {
"name": "OpenCodeInterpreter-DS-33B",
@@ -627,7 +689,8 @@
"size": 33,
"act_param": 33,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-22"
},
"m-a-p/OpenCodeInterpreter-DS-6.7B": {
"name": "OpenCodeInterpreter-DS-6.7B",
@@ -637,7 +700,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-22"
},
"m-a-p/OpenCodeInterpreter-DS-1.3B": {
"name": "OpenCodeInterpreter-DS-1.3B",
@@ -647,7 +711,8 @@
"size": 1.3,
"act_param": 1.3,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-22"
},
"microsoft/Phi-3-medium-128k-instruct": {
"name": "Phi-3-Medium-128K-Instruct",
@@ -657,7 +722,8 @@
"size": 14,
"act_param": 14,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-21"
},
"microsoft/Phi-3-small-128k-instruct": {
"name": "Phi-3-Small-128K-Instruct",
@@ -667,7 +733,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-21"
},
"codestral-2405": {
"name": "Codestral-22B-v0.1",
@@ -677,7 +744,8 @@
"size": 22,
"act_param": 22,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-23"
},
"codestral-mamba-2407": {
"name": "Codestral-Mamba",
@@ -687,7 +755,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-16"
},
"mistralai/Mistral-7B-Instruct-v0.3": {
"name": "Mistral-7B-Instruct-v0.3",
@@ -697,7 +766,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-22"
},
"mistralai/Mistral-7B-v0.3": {
"name": "Mistral-7B-v0.3",
@@ -707,7 +777,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-22"
},
"CohereForAI/c4ai-command-r-plus": {
"name": "Command R+",
@@ -717,7 +788,8 @@
"size": 104,
"act_param": 104,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-04"
},
"deepseek-coder": {
"name": "DeepSeek-Coder-V2-Instruct",
@@ -727,7 +799,8 @@
"size": 236,
"act_param": 21,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-06-17"
},
"deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct": {
"name": "DeepSeek-Coder-V2-Lite-Instruct",
@@ -737,7 +810,8 @@
"size": 16,
"act_param": 2.4,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-17"
},
"deepseek-ai/DeepSeek-Coder-V2-Lite-Base": {
"name": "DeepSeek-Coder-V2-Lite-Base",
@@ -747,7 +821,8 @@
"size": 16,
"act_param": 2.4,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-17"
},
"claude-3-5-sonnet-20240620": {
"name": "Claude-3.5-Sonnet-20240620",
@@ -757,7 +832,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-06-20"
},
"NousResearch/Hermes-2-Theta-Llama-3-70B": {
"name": "Hermes-2-Theta-Llama-3-70B",
@@ -767,7 +843,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-24"
},
"microsoft/wavecoder-ultra-6.7b": {
"name": "WaveCoder-Ultra-6.7B",
@@ -777,7 +854,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-12-26"
},
"google/gemma-2-9b-it": {
"name": "Gemma-2-9B-Instruct",
@@ -787,7 +865,8 @@
"size": 9,
"act_param": 9,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-19"
},
"Bin12345/AutoCoder": {
"name": "AutoCoder",
@@ -797,7 +876,8 @@
"size": 33,
"act_param": 33,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-23"
},
"Bin12345/AutoCoder_S_6.7B": {
"name": "AutoCoder-S-6.7B",
@@ -807,7 +887,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-23"
},
"Bin12345/AutoCoder_QW_7B": {
"name": "AutoCoder-QW-7B",
@@ -817,7 +898,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-23"
},
"SenseLLM/ReflectionCoder-DS-33B": {
"name": "ReflectionCoder-DS-33B",
@@ -827,7 +909,8 @@
"size": 33,
"act_param": 33,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-27"
},
"SenseLLM/ReflectionCoder-DS-6.7B": {
"name": "ReflectionCoder-DS-6.7B",
@@ -837,7 +920,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-27"
},
"SenseLLM/ReflectionCoder-CL-34B": {
"name": "ReflectionCoder-CL-34B",
@@ -847,7 +931,8 @@
"size": 34,
"act_param": 34,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-27"
},
"SenseLLM/ReflectionCoder-CL-7B": {
"name": "ReflectionCoder-CL-7B",
@@ -857,7 +942,8 @@
"size": 7,
"act_param": 7,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-27"
},
"new-microsoft/Phi-3-mini-128k-instruct": {
"name": "Phi-3.1-Mini-128K-Instruct",
@@ -867,7 +953,8 @@
"size": 3.8,
"act_param": 3.8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-21"
},
"old-microsoft/Phi-3-mini-128k-instruct": {
"name": "Phi-3-Mini-128K-Instruct",
@@ -877,7 +964,8 @@
"size": 3.8,
"act_param": 3.8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-21"
},
"internlm/internlm2_5-7b-chat": {
"name": "InternLM2.5-7B-Chat",
@@ -887,7 +975,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-03"
},
"NousResearch/Hermes-2-Pro-Llama-3-70B": {
"name": "Hermes-2-Pro-Llama-3-70B",
@@ -897,7 +986,8 @@
"size": 70,
"act_param": 70,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-27"
},
"new-deepseek-chat": {
"name": "DeepSeek-V2-Chat (2024-06-28)",
@@ -907,7 +997,8 @@
"size": 236,
"act_param": 21,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-06-28"
},
"vllm-google/gemma-2-27b-it": {
"name": "Gemma-2-27B-Instruct",
@@ -917,7 +1008,8 @@
"size": 27,
"act_param": 27,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-06-19"
},
"Artigenz/Artigenz-Coder-DS-6.7B": {
"name": "Artigenz-Coder-DS-6.7B",
@@ -927,7 +1019,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-16"
},
"openchat/openchat-3.6-8b-20240522": {
"name": "OpenChat-3.6-8B-20240522",
@@ -937,7 +1030,8 @@
"size": 8,
"act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-22"
},
"Phind/Phind-CodeLlama-34B-v2": {
"name": "Phind-CodeLlama-34B-v2",
@@ -947,7 +1041,8 @@
"size": 34,
"act_param": 34,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2023-08-25"
},
"yi-large": {
"name": "Yi-Large",
@@ -957,7 +1052,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-13"
},
"THUDM/codegeex4-all-9b": {
"name": "CodeGeex4-All-9B",
@@ -967,7 +1063,8 @@
"size": 9,
"act_param": 9,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-05"
},
"gpt-4o-mini-2024-07-18": {
"name": "GPT-4o-mini-2024-07-18",
@@ -977,7 +1074,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-18"
},
"Nexusflow/Athene-70B": {
"name": "Athene-70B",
@@ -987,7 +1085,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-20"
},
"NTQAI/Nxcode-CQ-7B-orpo": {
"name": "Nxcode-CQ-7B-Orpo",
@@ -997,7 +1096,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-25"
},
"migtissera/Llama-3-70B-Synthia-v3.5": {
"name": "Llama-3-70B-Synthia-v3.5",
@@ -1007,7 +1107,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-05-27"
},
"migtissera/Tess-v2.5.2-Qwen2-72B": {
"name": "Tess-v2.5.2-Qwen2-72B",
@@ -1017,7 +1118,8 @@
"size": 72,
"act_param": 72,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-18"
},
"WhiteRabbitNeo/WhiteRabbitNeo-33B-v1.5": {
"name": "WhiteRabbitNeo-33B-v1.5",
@@ -1027,7 +1129,8 @@
"size": 33,
"act_param": 33,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-02-10"
},
"mistral-large-2407": {
"name": "Mistral-Large-Instruct-2407",
@@ -1037,7 +1140,8 @@
"size": 123,
"act_param": 123,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-07-24"
},
"meta-llama/Meta-Llama-3.1-8B-Instruct": {
"name": "Llama-3.1-8B-Instruct",
@@ -1047,7 +1151,8 @@
"size": 8,
"act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-23"
},
"meta-llama/Meta-Llama-3.1-70B-Instruct": {
"name": "Llama-3.1-70B-Instruct",
@@ -1057,7 +1162,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-23"
},
"meta--llama-3.1-405b-instruct": {
"name": "Llama-3.1-405B-Instruct",
@@ -1067,7 +1173,8 @@
"size": 405,
"act_param": 405,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-23"
},
"deepseek-coder-20240724": {
"name": "DeepSeek-Coder-V2-Instruct (2024-07-24)",
@@ -1077,7 +1184,8 @@
"size": 236,
"act_param": 21,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-24"
},
"microsoft/Phi-3.5-mini-instruct": {
"name": "Phi-3.5-Mini-Instruct",
@@ -1087,7 +1195,8 @@
"size": 3.8,
"act_param": 3.8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-23"
},
"nv-mistralai--mistral-nemo-12b-instruct": {
"name": "Mistral-Nemo-12B-Instruct",
@@ -1097,7 +1206,8 @@
"size": 12,
"act_param": 12,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-18"
},
"wyt2000/InverseCoder-CL-13B": {
"name": "InverseCoder-CL-13B",
@@ -1107,7 +1217,8 @@
"size": 13,
"act_param": 13,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-08"
},
"wyt2000/InverseCoder-CL-7B": {
"name": "InverseCoder-CL-7B",
@@ -1117,7 +1228,8 @@
"size": 7,
"act_param": 7,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-08"
},
"wyt2000/InverseCoder-DS-6.7B": {
"name": "InverseCoder-DS-6.7B",
@@ -1127,7 +1239,8 @@
"size": 6.7,
"act_param": 6.7,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-07-08"
},
"gemini-1.5-pro-exp-0801": {
"name": "Gemini-1.5-Pro-Exp-0801",
@@ -1137,7 +1250,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-08-01"
},
"gpt-4o-2024-08-06": {
"name": "GPT-4o-2024-08-06",
@@ -1147,7 +1261,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-08-06"
},
"abacusai/Dracarys-Llama-3.1-70B-Instruct": {
"name": "Dracarys-Llama-3.1-70B-Instruct",
@@ -1157,7 +1272,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-08-23"
},
"abacusai/Dracarys-72B-Instruct": {
"name": "Dracarys-72B-Instruct",
@@ -1167,7 +1283,8 @@
"size": 72,
"act_param": 72,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-08-23"
},
"gemini-1.5-pro-exp-0827": {
"name": "Gemini-1.5-Pro-Exp-0827",
@@ -1177,7 +1294,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-08-27"
},
"gemini-1.5-flash-exp-0827": {
"name": "Gemini-1.5-Flash-Exp-0827",
@@ -1187,7 +1305,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-08-27"
},
"microsoft/Phi-3.5-mini-instruct": {
"name": "Phi-3.5-Mini-Instruct",
@@ -1197,7 +1316,8 @@
"size": 3.8,
"act_param": 3.8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-23"
},
"abacusai/Dracarys-Llama-3.1-70B-Instruct": {
"name": "Dracarys-Llama-3.1-70B-Instruct",
@@ -1207,7 +1327,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-23"
},
"abacusai/Dracarys-72B-Instruct": {
"name": "Dracarys-72B-Instruct",
@@ -1217,7 +1338,8 @@
"size": 72,
"act_param": 72,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-04-23"
},
"deepseek-coder-v2.5": {
"name": "DeepSeek-V2.5",
@@ -1227,7 +1349,8 @@
"size": 236,
"act_param": 21,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-18"
},
"CohereForAI/c4ai-command-r-08-2024": {
"name": "C4AI-Command-R-08-2024",
@@ -1237,7 +1360,8 @@
"size": 32.3,
"act_param": 32.3,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-08-30"
},
"CohereForAI/c4ai-command-r-plus-08-2024": {
"name": "C4AI-Command-R-Plus-08-2024",
@@ -1247,7 +1371,8 @@
"size": 104,
"act_param": 104,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-08-30"
},
"ayueei--yue-coder-9b-preview": {
"name": "Yi-Coder-9B-Chat",
@@ -1257,7 +1382,8 @@
"size": 9,
"act_param": 9,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-04"
},
# "mattshumer/ref_70_e3_prefill": {
# "name": "Reflection-Llama-3.1-70B",
@@ -1285,7 +1411,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-09-12"
},
"o1-mini-2024-09-12": {
"name": "o1-Mini-2024-09-12 (temperature=1)",
@@ -1295,7 +1422,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-09-12"
},
"Qwen/Qwen2.5-Coder-1.5B-Instruct": {
"name": "Qwen2.5-Coder-1.5B-Instruct",
@@ -1305,7 +1433,8 @@
"size": 1.5,
"act_param": 1.5,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-11-12"
},
"Qwen/Qwen2.5-Coder-7B-Instruct": {
"name": "Qwen2.5-Coder-7B-Instruct",
@@ -1315,7 +1444,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-11-12"
},
"gemini-1.5-pro-002": {
"name": "Gemini-1.5-Pro-002",
@@ -1325,7 +1455,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-09-25"
},
"mistralai/Mistral-Small-Instruct-2409": {
"name": "Mistral-Small-Instruct-2409",
@@ -1335,7 +1466,8 @@
"size": 22.2,
"act_param": 22.2,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-18"
},
"Qwen/Qwen2.5-0.5B-Instruct": {
"name": "Qwen2.5-0.5B-Instruct",
@@ -1345,7 +1477,8 @@
"size": 0.5,
"act_param": 0.5,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-19"
},
"Qwen/Qwen2.5-1.5B-Instruct": {
"name": "Qwen2.5-1.5B-Instruct",
@@ -1355,7 +1488,8 @@
"size": 1.5,
"act_param": 1.5,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-19"
},
"Qwen/Qwen2.5-7B-Instruct": {
"name": "Qwen2.5-7B-Instruct",
@@ -1365,7 +1499,8 @@
"size": 7,
"act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-19"
},
"Qwen/Qwen2.5-14B-Instruct": {
"name": "Qwen2.5-14B-Instruct",
@@ -1375,7 +1510,8 @@
"size": 14,
"act_param": 14,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-19"
},
"Qwen/Qwen2.5-32B-Instruct": {
"name": "Qwen2.5-32B-Instruct",
@@ -1385,7 +1521,8 @@
"size": 32,
"act_param": 32,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-19"
},
"Qwen/Qwen2.5-72B-Instruct": {
"name": "Qwen2.5-72B-Instruct",
@@ -1395,7 +1532,8 @@
"size": 72,
"act_param": 72,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-19"
},
"meta-llama/Llama-3.2-1B-Instruct": {
"name": "Llama-3.2-1B-Instruct",
@@ -1405,7 +1543,8 @@
"size": 1,
"act_param": 1,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-25"
},
"meta-llama/Llama-3.2-3B-Instruct": {
"name": "Llama-3.2-3B-Instruct",
@@ -1415,7 +1554,8 @@
"size": 3,
"act_param": 3,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-25"
},
"nvidia/Llama-3.1-Nemotron-70B-Instruct-HF": {
"name": "Llama-3.1-Nemotron-70B-Instruct",
@@ -1425,7 +1565,8 @@
"size": 70,
"act_param": 70,
"open-data": "Partial",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-25"
},
"claude-3-5-sonnet-20241022": {
"name": "Claude-3.5-Sonnet-20241022",
@@ -1435,7 +1576,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-10-22"
},
"ibm-granite/granite-3.0-8b-instruct": {
"name": "Granite-3.0-8B-Instruct",
@@ -1445,7 +1587,8 @@
"size": 8,
"act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-10-21"
},
"ibm-granite/granite-3.0-2b-instruct": {
"name": "Granite-3.0-2B-Instruct",
@@ -1455,7 +1598,8 @@
"size": 2,
"act_param": 2,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-10-21"
},
"grok-beta--main": {
"name": "Grok-Beta",
@@ -1465,7 +1609,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-03-17"
},
"claude-3-5-haiku-20241022--main": {
"name": "Claude-3.5-Haiku-20241022",
@@ -1475,7 +1620,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-10-22"
},
"Qwen/Qwen2.5-Coder-14B-Instruct--main": {
"name": "Qwen2.5-Coder-14B-Instruct",
@@ -1485,7 +1631,8 @@
"size": 14,
"act_param": 14,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-19"
},
"Qwen/Qwen2.5-Coder-32B-Instruct--main": {
"name": "Qwen2.5-Coder-32B-Instruct",
@@ -1495,7 +1642,8 @@
"size": 32,
"act_param": 32,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-09-19"
},
"infly/OpenCoder-1.5B-Instruct--main": {
"name": "OpenCoder-1.5B-Instruct",
@@ -1505,7 +1653,8 @@
"size": 1.5,
"act_param": 1.5,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-11-09"
},
"infly/OpenCoder-8B-Instruct--main": {
"name": "OpenCoder-8B-Instruct",
@@ -1515,7 +1664,8 @@
"size": 8,
"act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-11-09"
},
"microsoft/Phi-3.5-mini-instruct--main": {
"name": "Phi-3.5-Mini-Instruct",
@@ -1525,7 +1675,8 @@
"size": 3.8,
"act_param": 3.8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-08-21"
},
"Nexusflow/Athene-V2-Agent--main": {
"name": "Athene-V2-Agent",
@@ -1535,7 +1686,8 @@
"size": 72,
"act_param": 72,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-11-14"
},
"Nexusflow/Athene-V2-Chat--main": {
"name": "Athene-V2-Chat",
@@ -1545,7 +1697,8 @@
"size": 72,
"act_param": 72,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-11-14"
},
"gemini-exp-1114--main": {
"name": "Gemini-Exp-1114",
@@ -1555,7 +1708,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-11-14"
},
"gpt-4o-2024-11-20--main": {
"name": "GPT-4o-2024-11-20",
@@ -1565,7 +1719,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-11-20"
},
"gemini-exp-1121--main": {
"name": "Gemini-Exp-1121",
@@ -1575,7 +1730,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-11-21"
},
"gemini-exp-1206--main": {
"name": "Gemini-Exp-1206",
@@ -1585,7 +1741,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-12-06"
},
"meta-llama--Llama-3.3-70B-Instruct--main": {
"name": "Llama-3.3-70B-Instruct",
@@ -1595,7 +1752,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-12-19"
},
"deepseek-ai--DeepSeek-V2.5-1210--main": {
"name": "DeepSeek-V2.5-1210",
@@ -1605,7 +1763,8 @@
"size": 236,
"act_param": 21,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-12-10"
},
"gemini-2.0-flash-exp--main": {
"name": "Gemini-2.0-Flash-Exp",
@@ -1615,7 +1774,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-12-11"
},
"gemini-2.0-flash-thinking-exp-1219--main": {
"name": "Gemini-2.0-Flash-Thinking-Exp-1219",
@@ -1625,7 +1785,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-12-19"
},
"gemini-2.0-flash-thinking-exp-01-21--main": {
"name": "Gemini-2.0-Flash-Thinking-Exp-01-21",
@@ -1635,7 +1796,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-21"
},
"o1-2024-12-17--main": {
"name": "o1-2024-12-17 (temperature=1, reasoning=medium)",
@@ -1645,7 +1807,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-12-17"
},
"o1-2024-12-17--low--main": {
"name": "o1-2024-12-17 (temperature=1, reasoning=low)",
@@ -1655,7 +1818,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-12-17"
},
"o1-2024-12-17--high--main": {
"name": "o1-2024-12-17 (temperature=1, reasoning=high)",
@@ -1665,17 +1829,19 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-12-17"
},
"deepseek-v3-chat--main": {
- "name": "DeepSeek-V3-Chat",
- "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3-Chat",
+ "name": "DeepSeek-V3",
+ "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3",
"prompted": True,
"moe": True,
"size": 671,
"act_param": 37,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2024-12-26"
},
"microsoft--phi-4--main": {
"name": "Phi-4",
@@ -1685,7 +1851,8 @@
"size": 14.7,
"act_param": 14.7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-12-13"
},
"deepseek-reasoner--main": {
"name": "DeepSeek-R1",
@@ -1695,7 +1862,8 @@
"size": 671,
"act_param": 37,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2025-01-20"
},
"deepseek-ai/DeepSeek-R1-Distill-Llama-70B--main": {
"name": "DeepSeek-R1-Distill-Llama-70B",
@@ -1705,7 +1873,8 @@
"size": 70,
"act_param": 70,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-20"
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-32B--main": {
"name": "DeepSeek-R1-Distill-Qwen-32B",
@@ -1715,7 +1884,8 @@
"size": 32,
"act_param": 32,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-20"
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-14B--main": {
"name": "DeepSeek-R1-Distill-Qwen-14B",
@@ -1725,27 +1895,30 @@
"size": 14,
"act_param": 14,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-20"
},
"deepseek-ai/DeepSeek-R1-Distill-Llama-8B--main": {
"name": "DeepSeek-R1-Distill-Llama-8B",
"link": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
"prompted": True,
"moe": False,
- "size": 14,
- "act_param": 14,
+ "size": 8,
+ "act_param": 8,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-20"
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-7B--main": {
"name": "DeepSeek-R1-Distill-Qwen-7B",
"link": "https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
"prompted": True,
"moe": False,
- "size": 14,
- "act_param": 14,
+ "size": 7,
+ "act_param": 7,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-20"
},
"deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B--main": {
"name": "DeepSeek-R1-Distill-Qwen-1.5B",
@@ -1755,7 +1928,8 @@
"size": 1.5,
"act_param": 1.5,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-20"
},
"mistralai/Mistral-Small-24B-Instruct-2501--main": {
"name": "Mistral-Small-24B-Instruct-2501",
@@ -1765,7 +1939,8 @@
"size": 24,
"act_param": 24,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-31"
},
"o3-mini-2025-01-31--medium--main": {
"name": "o3-mini-2025-01-31 (temperature=1, reasoning=medium)",
@@ -1775,7 +1950,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2025-01-31"
},
"o3-mini-2025-01-31--low--main": {
"name": "o3-mini-2025-01-31 (temperature=1, reasoning=low)",
@@ -1785,7 +1961,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2025-01-31"
},
"o3-mini-2025-01-31--high--main": {
"name": "o3-mini-2025-01-31 (temperature=1, reasoning=high)",
@@ -1795,7 +1972,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": True,
+ "prefill": False,
+ "date": "2025-01-31"
},
"gemini-2.0-flash-001--main": {
"name": "Gemini-2.0-Flash-001",
@@ -1805,7 +1983,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-02-05"
},
"gemini-2.0-flash-exp--main": {
"name": "Gemini-2.0-Flash-Exp",
@@ -1815,7 +1994,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-02-05"
},
"gemini-2.0-flash-lite-preview-02-05--main": {
"name": "Gemini-2.0-Flash-Lite-Preview-02-05",
@@ -1825,7 +2005,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-02-05"
},
"gemini-2.0-pro-exp-02-05--main": {
"name": "Gemini-2.0-Pro-Exp-02-05",
@@ -1835,7 +2016,8 @@
"size": None,
"act_param": None,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-02-05"
},
"NovaSky-AI--Sky-T1-32B-Flash--main": {
"name": "Sky-T1-32B-Flash",
@@ -1845,7 +2027,8 @@
"size": 32,
"act_param": 32,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-12"
},
"NovaSky-AI--Sky-T1-32B-Preview--main": {
"name": "Sky-T1-32B-Preview",
@@ -1855,7 +2038,8 @@
"size": 32,
"act_param": 32,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2025-01-12"
},
"Qwen--QwQ-32B-Preview--main": {
"name": "QwQ-32B-Preview",
@@ -1865,6 +2049,205 @@
"size": 32,
"act_param": 32,
"open-data": "None",
- "reasoning": False,
+ "prefill": True,
+ "date": "2024-11-28"
+ },
+ "claude-3-7-sonnet-20250219--main": {
+ "name": "Claude-3-Haiku-20240307",
+ "link": "https://www.anthropic.com/news/claude-3-family",
+ "prompted": True,
+ "moe": False,
+ "size": None,
+ "act_param": None,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-02-19"
+ },
+ "chatgpt-4o-latest--main": {
+ "name": "ChatGPT-4o-latest-20250129",
+ "link": "https://chat.openai.com/",
+ "open-data": "None",
+ "prompted": True,
+ "moe": False,
+ "size": None,
+ "act_param": None,
+ "prefill": False,
+ "date": "2025-01-29"
+ },
+ "Kwaipilot--KwaiCoder-23B-A4B-v1--main": {
+ "name": "KwaiCoder-23B-A4B-v1",
+ "link": "https://huggingface.co/Kwaipilot/KwaiCoder-23B-A4B-v1",
+ "open-data": "None",
+ "prompted": False,
+ "moe": True,
+ "size": 23,
+ "act_param": 4,
+ "prefill": True,
+ "date": "2025-01-25"
+ },
+ "qwen-max-latest--main": {
+ "name": "Qwen2.5-Max",
+ "link": "https://qwenlm.github.io/blog/qwen2.5-max/",
+ "open-data": "None",
+ "prompted": True,
+ "moe": True,
+ "size": None,
+ "act_param": None,
+ "prefill": False,
+ "date": "2025-01-28"
+ },
+ "claude-3-7-sonnet-20250219--3200-output-128k-2025-02-19--main": {
+ "name": "Claude-3.7-Sonnet-20250219 (temperature=1, length=12800, reasoning=3200)",
+ "link": "https://www.anthropic.com/news/claude-3-7-sonnet",
+ "prompted": True,
+ "moe": False,
+ "size": None,
+ "act_param": None,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-02-19"
+ },
+ "claude-3-7-sonnet-20250219--main": {
+ "name": "Claude-3.7-Sonnet-20250219",
+ "link": "https://www.anthropic.com/news/claude-3-7-sonnet",
+ "prompted": True,
+ "moe": False,
+ "size": None,
+ "act_param": None,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-02-19"
+ },
+ "WarriorCoder-6.7B--main": {
+ "name": "WarriorCoder-6.7B (Reproduced)",
+ "link": "https://arxiv.org/abs/2412.17395",
+ "open-data": "None",
+ "prompted": True,
+ "moe": False,
+ "size": 6.7,
+ "act_param": 6.7,
+ "open-data": "None",
+ "prefill": True,
+ "date": "2025-02-18"
+ },
+ "google--gemma-3-27b-it--main": {
+ "name": "Gemma-3-27B-Instruct",
+ "link": "https://huggingface.co/google/gemma-3-27b-it",
+ "open-data": "None",
+ "prompted": True,
+ "moe": False,
+ "size": 27,
+ "act_param": 27,
+ "open-data": "None",
+ "prefill": True,
+ "date": "2025-03-12"
+ },
+ "Qwen--QwQ-32B--skip_prefill--main": {
+ "name": "QwQ-32B (w/ Reasoning)",
+ "link": "https://huggingface.co/Qwen/QwQ-32B",
+ "open-data": "None",
+ "prompted": True,
+ "moe": False,
+ "size": 32,
+ "act_param": 32,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-03-06"
+ },
+ "deepseek-chat-0324--main": {
+ "name": "DeepSeek-V3-0324",
+ "link": "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
+ "open-data": "None",
+ "prompted": True,
+ "moe": True,
+ "size": 671,
+ "act_param": 37,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-03-24"
+ },
+ "gemini-2.5-pro-exp-03-25--main": {
+ "name": "Gemini-2.5-Pro-Exp-03-25",
+ "link": "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/",
+ "open-data": "None",
+ "prompted": True,
+ "moe": False,
+ "size": None,
+ "act_param": 37,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-03-25"
+ },
+ "meta/llama-4-scout-17b-16e-instruct--main": {
+ "name": "Llama-4-Scout",
+ "link": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct",
+ "open-data": "None",
+ "prompted": True,
+ "moe": True,
+ "size": 109,
+ "act_param": 17,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-04-05"
+ },
+ "meta/llama-4-maverick-17b-128e-instruct--main": {
+ "name": "Llama-4-Maverick",
+ "link": "https://huggingface.co/meta-llama/Llama-4-Maverick-17B-128E-Instruct",
+ "open-data": "None",
+ "prompted": True,
+ "moe": True,
+ "size": 109,
+ "act_param": 17,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-04-05"
},
-}
+ "agentica-org/DeepCoder-14B-Preview--main": {
+ "name": "DeepCoder-14B-Preview",
+ "link": "https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
+ "open-data": "None",
+ "prompted": True,
+ "moe": True,
+ "size": 14,
+ "act_param": 14,
+ "open-data": "None",
+ "prefill": True,
+ "date": "2025-04-09"
+ },
+ "openrouter/quasar-alpha--main": {
+ "name": "Quasar-Alpha",
+ "link": "https://openrouter.ai/openrouter/quasar-alpha",
+ "open-data": "None",
+ "prompted": True,
+ "moe": True,
+ "size": None,
+ "act_param": None,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-04-02"
+ },
+ "agentica-org/DeepCoder-14B-Preview--skip_prefill--main": {
+ "name": "DeepCoder-14B-Preview (w/ Reasoning, 64k tokens, temperature=0.6)",
+ "link": "https://huggingface.co/agentica-org/DeepCoder-14B-Preview",
+ "open-data": "None",
+ "prompted": True,
+ "moe": False,
+ "size": 14,
+ "act_param": 14,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-04-09"
+ },
+ "openrouter/optimus-alpha--main": {
+ "name": "Optimus-Alpha",
+ "link": "https://openrouter.ai/openrouter/optimus-alpha",
+ "open-data": "None",
+ "prompted": True,
+ "moe": True,
+ "size": None,
+ "act_param": None,
+ "open-data": "None",
+ "prefill": False,
+ "date": "2025-04-10"
+ }
+}
\ No newline at end of file
diff --git a/bigcodebench/data/bigcodebench.py b/bigcodebench/data/bigcodebench.py
index 26090f1..87cfdf6 100644
--- a/bigcodebench/data/bigcodebench.py
+++ b/bigcodebench/data/bigcodebench.py
@@ -14,7 +14,7 @@
BIGCODEBENCH_OVERRIDE_PATH = os.environ.get("BIGCODEBENCH_OVERRIDE_PATH", None)
BIGCODEBENCH_HF = "bigcode/bigcodebench"
-BIGCODEBENCH_VERSION = "v0.1.3"
+BIGCODEBENCH_VERSION = "v0.1.4"
def _ready_bigcodebench_path(subset="full", version="default") -> str:
if BIGCODEBENCH_OVERRIDE_PATH:
diff --git a/bigcodebench/evaluate.py b/bigcodebench/evaluate.py
index 9e1fd45..5122a89 100644
--- a/bigcodebench/evaluate.py
+++ b/bigcodebench/evaluate.py
@@ -189,12 +189,19 @@ def evaluate(
# run the evaluation
print(f"Command run in sandbox {e2b_endpoint}")
- sandbox.commands.run("bigcodebench.evaluate --execution 'local' "
- f"--split {split} --subset {subset} --samples {samples} "
- f"--pass_k {pass_k} --save_pass_rate {save_pass_rate} --calibrated {calibrated} "
- f"--parallel {parallel} --selective_evaluate {selective_evaluate} --min_time_limit {min_time_limit} "
+ command = "bigcodebench.evaluate --execution 'local' "\
+ f"--split {split} --subset {subset} --samples {samples} "\
+ f"--pass_k {pass_k} --save_pass_rate {save_pass_rate} --calibrated {calibrated} "\
+ f"--parallel {parallel} --selective_evaluate {selective_evaluate} --min_time_limit {min_time_limit} "\
f"--max_as_limit {max_as_limit} --max_data_limit {max_data_limit} --max_stack_limit {max_stack_limit} "
- f"--check_gt_only {check_gt_only} --no_gt {no_gt}", on_stderr=lambda x: print(x), on_stdout=lambda x: print(x), timeout=60*50)
+
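+    # Boolean flags are appended only when set, so the sandboxed command never carries literal False values.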
+    if check_gt_only:
+        command += "--check_gt_only "
+    if no_gt:
+        command += "--no_gt "
+    if no_execute:
+        command += "--no_execute "
+ sandbox.commands.run(command, on_stdout=lambda x: print(x), on_stderr=lambda x: print(x), timeout=60*60)
if not check_gt_only:
# download the results
@@ -205,9 +212,17 @@ def evaluate(
else:
pass_at_k = dict()
- passk = list(pass_k)
+
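+        # Normalize pass_k into a list of ints: accepts "1,5,10", a single int, or a list/tuple.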
+ if isinstance(pass_k, str):
+ passk = [int(k) for k in pass_k.split(",") if k.strip()]
+ elif isinstance(pass_k, int):
+ passk = [pass_k]
+ elif isinstance(pass_k, (list, tuple)):
+ passk = list(pass_k)
+ else:
+ raise ValueError(f"Invalid type for pass_k: {type(pass_k)}")
- if isinstance(selective_evaluate, str):
+ if selective_evaluate and isinstance(selective_evaluate, str):
selected_ids = set(selective_evaluate.split(","))
else:
try:
@@ -311,14 +326,13 @@ def evaluate(
assert len(completion_id) == len(problems), f"Missing problems in samples. Expected {len(problems)} problems, got {len(completion_id)}"
def stucking_checker():
- while remainings:
- last_size = len(remainings)
- time.sleep(240)
- if last_size != len(remainings) or len(remainings) == 0:
- continue
- # Potential stucking
- warn("No samples had finished testing in the last 240s")
- warn(f"{len(remainings)} samples to be tested: {remainings}")
+ not_done = futures
+ while len(not_done) > 0:
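+            # concurrent.futures.wait returns (done, not_done); with FIRST_COMPLETED it
+            # unblocks as soon as any future finishes, or after the 240s timeout with done empty.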
+ done, not_done = wait(not_done, timeout=240, return_when=FIRST_COMPLETED)
+
+ if len(done) == 0:
+ warn("No samples have finished testing in the last 240s")
+ warn(f"{len(remainings)} samples to be tested: {remainings}")
threading.Thread(target=stucking_checker).start()
diff --git a/bigcodebench/gen/util/anthropic_request.py b/bigcodebench/gen/util/anthropic_request.py
index e53feab..f6d18fd 100644
--- a/bigcodebench/gen/util/anthropic_request.py
+++ b/bigcodebench/gen/util/anthropic_request.py
@@ -16,7 +16,19 @@ def make_auto_request(client: anthropic.Client, *args, **kwargs) -> Message:
try:
signal.signal(signal.SIGALRM, handler)
signal.alarm(100)
- ret = client.messages.create(*args, **kwargs)
+ if "reasoning_budget" in kwargs and "reasoning_beta" in kwargs:
+ kwargs["thinking"] = {
+ "type": "enabled",
+ "budget_tokens": kwargs["reasoning_budget"],
+ }
+ kwargs["betas"] = [kwargs["reasoning_beta"]]
+ kwargs.pop("reasoning_budget")
+ kwargs.pop("reasoning_beta")
+ kwargs.pop("temperature")
+ if "thinking" in kwargs:
+ ret = client.beta.messages.create(*args, **kwargs, stream=True)
+ else:
+ ret = client.messages.create(*args, **kwargs)
signal.alarm(0)
except anthropic.RateLimitError:
print("Rate limit exceeded. Waiting...")
diff --git a/bigcodebench/gen/util/google_request.py b/bigcodebench/gen/util/google_request.py
index 9e13607..5a76362 100644
--- a/bigcodebench/gen/util/google_request.py
+++ b/bigcodebench/gen/util/google_request.py
@@ -1,11 +1,12 @@
import time
-import google.generativeai as genai
+from google import genai
from google.api_core.exceptions import GoogleAPICallError, ResourceExhausted
def make_request(
- client: genai.GenerativeModel,
+ model: str,
+ client: genai.Client,
message: str,
temperature: float,
n: int,
@@ -13,21 +14,34 @@ def make_request(
) -> genai.types.GenerateContentResponse:
kwargs = {"temperature": temperature, "max_output_tokens": max_new_tokens}
- if "-thinking-" in client.model_name:
+ if "-thinking-" in model:
kwargs.pop("max_output_tokens")
-
- response = client.generate_content(
- [{"role": "user", "parts": [message]}],
- generation_config=genai.types.GenerationConfig(
+
+ response = client.models.generate_content(
+ model=model,
+ contents=message,
+ config=genai.types.GenerateContentConfig(
candidate_count=n,
+ safety_settings=[
+ genai.types.SafetySetting(
+ category='HARM_CATEGORY_DANGEROUS_CONTENT',
+ threshold='BLOCK_NONE'
+ ),
+ genai.types.SafetySetting(
+ category='HARM_CATEGORY_SEXUALLY_EXPLICIT',
+ threshold='BLOCK_NONE'
+ ),
+ genai.types.SafetySetting(
+ category='HARM_CATEGORY_HATE_SPEECH',
+ threshold='BLOCK_NONE'
+ ),
+ genai.types.SafetySetting(
+ category='HARM_CATEGORY_HARASSMENT',
+ threshold='BLOCK_NONE'
+ ),
+ ],
**kwargs
- ),
- safety_settings=[
- {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "BLOCK_NONE"},
- {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "BLOCK_NONE"},
- {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "BLOCK_NONE"},
- {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "BLOCK_NONE"},
- ],
+ ),
)
return response
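A standalone sketch of the migrated `google-genai` call, mirroring the hunk above (model name and prompt are placeholders):

```python
from google import genai

client = genai.Client()  # reads GOOGLE_API_KEY from the environment

response = client.models.generate_content(
    model="gemini-2.0-flash",  # placeholder model name
    contents="Write a binary search in Python.",
    config=genai.types.GenerateContentConfig(
        candidate_count=1,
        temperature=0.0,
        max_output_tokens=1280,
    ),
)
print(response.text)
```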
diff --git a/bigcodebench/gen/util/hf_inference_request.py b/bigcodebench/gen/util/hf_inference_request.py
new file mode 100644
index 0000000..fe4aaf3
--- /dev/null
+++ b/bigcodebench/gen/util/hf_inference_request.py
@@ -0,0 +1,34 @@
+import time
+
+from huggingface_hub import InferenceClient
+from huggingface_hub.inference._generated.types import TextGenerationOutput
+
+
+def make_request(
+ client: InferenceClient,
+ message: str,
+ model: str,
+ temperature: float,
+ n: int,
+ max_new_tokens: int = 2048,
+) -> TextGenerationOutput:
+    # Greedy decoding through the serverless endpoint; `temperature` and `n`
+    # are accepted for signature parity with the other providers but unused here.
+    response = client.text_generation(
+        model=model,
+        prompt=message,
+        do_sample=False,
+        max_new_tokens=max_new_tokens,
+    )
+
+ return response
+
+
+def make_auto_request(*args, **kwargs) -> TextGenerationOutput:
+ ret = None
+ while ret is None:
+ try:
+ ret = make_request(*args, **kwargs)
+ except Exception as e:
+ print("Unknown error. Waiting...")
+ print(e)
+ time.sleep(1)
+ return ret
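A usage sketch for the new helper (the model id and prompt are illustrative):

```python
from huggingface_hub import InferenceClient
from bigcodebench.gen.util.hf_inference_request import make_auto_request

client = InferenceClient(provider="hf-inference", api_key="hf_...")
out = make_auto_request(
    client,
    message="def fib(n):",
    model="bigcode/starcoder2-15b",  # illustrative model id
    temperature=0.0,
    n=1,
)
print(out)
```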
diff --git a/bigcodebench/gen/util/openai_request.py b/bigcodebench/gen/util/openai_request.py
index f8db3f5..3c8b741 100644
--- a/bigcodebench/gen/util/openai_request.py
+++ b/bigcodebench/gen/util/openai_request.py
@@ -17,7 +17,7 @@ def make_request(
kwargs["top_p"] = 0.95
kwargs["max_completion_tokens"] = max_tokens
kwargs["temperature"] = temperature
- if model.startswith("o1-") or model.startswith("o3-") or model.endswith("-reasoner"): # pop top-p and max_completion_tokens
+    if any(model.startswith(m) or model.endswith(m) for m in ["o1-", "o3-", "reasoner", "grok-3-mini-beta"]): # reasoning models reject top_p, max_completion_tokens, and temperature
kwargs.pop("top_p")
kwargs.pop("max_completion_tokens")
kwargs.pop("temperature")
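The broadened check strips all sampling controls for any matching model id; a condensed, self-contained sketch of the effect:

```python
def build_kwargs(model: str, temperature: float, max_tokens: int) -> dict:
    kwargs = {"top_p": 0.95, "max_completion_tokens": max_tokens, "temperature": temperature}
    # Reasoning-style models reject explicit sampling controls, so drop them.
    reasoning_markers = ["o1-", "o3-", "reasoner", "grok-3-mini-beta"]
    if any(model.startswith(m) or model.endswith(m) for m in reasoning_markers):
        for key in ("top_p", "max_completion_tokens", "temperature"):
            kwargs.pop(key, None)
    return kwargs

assert build_kwargs("o3-mini", 0.0, 1280) == {}
assert "top_p" in build_kwargs("gpt-4o", 0.0, 1280)
```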
diff --git a/bigcodebench/generate.py b/bigcodebench/generate.py
index bcf1463..adbf892 100644
--- a/bigcodebench/generate.py
+++ b/bigcodebench/generate.py
@@ -127,12 +127,19 @@ def run_codegen(
split: str,
subset: str,
root: str = "bcb_results",
+ lora_path: str = None,
bs: Optional[int] = None,
n_samples: int = 1,
temperature: float = 0.0,
max_new_tokens: int = 1280,
+ # vllm
+ max_model_len: int = 12800,
greedy: bool = False,
+ # openai
reasoning_effort: str = "medium",
+ # anthropic
+ reasoning_budget: int = 0,
+ reasoning_beta: str = "output-128k-2025-02-19",
strip_newlines: bool = False,
direct_completion: bool = False,
resume: bool = True,
@@ -170,9 +177,13 @@ def run_codegen(
backend=backend,
subset=subset,
split=split,
+ lora_path=lora_path,
temperature=temperature,
max_new_tokens=max_new_tokens,
+ max_model_len=max_model_len,
reasoning_effort=reasoning_effort,
+ reasoning_budget=reasoning_budget,
+ reasoning_beta=reasoning_beta,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
prefill=not skip_prefill,
@@ -186,9 +197,15 @@ def run_codegen(
)
extra = "-" + subset if subset != "full" else ""
- if reasoning_effort and model.startswith("o1-") or model.startswith("o3-") or model.endswith("-reasoner"):
+ if backend == "openai" and reasoning_effort and any(model.startswith(m) or model.endswith(m) for m in ["o1-", "o3-", "reasoner", "grok-3-mini-beta"]):
model = model + f"--{reasoning_effort}"
-
+
+ if lora_path:
+ model = model + f"--lora-{lora_path}"
+
+ if backend == "anthropic" and reasoning_budget and reasoning_beta:
+ model = model + f"--{reasoning_budget}-{reasoning_beta}"
+
if skip_prefill:
identifier = model.replace("/", "--") + "--skip_prefill" + f"--{revision}--bigcodebench{extra}-{split}--{backend}-{temperature}-{n_samples}-sanitized_calibrated.jsonl"
else:
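The suffixing rules above compose into the output identifier; a sketch with illustrative values:

```python
# Illustrative values only; mirrors the suffixing rules in the hunk above.
model = "claude-3-7-sonnet-20250219"
backend = "anthropic"
lora_path = None
reasoning_budget, reasoning_beta = 8192, "output-128k-2025-02-19"

if lora_path:
    model += f"--lora-{lora_path}"
if backend == "anthropic" and reasoning_budget and reasoning_beta:
    model += f"--{reasoning_budget}-{reasoning_beta}"

print(model)  # claude-3-7-sonnet-20250219--8192-output-128k-2025-02-19
```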
diff --git a/bigcodebench/provider/__init__.py b/bigcodebench/provider/__init__.py
index dbadfd4..4cb3410 100644
--- a/bigcodebench/provider/__init__.py
+++ b/bigcodebench/provider/__init__.py
@@ -6,11 +6,16 @@ def make_model(
backend: str,
subset: str,
split: str,
+ lora_path: str = None,
dataset: str = "bigcodebench",
temperature: float = 0.0,
max_new_tokens: int = 1280,
- # o1 and o3 only
+ max_model_len: int = 12800,
+ # openai only
reasoning_effort: str = "medium",
+ # anthropic only
+ reasoning_budget: int = 0,
+ reasoning_beta: str = "output-128k-2025-02-19",
# instruction model only
instruction_prefix: str = None,
response_prefix: str = None,
@@ -35,8 +40,10 @@ def make_model(
name=model,
subset=subset,
split=split,
+ lora_path=lora_path,
temperature=temperature,
max_new_tokens=max_new_tokens,
+ max_model_len=max_model_len,
revision=revision,
dataset=dataset,
direct_completion=direct_completion,
@@ -55,6 +62,7 @@ def make_model(
name=model,
subset=subset,
split=split,
+ lora_path=lora_path,
temperature=temperature,
max_new_tokens=max_new_tokens,
revision=revision,
@@ -68,6 +76,19 @@ def make_model(
tokenizer_name=tokenizer_name,
tokenizer_legacy=tokenizer_legacy,
)
+ elif backend == "hf-inference":
+ from bigcodebench.provider.hf_inference import HuggingFaceInferenceDecoder
+
+ return HuggingFaceInferenceDecoder(
+ name=model,
+ subset=subset,
+ split=split,
+ temperature=temperature,
+ max_new_tokens=max_new_tokens,
+ direct_completion=direct_completion,
+ instruction_prefix=instruction_prefix,
+ response_prefix=response_prefix,
+ )
elif backend == "openai":
from bigcodebench.provider.openai import OpenAIChatDecoder
@@ -105,6 +126,8 @@ def make_model(
split=split,
temperature=temperature,
max_new_tokens=max_new_tokens,
+ reasoning_budget=reasoning_budget,
+ reasoning_beta=reasoning_beta,
instruction_prefix=instruction_prefix,
response_prefix=response_prefix,
)
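With the new branch in place, selecting the serverless backend looks like this; a sketch in which the model id and prefixes are illustrative, the first positional argument is assumed to be the model name (it sits just above this hunk), and the remaining arguments take their defaults:

```python
from bigcodebench.provider import make_model

decoder = make_model(
    "bigcode/starcoder2-15b",  # model, first positional argument
    backend="hf-inference",
    subset="full",
    split="complete",
    temperature=0.0,
    max_new_tokens=1280,
    instruction_prefix="Please provide a self-contained Python solution.",
    response_prefix="Here is the Python solution:",
)
outputs = decoder.codegen(["def fib(n):"], do_sample=False, num_samples=1)
```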
diff --git a/bigcodebench/provider/anthropic.py b/bigcodebench/provider/anthropic.py
index 1969e0c..b4a7e43 100644
--- a/bigcodebench/provider/anthropic.py
+++ b/bigcodebench/provider/anthropic.py
@@ -9,9 +9,11 @@
from bigcodebench.provider.utility import make_raw_chat_prompt
class AnthropicDecoder(DecoderBase):
- def __init__(self, name: str, **kwargs) -> None:
+ def __init__(self, name: str, reasoning_budget: int = 0, reasoning_beta: str = "output-128k-2025-02-19", **kwargs) -> None:
super().__init__(name, **kwargs)
self.client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_KEY"))
+ self.reasoning_budget = reasoning_budget
+ self.reasoning_beta = reasoning_beta
def codegen(
self, prompts: List[str], do_sample: bool = True, num_samples: int = 200
@@ -43,8 +45,20 @@ def codegen(
max_tokens=self.max_new_tokens,
temperature=self.temperature,
stop_sequences=self.eos,
+ reasoning_budget=self.reasoning_budget,
+ reasoning_beta=self.reasoning_beta,
)
- outputs.append(ret.content[0].text)
+ if isinstance(ret, anthropic.Stream):
+ output = ""
+ for chunk in ret:
+ if chunk.type == "content_block_delta":
+ # if chunk.delta.type == "thinking_delta":
+ # output += chunk.delta.thinking
+ if chunk.delta.type == "text_delta":
+ output += chunk.delta.text
+ outputs.append(output)
+ else:
+ outputs.append(ret.content[0].text)
all_outputs.append(outputs)
return all_outputs
diff --git a/bigcodebench/provider/google.py b/bigcodebench/provider/google.py
index 2194c47..e3b18ff 100644
--- a/bigcodebench/provider/google.py
+++ b/bigcodebench/provider/google.py
@@ -2,7 +2,7 @@
from typing import List
from tqdm import tqdm
-import google.generativeai as genai
+from google import genai
from bigcodebench.provider.base import DecoderBase
from bigcodebench.gen.util.google_request import make_auto_request
@@ -12,8 +12,8 @@
class GoogleDecoder(DecoderBase):
def __init__(self, name: str, **kwargs):
super().__init__(name, **kwargs)
- genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
- self.client = genai.GenerativeModel(name)
+ self.model = name
+ self.client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))
def codegen(
self, prompts: List[str], do_sample: bool = True, num_samples: int = 200
@@ -34,7 +34,8 @@ def codegen(
tokenizer=None,
)
ret = make_auto_request(
- self.client,
+ model=self.model,
+ client=self.client,
message=message,
n=num_samples,
temperature=self.temperature,
diff --git a/bigcodebench/provider/hf.py b/bigcodebench/provider/hf.py
index a85957d..fbe50e5 100644
--- a/bigcodebench/provider/hf.py
+++ b/bigcodebench/provider/hf.py
@@ -41,7 +41,8 @@ def __init__(
if self.is_direct_completion(): # no chat template
self.eos += extra_eos_for_direct_completion(dataset)
else: # with chat template
- self.eos += ["\n```\n"]
+ if self.prefill and "```" in self.response_prefix:
+ self.eos += ["\n```\n"]
print(f"{self.eos = }")
self.model = AutoModelForCausalLM.from_pretrained(name, **kwargs)
diff --git a/bigcodebench/provider/hf_inference.py b/bigcodebench/provider/hf_inference.py
new file mode 100644
index 0000000..1737448
--- /dev/null
+++ b/bigcodebench/provider/hf_inference.py
@@ -0,0 +1,54 @@
+import os
+from typing import List
+from tqdm import tqdm
+
+from huggingface_hub import InferenceClient
+
+from bigcodebench.provider.base import DecoderBase
+from bigcodebench.gen.util.hf_inference_request import make_auto_request
+from bigcodebench.provider.utility import make_raw_chat_prompt
+
+
+class HuggingFaceInferenceDecoder(DecoderBase):
+ def __init__(self, name: str, **kwargs):
+ super().__init__(name, **kwargs)
+ self.client = InferenceClient(
+ provider="hf-inference", api_key=os.getenv("HF_INFERENCE_API_KEY")
+ )
+
+ def codegen(
+ self, prompts: List[str], do_sample: bool = True, num_samples: int = 200
+ ) -> List[str]:
+ if do_sample:
+ assert self.temperature > 0, "Temperature must be positive for sampling"
+
+ all_outputs = []
+
+ for prompt in tqdm(prompts):
+ outputs = []
+ message = (
+ prompt
+ if self.is_direct_completion()
+ else make_raw_chat_prompt(
+ task_prompt=prompt,
+ subset=self.subset,
+ split=self.split,
+ instruction_prefix=self.instruction_prefix,
+ response_prefix=self.response_prefix,
+ tokenizer=None,
+ )
+ )
+ ret = make_auto_request(
+ self.client,
+ message=message,
+ model=self.name,
+ n=num_samples,
+ temperature=self.temperature,
+ max_new_tokens=self.max_new_tokens,
+ )
+ outputs.append(ret)
+ all_outputs.append(outputs)
+ return all_outputs
+
+ def is_direct_completion(self) -> bool:
+ return self.direct_completion
diff --git a/bigcodebench/provider/openai.py b/bigcodebench/provider/openai.py
index 12790f6..ff1459f 100644
--- a/bigcodebench/provider/openai.py
+++ b/bigcodebench/provider/openai.py
@@ -28,7 +28,7 @@ def codegen(
tokenizer=None,
) for prompt in prompts]
-        # use concurrency based batching for o1 and deepseek models
+        # use concurrency-based batching for reasoning models
-        if self.name.startswith("o1-") or self.name.startswith("o3-") or self.name.startswith("deepseek"):
+        if any(self.name.startswith(m) or self.name.endswith(m) for m in ["o1-", "o3-", "reasoner", "grok-3-mini-beta"]):
return self._codegen_batch_via_concurrency(messages, num_samples)
return self._codegen_api_batch(messages, num_samples)
diff --git a/bigcodebench/provider/vllm.py b/bigcodebench/provider/vllm.py
index 5ce67ab..41cd251 100644
--- a/bigcodebench/provider/vllm.py
+++ b/bigcodebench/provider/vllm.py
@@ -3,6 +3,8 @@
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
+from vllm.lora.request import LoRARequest
+from huggingface_hub import snapshot_download
from bigcodebench.provider.base import DecoderBase
from bigcodebench.provider.utility import (
@@ -11,7 +13,7 @@
)
class VllmDecoder(DecoderBase):
- def __init__(self, name: str, dataset: str, tp: int, **kwargs) -> None:
+ def __init__(self, name: str, lora_path: str, dataset: str, tp: int, max_model_len: int, **kwargs) -> None:
super().__init__(name, **kwargs)
kwargs = {
@@ -27,8 +29,19 @@ def __init__(self, name: str, dataset: str, tp: int, **kwargs) -> None:
if self.is_direct_completion():
self.eos += extra_eos_for_direct_completion(dataset)
else:
- self.eos += ["\n```\n"]
- self.llm = LLM(model=name, max_model_len=self.max_new_tokens, **kwargs)
+ if self.prefill and "```" in self.response_prefix:
+ self.eos += ["\n```\n"]
+
+ self.lora_request = None
+ if lora_path:
+ local_lora_path = snapshot_download(lora_path)
+ self.lora_request = LoRARequest(
+ "lora",
+ 1,
+ local_lora_path,
+ )
+
+        self.llm = LLM(model=name, max_model_len=max_model_len, enable_lora=self.lora_request is not None, **kwargs)
self.llm.set_tokenizer(tokenizer=self.tokenizer)
def is_direct_completion(self) -> bool:
@@ -63,6 +76,7 @@ def codegen(
stop=self.eos,
skip_special_tokens=self.skip_special_tokens,
),
+ lora_request=self.lora_request,
use_tqdm=True,
)
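A condensed sketch of the LoRA wiring this hunk adds (the model and adapter repo ids are placeholders):

```python
from huggingface_hub import snapshot_download
from vllm import LLM, SamplingParams
from vllm.lora.request import LoRARequest

# Download the adapter locally, then hand vLLM a LoRARequest at generate time.
local_lora_path = snapshot_download("user/some-lora-adapter")  # placeholder repo id
lora_request = LoRARequest("lora", 1, local_lora_path)

llm = LLM(model="bigcode/starcoder2-15b", max_model_len=12800, enable_lora=True)
outputs = llm.generate(
    ["def fib(n):"],
    SamplingParams(temperature=0.0, max_tokens=256),
    lora_request=lora_request,
)
```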
diff --git a/run.sh b/run.sh
index 6242abd..8bfcdd7 100755
--- a/run.sh
+++ b/run.sh
@@ -10,5 +10,4 @@ bigcodebench.evaluate \
--model $MODEL \
--split $SPLIT \
--subset $SUBSET \
- --backend $BACKEND \
- --check_gt_only
\ No newline at end of file
+ --backend $BACKEND
\ No newline at end of file
diff --git a/sandbox-templates/e2b.Dockerfile b/sandbox-templates/e2b.Dockerfile
index c6ba2ca..a667880 100644
--- a/sandbox-templates/e2b.Dockerfile
+++ b/sandbox-templates/e2b.Dockerfile
@@ -27,6 +27,7 @@ RUN adduser --disabled-password --gecos "" bigcodebenchuser
RUN rm -rf /bigcodebench
+# No-op layer used to manually bust the Docker build cache
+RUN echo 1
# Acquire benchmark code to local
ADD "https://api.github.com/repos/bigcode-project/bigcodebench/commits?per_page=1" latest_commit
RUN git clone https://github.com/bigcode-project/bigcodebench.git /bigcodebench
diff --git a/setup.cfg b/setup.cfg
index cc20139..ea71dc0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -35,10 +35,10 @@ install_requires =
rich
accelerate>=0.30.1
anthropic>=0.26.1
- google-generativeai>=0.5.4
+ google-genai
mistralai>=0.2.0,<1.0.0
openai>=1.11.1
- e2b
+ e2b<=1.11.1
[options.entry_points]
console_scripts =
diff --git a/tools/fix_v023.py b/tools/fix_v023.py
new file mode 100644
index 0000000..22b1559
--- /dev/null
+++ b/tools/fix_v023.py
@@ -0,0 +1,91 @@
+from datasets import load_dataset, Dataset, DatasetDict
+from huggingface_hub import HfApi
+
+import json
+import copy
+
+BIGCODEBENCH_HF = "bigcode/bigcodebench"
+BIGCODEBENCH_HARD_HF = "bigcode/bigcodebench-hard"
+BIGCODEBENCH_VERSION = "v0.1.3"
+BIGCODEBENCH_UPDATE = "bigcode/bcb_update"
+BIGCODEBENCH_NEW_VERSION = "v0.1.4"
+
+def map_ds(sample):
+ if sample["task_id"] in ["BigCodeBench/211"]:
+ sample['test'] = sample['test'].replace(
+"""
+ mock_response = MagicMock()
+ mock_response.content = MOCK_CONTENT
+""",
+"""
+ mock_response = MagicMock()
+ mock_response.content = MOCK_CONTENT
+ mock_response.status_code = 200
+"""
+ )
+ if sample["task_id"] in ["BigCodeBench/215"]:
+ sample['test'] = sample['test'].replace(
+"""
+ mock_response = Mock()
+""",
+"""
+ mock_response = Mock()
+ mock_response.status_code = 200
+"""
+ )
+ sample['test'] = sample['test'].replace(
+"""
+ mock_response.text =""",
+"""
+ MOCK_TEXT ="""
+ )
+ sample['test'] = sample['test'].replace(
+"""
+ mock_get.return_value = mock_response
+""",
+"""
+ mock_response.text = MOCK_TEXT
+ mock_response.json = lambda: json.loads(MOCK_TEXT)
+ mock_get.return_value = mock_response
+"""
+ )
+ sample['complete_prompt'] = sample['complete_prompt'].replace("Thif function will raise", "This function will raise")
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace("Thif function will raise", "This function will raise")
+ sample['doc_struct'] = sample['doc_struct'].replace("Thif function will raise", "This function will raise")
+ return sample
+
+if __name__ == "__main__":
+ api = HfApi()
+ ds_dict = load_dataset(BIGCODEBENCH_HF)
+ hard_ds_dict = load_dataset(BIGCODEBENCH_HARD_HF)
+ ds = ds_dict[BIGCODEBENCH_VERSION]
+ hard_ds = hard_ds_dict[BIGCODEBENCH_VERSION]
+ function_id = [211, 215]
+
+ new_ds = ds.map(map_ds)
+ new_ds.to_json("BigCodeBench.jsonl")
+ ds_dict[BIGCODEBENCH_NEW_VERSION] = new_ds
+ ds_dict.push_to_hub(BIGCODEBENCH_HF)
+
+ new_hard_ds = hard_ds.map(map_ds)
+ new_hard_ds.to_json("BigCodeBench-Hard.jsonl")
+ hard_ds_dict[BIGCODEBENCH_NEW_VERSION] = new_hard_ds
+ hard_ds_dict.push_to_hub(BIGCODEBENCH_HARD_HF)
+
+ for i in function_id:
+ old_sample = ds.select([i])
+ new_sample = new_ds.select([i])
+ old_sample.to_json("old.jsonl")
+ new_sample.to_json("new.jsonl")
+ api.upload_file(
+ path_or_fileobj="old.jsonl",
+ path_in_repo=f"{i}/old.jsonl",
+ repo_id=BIGCODEBENCH_UPDATE,
+ # repo_type="dataset"
+ )
+ api.upload_file(
+ path_or_fileobj="new.jsonl",
+ path_in_repo=f"{i}/new.jsonl",
+ repo_id=BIGCODEBENCH_UPDATE,
+ # repo_type="dataset"
+ )
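A quick sanity check for the patched rows, using the script's own assumption that row index matches the task number (`ds` and `map_ds` as defined above):

```python
# Assumes `ds` and `map_ds` from the script above are in scope.
for i in [211, 215]:
    patched = map_ds(dict(ds[i]))
    assert patched["test"] != ds[i]["test"], f"row {i} was not modified"
```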
diff --git a/tools/fix_v025.py b/tools/fix_v025.py
new file mode 100644
index 0000000..edbeb71
--- /dev/null
+++ b/tools/fix_v025.py
@@ -0,0 +1,135 @@
+from datasets import load_dataset
+from huggingface_hub import HfApi
+
+BIGCODEBENCH_HF = "bigcode/bigcodebench"
+BIGCODEBENCH_HARD_HF = "bigcode/bigcodebench-hard"
+BIGCODEBENCH_VERSION = "v0.1.4"
+BIGCODEBENCH_UPDATE = "bigcode/bcb_update"
+BIGCODEBENCH_NEW_VERSION = "v0.1.5"
+
+def map_ds(sample):
+ if sample["task_id"] in ["BigCodeBench/332"]:
+ sample['code_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['code_prompt']
+ sample['complete_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['complete_prompt']
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+ )
+
+ if sample["task_id"] in ["BigCodeBench/334"]:
+ sample['code_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['code_prompt']
+ sample['complete_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['complete_prompt']
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('punkt')\n"
+ )
+
+ if sample["task_id"] in ["BigCodeBench/376"]:
+ sample['code_prompt'] = sample['code_prompt'].replace(
+ "import nltk\n",
+ "import nltk\nnltk.download('stopwords')\n",
+ 1
+ )
+ sample['complete_prompt'] = sample['complete_prompt'].replace(
+ "import nltk\n",
+ "import nltk\nnltk.download('stopwords')\n",
+ 1
+ )
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+ )
+
+ if sample["task_id"] in ["BigCodeBench/383"]:
+ sample['code_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['code_prompt']
+ sample['complete_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['complete_prompt']
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('punkt')\n"
+ )
+
+ if sample["task_id"] in ["BigCodeBench/633"]:
+ sample['code_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['code_prompt']
+ sample['complete_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['complete_prompt']
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+ )
+
+ if sample["task_id"] in ["BigCodeBench/635"]:
+ sample['code_prompt'] = sample['code_prompt'].replace(
+ "# Importing the required libraries",
+ "# Importing the required libraries\nimport nltk\nnltk.download('stopwords')\n"
+ )
+
+ sample['complete_prompt'] = sample['complete_prompt'].replace(
+ "# Importing the required libraries",
+ "# Importing the required libraries\nimport nltk\nnltk.download('stopwords')\n"
+ )
+
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+ )
+
+ if sample["task_id"] in ["BigCodeBench/849"]:
+ sample['code_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['code_prompt']
+ sample['complete_prompt'] = "import nltk\nnltk.download('stopwords')\n" + sample['complete_prompt']
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('stopwords')\n"
+ )
+
+ if sample["task_id"] in ["BigCodeBench/940"]:
+ sample['code_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['code_prompt']
+ sample['complete_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['complete_prompt']
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('punkt')\n"
+ )
+
+ if sample["task_id"] in ["BigCodeBench/1109"]:
+ sample['code_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['code_prompt']
+ sample['complete_prompt'] = "import nltk\nnltk.download('punkt')\n" + sample['complete_prompt']
+ sample['instruct_prompt'] = sample['instruct_prompt'].replace(
+ "\nYou should write self-contained code starting with:\n```\n",
+ "\nYou should write self-contained code starting with:\n```\nimport nltk\nnltk.download('punkt')\n"
+ )
+
+ return sample
+
+if __name__ == "__main__":
+ api = HfApi()
+ ds_dict = load_dataset(BIGCODEBENCH_HF)
+ hard_ds_dict = load_dataset(BIGCODEBENCH_HARD_HF)
+ ds = ds_dict[BIGCODEBENCH_VERSION]
+ hard_ds = hard_ds_dict[BIGCODEBENCH_VERSION]
+ function_id = [332, 334, 376, 383, 633, 635, 849, 940, 1109]
+
+ new_ds = ds.map(map_ds)
+ new_ds.to_json("BigCodeBench.jsonl")
+ ds_dict[BIGCODEBENCH_NEW_VERSION] = new_ds
+ ds_dict.push_to_hub(BIGCODEBENCH_HF)
+
+ new_hard_ds = hard_ds.map(map_ds)
+ new_hard_ds.to_json("BigCodeBench-Hard.jsonl")
+ hard_ds_dict[BIGCODEBENCH_NEW_VERSION] = new_hard_ds
+ hard_ds_dict.push_to_hub(BIGCODEBENCH_HARD_HF)
+
+ for i in function_id:
+ old_sample = ds.select([i])
+ new_sample = new_ds.select([i])
+ old_sample.to_json("old.jsonl")
+ new_sample.to_json("new.jsonl")
+ api.upload_file(
+ path_or_fileobj="old.jsonl",
+ path_in_repo=f"{i}/old.jsonl",
+ repo_id=BIGCODEBENCH_UPDATE,
+ # repo_type="dataset"
+ )
+ api.upload_file(
+ path_or_fileobj="new.jsonl",
+ path_in_repo=f"{i}/new.jsonl",
+ repo_id=BIGCODEBENCH_UPDATE,
+ # repo_type="dataset"
+ )
\ No newline at end of file