From 72cae1cb92630aad54768b3c851c5947ad7cc8f0 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Sun, 28 Sep 2025 21:57:20 +0200 Subject: [PATCH 01/20] Implemetation of v1.1.0 --- pyproject.toml | 2 +- regexsolver/__init__.py | 366 ++++++++++++++---- regexsolver/details.py | 29 +- setup.py | 2 +- .../assets/response_analyze_cardinality.json | 4 + ...ils.json => response_analyze_details.json} | 2 +- ...on => response_analyze_details_empty.json} | 2 +- ...=> response_analyze_details_infinite.json} | 2 +- ...setOf.json => response_analyze_empty.json} | 0 .../assets/response_analyze_empty_string.json | 4 + ....json => response_analyze_equivalent.json} | 0 tests/assets/response_analyze_length.json | 5 + .../assets/response_analyze_length_empty.json | 3 + tests/assets/response_analyze_subset.json | 4 + tests/assets/response_analyze_total.json | 4 + tests/assets/response_compute_concat.json | 4 + ....json => response_compute_difference.json} | 0 ...son => response_compute_intersection.json} | 0 ...union.json => response_compute_union.json} | 0 ...gs.json => response_generate_strings.json} | 0 tests/serialization_test.py | 30 +- tests/term_operation_test.py | 210 +++++++--- 22 files changed, 525 insertions(+), 148 deletions(-) create mode 100644 tests/assets/response_analyze_cardinality.json rename tests/assets/{response_getDetails.json => response_analyze_details.json} (84%) rename tests/assets/{response_getDetails_empty.json => response_analyze_details_empty.json} (85%) rename tests/assets/{response_getDetails_infinite.json => response_analyze_details_infinite.json} (83%) rename tests/assets/{response_isSubsetOf.json => response_analyze_empty.json} (100%) create mode 100644 tests/assets/response_analyze_empty_string.json rename tests/assets/{response_isEquivalentTo.json => response_analyze_equivalent.json} (100%) create mode 100644 tests/assets/response_analyze_length.json create mode 100644 
tests/assets/response_analyze_length_empty.json create mode 100644 tests/assets/response_analyze_subset.json create mode 100644 tests/assets/response_analyze_total.json create mode 100644 tests/assets/response_compute_concat.json rename tests/assets/{response_subtraction.json => response_compute_difference.json} (100%) rename tests/assets/{response_intersection.json => response_compute_intersection.json} (100%) rename tests/assets/{response_union.json => response_compute_union.json} (100%) rename tests/assets/{response_generateStrings.json => response_generate_strings.json} (100%) diff --git a/pyproject.toml b/pyproject.toml index bda1679..796bf24 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "regexsolver" -version = "1.0.3" +version = "1.1.0" authors = [ { name = "RegexSolver", email = "contact@regexsolver.com" } ] diff --git a/regexsolver/__init__.py b/regexsolver/__init__.py index ae744d6..a107e23 100644 --- a/regexsolver/__init__.py +++ b/regexsolver/__init__.py @@ -1,12 +1,11 @@ +from enum import Enum from regexsolver.details import Details, Cardinality, Length from typing import List, Optional -from pydantic import BaseModel +from pydantic import Field, BaseModel import requests -from regexsolver.details import Details - class ApiError(Exception): """ @@ -28,7 +27,7 @@ def __init__(self): self.base_url = "https://api.regexsolver.com/" self.api_token = None self.headers = { - 'User-Agent': 'RegexSolver Python / 1.0.3', + 'User-Agent': 'RegexSolver Python / 1.1.0', 'Content-Type': 'application/json' } @@ -57,70 +56,115 @@ def _request(self, endpoint: str, request: BaseModel) -> dict: response = requests.post( self._get_request_url(endpoint), headers=self.headers, - json=request.model_dump() + json=request.model_dump(exclude_none=True) ) if response.ok: return response.json() - else: - raise ApiError(response.json().get('message')) - - def compute_intersection(self, request: 
'MultiTermsRequest') -> 'Term': + try: + data = response.json() + msg = data.get("message", response.text) + except Exception: + msg = response.text + raise ApiError(msg) + + # Analyze + + def _analyze_details(self, term: 'Term') -> Details: + return Details(**self._request('api/analyze/details', term)) + + def _analyze_cardinality(self, term: 'Term') -> Cardinality: + return Cardinality(**self._request('api/analyze/cardinality', term)) + + def _analyze_length(self, term: 'Term') -> Length: + return Length(**self._request('api/analyze/length', term)) + + def _analyze_equivalent(self, request: 'MultiTermsRequest') -> bool: + return self._request('api/analyze/equivalent', request).get('value') + + def _analyze_subset(self, request: 'MultiTermsRequest') -> bool: + return self._request('api/analyze/subset', request).get('value') + + def _analyze_empty(self, term: 'Term') -> bool: + return self._request('api/analyze/empty', term).get('value') + + def _analyze_total(self, term: 'Term') -> bool: + return self._request('api/analyze/total', term).get('value') + + def _analyze_empty_string(self, term: 'Term') -> bool: + return self._request('api/analyze/empty_string', term).get('value') + + def _analyze_dot(self, term: 'Term') -> str: + return self._request('api/analyze/dot', term).get('value') + + # Compute + + def _compute_repeat(self, request: 'RepeatRequest') -> 'Term': + return Term(**self._request('api/compute/repeat', request)) + + def _compute_intersection(self, request: 'MultiTermsRequest') -> 'Term': return Term(**self._request('api/compute/intersection', request)) - def compute_union(self, request: 'MultiTermsRequest') -> 'Term': + def _compute_union(self, request: 'MultiTermsRequest') -> 'Term': return Term(**self._request('api/compute/union', request)) - def compute_subtraction(self, request: 'MultiTermsRequest') -> 'Term': - return Term(**self._request('api/compute/subtraction', request)) - - def get_details(self, term: 'Term') -> Details: - return 
Details(**self._request('api/analyze/details', term)) - - def equivalence(self, request: 'MultiTermsRequest') -> bool: - return self._request('api/analyze/equivalence', request).get('value') + def _compute_difference(self, request: 'MultiTermsRequest') -> 'Term': + return Term(**self._request('api/compute/difference', request)) + + def _compute_concat(self, request: 'MultiTermsRequest') -> 'Term': + return Term(**self._request('api/compute/concat', request)) + + # Generate - def subset(self, request: 'MultiTermsRequest') -> bool: - return self._request('api/analyze/subset', request).get('value') - - def generate_strings(self, request: 'GenerateStringsRequest') -> List[str]: + def _generate_strings(self, request: 'GenerateStringsRequest') -> List[str]: return self._request('api/generate/strings', request).get('value') - - -_REGEX_PREFIX = "regex" -_FAIR_PREFIX = "fair" -_UNKNOWN_PREFIX = "unknown" + + +class TermType(str, Enum): + FAIR = "fair" + REGEX = "regex" class Term(BaseModel): """ - This class represents a term on which it is possible to perform operations. - It can either be a regular expression (regex) or a FAIR (Fast Automaton Internal Representation). + Represents a term on which operations can be performed. + A term can be either: + - A regular expression (`regex`) + - A FAIR (Fast Automaton Internal Representation, `fair`) + + Convenience constructors: + - `Term.regex(pattern: str)` + - `Term.fair(fair: str)` """ - type: str + type: TermType value: str _details: Optional['Details'] = None + _empty: Optional[bool] = None + _total: Optional[bool] = None + _empty_string: Optional[bool] = None + + model_config = {"use_enum_values": True} @classmethod def fair(cls, fair: str) -> 'Term': """ Initialize a Fast Automaton Internal Representation (FAIR). """ - return cls(type=_FAIR_PREFIX, value=fair) + return cls(type=TermType.FAIR, value=fair) @classmethod def regex(cls, pattern: str) -> 'Term': """ Initialize a regex. 
""" - return cls(type=_REGEX_PREFIX, value=pattern) + return cls(type=TermType.REGEX, value=pattern) def get_fair(self) -> Optional[str]: """ Return the Fast Automaton Internal Representation (FAIR). """ - if type == _FAIR_PREFIX: + if self.type == TermType.FAIR: return self.value return None @@ -128,88 +172,223 @@ def get_pattern(self) -> Optional[str]: """ Return the regular expression pattern. """ - if type == _REGEX_PREFIX: + if self.type == TermType.REGEX: return self.value return None def get_details(self) -> Details: """ - Get the details of this term. - Cache the result to avoid calling the API again if this method is called multiple times. + Analyze this term and return detailed information including cardinality, + length, and whether it is empty or total. + + Results are cached on the instance to avoid repeated API calls. """ if self._details: return self._details else: - self._details = RegexSolver.get_instance().get_details(self) + self._details = RegexSolver.get_instance()._analyze_details(self) return self._details - def generate_strings(self, count: int) -> List[str]: + def generate_strings(self, count: int, execution_timeout=None) -> List[str]: + """ + Generate up to `count` example strings that match this term. + + Parameters: + count: Maximum number of unique strings to generate. + execution_timeout: Timeout in milliseconds for the server. + + Returns: + A list of strings matched by this term. """ - Generate the given number of unique strings matched by this term. + request = GenerateStringsRequest(term=self, count=count, options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._generate_strings(request) + + def intersection(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - request = GenerateStringsRequest(term=self, count=count) - return RegexSolver.get_instance().generate_strings(request) + Compute the intersection of this term with one or more other terms. 
- def intersection(self, *terms: 'Term') -> 'Term': + Parameters: + terms: Additional terms to intersect with. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. + + Returns: + A new term representing the intersection. """ - Compute the intersection with the given terms and return the resulting term. + request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_intersection(request) + + def union(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - request = MultiTermsRequest(terms=[self] + list(terms)) - return RegexSolver.get_instance().compute_intersection(request) + Compute the union of this term with one or more other terms. - def union(self, *terms: 'Term') -> 'Term': + Parameters: + terms: Terms to combine with this one. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. + + Returns: + A new term representing the union. """ - Compute the union with the given terms and return the resulting term. + request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_union(request) + + def difference(self, term: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - request = MultiTermsRequest(terms=[self] + list(terms)) - return RegexSolver.get_instance().compute_union(request) + Compute the difference between this term and another. - def subtraction(self, term: 'Term') -> 'Term': + Parameters: + term: The term to subtract from this one. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. 
+ + Returns: + A new term representing the set difference (this - term). """ - Compute the subtraction with the given term and return the resulting term. + request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_difference(request) + + def concat(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - request = MultiTermsRequest(terms=[self, term]) - return RegexSolver.get_instance().compute_subtraction(request) + Concatenate this term with one or more other terms. + + Parameters: + terms: Additional terms to append in sequence. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. - def is_equivalent_to(self, term: 'Term') -> bool: + Returns: + A new term representing the concatenation. """ - Check equivalence with the given term. + request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_concat(request) + + def equivalent(self, term: 'Term', execution_timeout=None) -> bool: """ - request = MultiTermsRequest(terms=[self, term]) - return RegexSolver.get_instance().equivalence(request) + Check whether this term is equivalent to another. - def is_subset_of(self, term: 'Term') -> bool: + Parameters: + term: The term to compare against. + execution_timeout: Timeout in milliseconds for the server. + + Returns: + True if both terms accept exactly the same language. """ - Check if is a subset of the given term. 
+ request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._analyze_equivalent(request) + + def subset(self, term: 'Term', execution_timeout=None) -> bool: """ - request = MultiTermsRequest(terms=[self, term]) - return RegexSolver.get_instance().subset(request) + Check whether this term is a subset of another. - def serialize(self) -> str: + Parameters: + term: The term to compare against. + execution_timeout: Timeout in milliseconds for the server. + + Returns: + True if every string matched by this term is also matched by `term`. """ - Generate a string representation that can be parsed by deserialize(). + request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._analyze_subset(request) + + def is_empty(self) -> bool: """ - prefix = _UNKNOWN_PREFIX - if self.type == _FAIR_PREFIX: - prefix = _FAIR_PREFIX - elif self.type == _REGEX_PREFIX: - prefix = _REGEX_PREFIX + Check whether this term matches no string. - return prefix + "=" + self.value + Results are cached on the instance to avoid repeated API calls. + """ + if self._empty: + return self._empty + else: + self._empty = RegexSolver.get_instance()._analyze_empty(self) + return self._empty + + def is_total(self) -> bool: + """ + Check whether this term matches all possible strings. - def deserialize(string: str) -> Optional['Term']: + Results are cached on the instance to avoid repeated API calls. """ - Parse a string representation of a Term produced by serialize(). + if self._total: + return self._total + else: + self._total = RegexSolver.get_instance()._analyze_total(self) + return self._total + + def is_empty_string(self) -> bool: """ - if not string: - return None + Check whether this term matches only the empty string. + + Results are cached on the instance to avoid repeated API calls. 
+ """ + if self._empty_string: + return self._empty_string + else: + self._empty_string = RegexSolver.get_instance()._analyze_empty_string(self) + return self._empty_string + + def get_dot(self) -> str: + """ + Get the GraphViz DOT representation of this term. + + Returns: + A DOT language string describing the automaton for this term. + """ + return RegexSolver.get_instance()._analyze_dot(self) + + def get_cardinality(self) -> Cardinality: + """ + Get the cardinality of this term. + + Returns: + A `Cardinality` object describing how many distinct strings + are matched. + """ + return RegexSolver.get_instance()._analyze_cardinality(self) + + def get_length(self) -> Length: + """ + Get the length bounds of this term. + + Returns: + A `Length` object with the minimum and maximum string length + matched by this term. + """ + return RegexSolver.get_instance()._analyze_length(self) - if string.startswith(_REGEX_PREFIX): - return Term.regex(string[len(_REGEX_PREFIX)+1:]) - elif string.startswith(_FAIR_PREFIX): - return Term.fair(string[len(_FAIR_PREFIX)+1:]) + def serialize(self) -> str: + """ + Return a string representation of this term in the format + `=`, which can later be parsed by `deserialize()`. + """ + if self.type == TermType.FAIR: + prefix = TermType.FAIR + elif self.type == TermType.REGEX: + prefix = TermType.REGEX else: + raise ValueError(f"Unknown type: {self.type}") + + return prefix + "=" + self.value + + @staticmethod + def deserialize(string: str) -> Optional['Term']: + """ + Parse a string representation produced by `serialize()`. + + Parameters: + string: The serialized term, e.g. `"regex=abc"`. + + Returns: + A Term instance, or None if the input is empty or invalid. 
+ """ + if not string or "=" not in string: return None + prefix, value = string.split("=", 1) + if prefix == TermType.REGEX: + return Term.regex(value) + elif prefix == TermType.FAIR: + return Term.fair(value) + return None def __str__(self): return self.serialize() @@ -222,11 +401,42 @@ def __eq__(self, other): def __hash__(self): return hash(self.serialize()) - +class ResponseFormat(str, Enum): + ANY = "any" + REGEX = "regex" + FAIR = "fair" + +class ResponseOptions(BaseModel): + format: Optional[ResponseFormat] = None + + model_config = {"use_enum_values": True} + +class ExecutionOptions(BaseModel): + timeout: Optional[int] = None + +class RequestOptions(BaseModel): + schema_version: int = 1 + response: ResponseOptions = Field(default_factory=ResponseOptions) + execution: ExecutionOptions = Field(default_factory=ExecutionOptions) + + @classmethod + def from_args(cls, response_format: ResponseFormat = None, execution_timeout: int = None): + return cls( + response=ResponseOptions(format=response_format), + execution=ExecutionOptions(timeout=execution_timeout), + ) + class MultiTermsRequest(BaseModel): terms: List[Term] + options: Optional[RequestOptions] = None +class RepeatRequest(BaseModel): + term: Term + min: int + max: Optional[int] + options: Optional[RequestOptions] = None class GenerateStringsRequest(BaseModel): term: Term count: int + options: Optional[RequestOptions] = None \ No newline at end of file diff --git a/regexsolver/details.py b/regexsolver/details.py index ec799c1..f7741d0 100644 --- a/regexsolver/details.py +++ b/regexsolver/details.py @@ -1,4 +1,4 @@ -from typing import Optional +from typing import Any, Optional from pydantic import BaseModel, model_validator @@ -12,19 +12,16 @@ class Cardinality(BaseModel): def is_infinite(self) -> bool: """ - True if it has a finite number of values, False otherwise. + True if it has a infinite number of values, False otherwise. 
""" - if self.type == 'Infinite': - return True - else: - return False + return self.type == 'infinite' def __str__(self): - if self.type == 'Infinite': + if self.type == 'infinite': return "Infinite" - elif self.type == 'BigInteger': + elif self.type == 'bigInteger': return 'BigInteger' - elif self.type == 'Integer': + elif self.type == 'integer': return "Integer({})".format(self.value) else: return 'Unknown' @@ -39,10 +36,16 @@ class Length(BaseModel): maximum: Optional[int] @model_validator(mode="before") - def from_list(cls, values: list): - if len(values) != 2: - raise ValueError("List must contain exactly two elements") - return {'minimum': values[0], 'maximum': values[1]} + def from_list(cls, values: Any): + if isinstance(values, dict): + return {'minimum': values.get('min'), 'maximum': values.get('max')} + + if isinstance(values, list): + if len(values) != 2: + raise ValueError("List must contain exactly two elements") + return {'minimum': values[0], 'maximum': values[1]} + + return values def __str__(self): return "Length[minimum={}, maximum={}]".format( diff --git a/setup.py b/setup.py index d6b4483..10a66e1 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,7 @@ setup( name="regexsolver", - version="1.0.3", + version="1.1.0", description="RegexSolver allows you to manipulate regular expressions as sets, enabling operations such as intersection, union, and subtraction.", long_description=open('README.md').read(), long_description_content_type='text/markdown', diff --git a/tests/assets/response_analyze_cardinality.json b/tests/assets/response_analyze_cardinality.json new file mode 100644 index 0000000..157edb5 --- /dev/null +++ b/tests/assets/response_analyze_cardinality.json @@ -0,0 +1,4 @@ +{ + "type": "integer", + "value": 5 +} \ No newline at end of file diff --git a/tests/assets/response_getDetails.json b/tests/assets/response_analyze_details.json similarity index 84% rename from tests/assets/response_getDetails.json rename to 
tests/assets/response_analyze_details.json index 65e0539..07ce803 100644 --- a/tests/assets/response_getDetails.json +++ b/tests/assets/response_analyze_details.json @@ -1,7 +1,7 @@ { "type": "details", "cardinality": { - "type": "Integer", + "type": "integer", "value": 2 }, "length": [ diff --git a/tests/assets/response_getDetails_empty.json b/tests/assets/response_analyze_details_empty.json similarity index 85% rename from tests/assets/response_getDetails_empty.json rename to tests/assets/response_analyze_details_empty.json index d33b6f3..f50bf22 100644 --- a/tests/assets/response_getDetails_empty.json +++ b/tests/assets/response_analyze_details_empty.json @@ -1,7 +1,7 @@ { "type": "details", "cardinality": { - "type": "Integer", + "type": "integer", "value": 0 }, "length": [ diff --git a/tests/assets/response_getDetails_infinite.json b/tests/assets/response_analyze_details_infinite.json similarity index 83% rename from tests/assets/response_getDetails_infinite.json rename to tests/assets/response_analyze_details_infinite.json index ae72fc8..fe08178 100644 --- a/tests/assets/response_getDetails_infinite.json +++ b/tests/assets/response_analyze_details_infinite.json @@ -1,7 +1,7 @@ { "type": "details", "cardinality": { - "type": "Infinite" + "type": "infinite" }, "length": [ 0, diff --git a/tests/assets/response_isSubsetOf.json b/tests/assets/response_analyze_empty.json similarity index 100% rename from tests/assets/response_isSubsetOf.json rename to tests/assets/response_analyze_empty.json diff --git a/tests/assets/response_analyze_empty_string.json b/tests/assets/response_analyze_empty_string.json new file mode 100644 index 0000000..84ed493 --- /dev/null +++ b/tests/assets/response_analyze_empty_string.json @@ -0,0 +1,4 @@ +{ + "type": "boolean", + "value": true +} \ No newline at end of file diff --git a/tests/assets/response_isEquivalentTo.json b/tests/assets/response_analyze_equivalent.json similarity index 100% rename from 
tests/assets/response_isEquivalentTo.json rename to tests/assets/response_analyze_equivalent.json diff --git a/tests/assets/response_analyze_length.json b/tests/assets/response_analyze_length.json new file mode 100644 index 0000000..0109dd3 --- /dev/null +++ b/tests/assets/response_analyze_length.json @@ -0,0 +1,5 @@ +{ + "type": "length", + "min": 0, + "max": 3 +} \ No newline at end of file diff --git a/tests/assets/response_analyze_length_empty.json b/tests/assets/response_analyze_length_empty.json new file mode 100644 index 0000000..eb3a50f --- /dev/null +++ b/tests/assets/response_analyze_length_empty.json @@ -0,0 +1,3 @@ +{ + "type": "length" +} \ No newline at end of file diff --git a/tests/assets/response_analyze_subset.json b/tests/assets/response_analyze_subset.json new file mode 100644 index 0000000..84ed493 --- /dev/null +++ b/tests/assets/response_analyze_subset.json @@ -0,0 +1,4 @@ +{ + "type": "boolean", + "value": true +} \ No newline at end of file diff --git a/tests/assets/response_analyze_total.json b/tests/assets/response_analyze_total.json new file mode 100644 index 0000000..25147f3 --- /dev/null +++ b/tests/assets/response_analyze_total.json @@ -0,0 +1,4 @@ +{ + "type": "boolean", + "value": false +} \ No newline at end of file diff --git a/tests/assets/response_compute_concat.json b/tests/assets/response_compute_concat.json new file mode 100644 index 0000000..c316789 --- /dev/null +++ b/tests/assets/response_compute_concat.json @@ -0,0 +1,4 @@ +{ + "type": "regex", + "value": "abcde" +} \ No newline at end of file diff --git a/tests/assets/response_subtraction.json b/tests/assets/response_compute_difference.json similarity index 100% rename from tests/assets/response_subtraction.json rename to tests/assets/response_compute_difference.json diff --git a/tests/assets/response_intersection.json b/tests/assets/response_compute_intersection.json similarity index 100% rename from tests/assets/response_intersection.json rename to 
tests/assets/response_compute_intersection.json diff --git a/tests/assets/response_union.json b/tests/assets/response_compute_union.json similarity index 100% rename from tests/assets/response_union.json rename to tests/assets/response_compute_union.json diff --git a/tests/assets/response_generateStrings.json b/tests/assets/response_generate_strings.json similarity index 100% rename from tests/assets/response_generateStrings.json rename to tests/assets/response_generate_strings.json diff --git a/tests/serialization_test.py b/tests/serialization_test.py index 8682208..9840455 100644 --- a/tests/serialization_test.py +++ b/tests/serialization_test.py @@ -1,6 +1,6 @@ import unittest -from regexsolver import GenerateStringsRequest, MultiTermsRequest, Term +from regexsolver import GenerateStringsRequest, MultiTermsRequest, RequestOptions, ResponseFormat, Term class SerializationTest(unittest.TestCase): @@ -31,7 +31,31 @@ def test_serialize_requests(self): {"type": "regex", "value": "ghi"} ] }, - request.model_dump() + request.model_dump(exclude_none=True) + ) + + request = MultiTermsRequest( + terms=[Term.regex(r"abc"), Term.regex(r"def"), Term.regex(r"ghi")], + options=RequestOptions.from_args(response_format=ResponseFormat.FAIR, execution_timeout=400) + ) + self.assertEqual( + { + "terms": [ + {"type": "regex", "value": "abc"}, + {"type": "regex", "value": "def"}, + {"type": "regex", "value": "ghi"} + ], + "options": { + "schema_version": 1, + "response": { + "format": "fair" + }, + "execution": { + "timeout": 400 + } + } + }, + request.model_dump(exclude_none=True) ) request = GenerateStringsRequest( @@ -41,7 +65,7 @@ def test_serialize_requests(self): "term": {"type": "regex", "value": "(abc|de){2,3}"}, "count": 10 }, - request.model_dump() + request.model_dump(exclude_none=True) ) request = Term.regex(r"(abc|de){2,3}") diff --git a/tests/term_operation_test.py b/tests/term_operation_test.py index 94a680a..da997da 100644 --- a/tests/term_operation_test.py +++ 
b/tests/term_operation_test.py @@ -2,15 +2,32 @@ import requests_mock import unittest -from regexsolver import ApiError, RegexSolver, Term +from regexsolver import ApiError, RegexSolver, ResponseFormat, Term class TermsOperationTest(unittest.TestCase): def setUp(self): - RegexSolver.get_instance().initialize("TOKEN") + RegexSolver.initialize("TOKEN") + + def test_analyze_cardinality(self): + with open('tests/assets/response_analyze_cardinality.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/analyze/cardinality", + json=json_response, status_code=200 + ) + + term = Term.regex(r"[0-4]") + cardinality = term.get_cardinality() + + self.assertEqual( + "Integer(5)", + str(cardinality) + ) - def test_get_details(self): - with open('tests/assets/response_getDetails.json') as response: + def test_analyze_details(self): + with open('tests/assets/response_analyze_details.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( @@ -26,8 +43,8 @@ def test_get_details(self): str(details) ) - def test_get_details_infinite(self): - with open('tests/assets/response_getDetails_infinite.json') as response: + def test_analyze_details_infinite(self): + with open('tests/assets/response_analyze_details_infinite.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( @@ -43,8 +60,8 @@ def test_get_details_infinite(self): str(details) ) - def test_get_details_empty(self): - with open('tests/assets/response_getDetails_empty.json') as response: + def test_analyze_details_empty(self): + with open('tests/assets/response_analyze_details_empty.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( @@ -59,103 +76,198 @@ def test_get_details_empty(self): "Details[cardinality=Integer(0), length=Length[minimum=None, maximum=None], empty=True, total=False]", 
str(details) ) + + def test_analyze_empty_string(self): + with open('tests/assets/response_analyze_empty_string.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/analyze/empty_string", + json=json_response, status_code=200 + ) - def test_generate_strings(self): - with open('tests/assets/response_generateStrings.json') as response: + term = Term.regex(r"") + + result = term.is_empty_string() + + self.assertEqual(True, result) + + def test_analyze_empty(self): + with open('tests/assets/response_analyze_empty.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( - "https://api.regexsolver.com/api/generate/strings", + "https://api.regexsolver.com/api/analyze/empty", json=json_response, status_code=200 ) - term = Term.regex(r"(abc|de){2}") - strings = term.generate_strings(10) + term = Term.regex(r"[]") - self.assertEqual(4, len(strings)) + result = term.is_empty() - def test_intersection(self): - with open('tests/assets/response_intersection.json') as response: + self.assertEqual(True, result) + + def test_analyze_total(self): + with open('tests/assets/response_analyze_total.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( - "https://api.regexsolver.com/api/compute/intersection", + "https://api.regexsolver.com/api/analyze/total", json=json_response, status_code=200 ) - term1 = Term.regex(r"(abc|de){2}") - term2 = Term.regex(r"de.*") - term3 = Term.regex(r".*abc") + term = Term.regex(r"abc") - result = term1.intersection(term2, term3) + result = term.is_total() - self.assertEqual("regex=deabc", str(result)) + self.assertEqual(False, result) + + def test_analyze_equivalent(self): + with open('tests/assets/response_analyze_equivalent.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + 
"https://api.regexsolver.com/api/analyze/equivalent", + json=json_response, status_code=200 + ) + + term1 = Term.regex(r"(abc|de)") + term2 = Term.fair( + "rgmsW[1g2LvP=Gr&V>sLc#w-!No&(oq@Sf>X).?lI3{uh{80qWEH[#0.pHq@B-9o[LpP-a#fYI+") + + result = term1.equivalent(term2) + + self.assertEqual(False, result) + + def test_analyze_length_empty(self): + with open('tests/assets/response_analyze_length_empty.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/analyze/length", + json=json_response, status_code=200 + ) + + term = Term.regex(r"[]") + length = term.get_length() - def test_union(self): - with open('tests/assets/response_union.json') as response: + self.assertEqual( + "Length[minimum=None, maximum=None]", + str(length) + ) + + def test_analyze_length(self): + with open('tests/assets/response_analyze_length.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( - "https://api.regexsolver.com/api/compute/union", + "https://api.regexsolver.com/api/analyze/length", + json=json_response, status_code=200 + ) + + term = Term.regex(r"(abc)?") + length = term.get_length() + + self.assertEqual( + "Length[minimum=0, maximum=3]", + str(length) + ) + + def test_analyze_subset(self): + with open('tests/assets/response_analyze_subset.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/analyze/subset", + json=json_response, status_code=200 + ) + + term1 = Term.regex(r"de") + term2 = Term.regex(r"(abc|de)") + + result = term1.subset(term2) + + self.assertEqual(True, result) + + def test_compute_concat(self): + with open('tests/assets/response_compute_concat.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/compute/concat", json=json_response, 
status_code=200 ) term1 = Term.regex(r"abc") term2 = Term.regex(r"de") - term3 = Term.regex(r"fghi") - - result = term1.union(term2, term3) - self.assertEqual("regex=(abc|de|fghi)", str(result)) + result = term1.concat(term2, response_format=ResponseFormat.REGEX) - def test_subtraction(self): - with open('tests/assets/response_subtraction.json') as response: + self.assertEqual("regex=abcde", str(result)) + + def test_compute_difference(self): + with open('tests/assets/response_compute_difference.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( - "https://api.regexsolver.com/api/compute/subtraction", + "https://api.regexsolver.com/api/compute/difference", json=json_response, status_code=200 ) term1 = Term.regex(r"(abc|de)") term2 = Term.regex(r"de") - result = term1.subtraction(term2) + result = term1.difference(term2, response_format=ResponseFormat.REGEX) self.assertEqual("regex=abc", str(result)) - - def test_is_equivalent_to(self): - with open('tests/assets/response_isEquivalentTo.json') as response: + + def test_compute_intersection(self): + with open('tests/assets/response_compute_intersection.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( - "https://api.regexsolver.com/api/analyze/equivalence", + "https://api.regexsolver.com/api/compute/intersection", json=json_response, status_code=200 ) - term1 = Term.regex(r"(abc|de)") - term2 = Term.fair( - "rgmsW[1g2LvP=Gr&V>sLc#w-!No&(oq@Sf>X).?lI3{uh{80qWEH[#0.pHq@B-9o[LpP-a#fYI+") + term1 = Term.regex(r"(abc|de){2}") + term2 = Term.regex(r"de.*") + term3 = Term.regex(r".*abc") - result = term1.is_equivalent_to(term2) + result = term1.intersection(term2, term3, response_format=ResponseFormat.REGEX) - self.assertEqual(False, result) + self.assertEqual("regex=deabc", str(result)) - def test_is_subset_of(self): - with open('tests/assets/response_isSubsetOf.json') as response: + def test_compute_union(self): + 
with open('tests/assets/response_compute_union.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( - "https://api.regexsolver.com/api/analyze/subset", + "https://api.regexsolver.com/api/compute/union", json=json_response, status_code=200 ) - term1 = Term.regex(r"de") - term2 = Term.regex(r"(abc|de)") + term1 = Term.regex(r"abc") + term2 = Term.regex(r"de") + term3 = Term.regex(r"fghi") - result = term1.is_subset_of(term2) + result = term1.union(term2, term3, response_format=ResponseFormat.REGEX) - self.assertEqual(True, result) + self.assertEqual("regex=(abc|de|fghi)", str(result)) + + def test_generate_strings(self): + with open('tests/assets/response_generate_strings.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/generate/strings", + json=json_response, status_code=200 + ) + + term = Term.regex(r"(abc|de){2}") + strings = term.generate_strings(10) + + self.assertEqual(4, len(strings)) def test_error_response(self): with open('tests/assets/response_error.json') as response: From a8dd099baaf91400cb116bd485a362bd57bf1ce6 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 29 Sep 2025 21:51:12 +0200 Subject: [PATCH 02/20] update readme + add tests + caching --- README.md | 223 +++++++-------------- regexsolver/__init__.py | 49 ++++- tests/assets/response_analyze_dot.json | 4 + tests/assets/response_analyze_pattern.json | 4 + tests/term_operation_test.py | 34 ++++ 5 files changed, 164 insertions(+), 150 deletions(-) create mode 100644 tests/assets/response_analyze_dot.json create mode 100644 tests/assets/response_analyze_pattern.json diff --git a/README.md b/README.md index 7335535..ee6f636 100644 --- a/README.md +++ b/README.md @@ -11,192 +11,123 @@ they were sets. 
```sh pip install --upgrade regexsolver ``` +Requirements: Python >= 3.7 -### Requirements +## Quick Start -- Python >=3.7 - -## Usage - -In order to use the library you need to generate an API Token on our [Developer Console](https://console.regexsolver.com/). +1. Create an API token in the [Developer Console](https://console.regexsolver.com/). +2. Initialize the client and start working with terms: ```python -from regexsolver import RegexSolver, Term +from regexsolver import RegexSolver, ResponseFormat, Term -RegexSolver.initialize("YOUR TOKEN HERE") +# Initialize with your API token +RegexSolver.initialize("YOUR_API_TOKEN") +# Create terms term1 = Term.regex(r"(abc|de|fg){2,}") term2 = Term.regex(r"de.*") term3 = Term.regex(r".*abc") -term4 = Term.regex(r".+(abc|de).+") +# Compute intersection and difference +result = term1.intersection(term2, term3, response_format="regex").difference( + Term.regex(r".+(abc|de).+"), response_format=ResponseFormat.REGEX +) -result = term1.intersection(term2, term3)\ - .subtraction(term4) - -print(result) +print(result) # regex=deabc ``` -## Features +## Key Concepts & Limitations -- [Intersection](#intersection) -- [Union](#union) -- [Subtraction / Difference](#subtraction--difference) -- [Equivalence](#equivalence) -- [Subset](#subset) -- [Details](#details) -- [Generate Strings](#generate-strings) +RegexSolver supports a subset of regular expressions that adhere to the principles of regular languages. Here are the key characteristics and limitations of the regular expressions supported by RegexSolver: +- **Anchored Expressions:** All regular expressions in RegexSolver are anchored. This means that the expressions are treated as if they start and end at the boundaries of the input text. For example, the expression `abc` will match the string "abc" but not "xabc" or "abcx". +- **Lookahead/Lookbehind:** RegexSolver does not support lookahead (`(?=...)`) or lookbehind (`(?<=...)`) assertions. Using them returns an error. 
+- **Pure Regular Expressions:** RegexSolver focuses on pure regular expressions as defined in regular language theory. This means features that extend beyond regular languages, such as backreferences (`\1`, `\2`, etc.), are not supported. Any use of backreference would return an error. +- **Greedy/Ungreedy Quantifiers:** The concept of ungreedy (`*?`, `+?`, `??`) quantifiers is not supported. All quantifiers are treated as greedy. For example, `a*` or `a*?` will match the longest possible sequence of "a"s. +- **Line Feed and Dot:** RegexSolver handles all characters the same way. The dot `.` matches any Unicode character including line feed (`\n`). +- **Empty Regular Expressions:** The empty language (matches no string) is represented by constructs like `[]` (empty character class). This is distinct from the empty string. -### Intersection +RegexSolver is based on the [regex-syntax](https://docs.rs/regex-syntax/0.8.5/regex_syntax/) library for parsing patterns. Unsupported features are parsed but ignored; they do not raise an error unless they affect semantics that cannot be represented (e.g., backreferences). This allows for some flexibility in writing regular expressions, but it is important to be aware of the unsupported features to avoid unexpected behavior. -#### Request +## Response formats -Compute the intersection of the provided terms and return the resulting term. +The API can handle terms in two formats: +- `regex`: a regular expression pattern +- `fair`: FAIR (Fast Automaton Internal Representation); a representation used internally by the RegexSolver engine. -The maximum number of terms is currently limited to 10. +For some operations returning a FAIR is cheaper for the engine. If you do not force a format, it will choose the most suitable one. 
To control the output, pass `response_format`: ```python -term1 = Term.regex(r"(abc|de){2}") -term2 = Term.regex(r"de.*") -term3 = Term.regex(r".*abc") - -result = term1.intersection(term2, term3) -print(result) -``` - -#### Response - -``` -regex=deabc -``` - -### Union - -Compute the union of the provided terms and return the resulting term. - -The maximum number of terms is currently limited to 10. - -#### Request - -```python -term1 = Term.regex(r"abc") -term2 = Term.regex(r"de") -term3 = Term.regex(r"fghi") - -result = term1.union(term2, term3) -print(result) -``` - -#### Response - -``` -regex=(abc|de|fghi) -``` - -### Subtraction / Difference - -Compute the first term minus the second and return the resulting term. - -#### Request - -```python -term1 = Term.regex(r"(abc|de)") -term2 = Term.regex(r"de") - -result = term1.subtraction(term2) -print(result) -``` +from regexsolver import RegexSolver, ResponseFormat, Term -#### Response +term = Term.regex(r"(ab|c){2}") +u = term.union(Term.regex(r"de"), response_format=ResponseFormat.REGEX) +print(u) # regex=((c|ab){2}|de) -``` -regex=abc +i = term.intersection(Term.regex(r"de.*"), response_format=ResponseFormat.FAIR) +print(i) # fair=... ``` -### Equivalence +If the response format does not matter the argument `response_format` can be omitted or its value can be set to `ResponseFormat.ANY`. -Analyze if the two provided terms are equivalent. +## Bounding execution time -#### Request +Long computations can be bounded with `execution_timeout` (milliseconds). Most methods on Term accepts it: ```python -term1 = Term.regex(r"(abc|de)") -term2 = Term.regex(r"(abc|de)*") - -result = term1.is_equivalent_to(term2) -print(result) +# Limit the server-side compute time to 300 ms +res = Term.regex(r"(a|b){100}").intersection( + Term.regex(r"a+"), + execution_timeout=300 +) ``` +If time is exceeded, the API will return an error. Catch `ApiError` to handle it. 
-#### Response +## API Overview -``` -False -``` +The client exposes three main groups of operations: -### Subset +### Analyze -Analyze if the second term is a subset of the first. +| Method | Return | Description | +| -------- | ------- | ------- | +| `t.get_details()` | `Details` | Return cardinality, length bounds, and if it is empty or total. | +| `t.get_cardinality()` | `Cardinality` | Returns the cardinality of the term (i.e., the number of possible matched strings). | +| `t.get_length()` | `Length` | Returns the minimum and maximum length of matched strings. | +| `t.is_empty()` | `bool` | `True` if the term matches no string. | +| `t.is_total()` | `bool` | `True` if the term matches all possible strings. | +| `t.is_empty_string()` | `bool` | `True` if the term matches only the empty string. | +| `t.equivalent(term: Term)` | `bool` | `True` if `t` and `term` accept exactly the same language. Supports `execution_timeout`. | +| `t.subset(term: Term)` | `bool` | `True` if every string matched by `t` is also matched by `term`. Supports `execution_timeout`. | +| `t.get_dot()` | `str` | Return a GraphViz DOT representation of the automaton for the term. | +| `t.get_pattern()` | `str` | Return a regular expression pattern for the term. | -#### Request +### Compute -```java -term1 = Term.regex(r"de") -term2 = Term.regex(r"(abc|de)") +| Method | Return | Description | +| -------- | ------- | ------- | +| `t.concat(*terms: Term)` | `Term` | Concatenate `t` with the given terms. Supports `response_format` and `execution_timeout`. | +| `t.union(*terms: Term)` | `Term` | Compute the union of `t` with the given terms. Supports `response_format` and `execution_timeout`. | +| `t.intersection(*terms: Term)` | `Term` | Compute the intersection of `t` with the given terms. Supports `response_format` and `execution_timeout`. | +| `t.difference(term: Term)` | `Term` | Compute the difference `t - term`. Supports `response_format` and `execution_timeout`. 
| +| `t.repeat(min: int, max: Optional[int])` | `Term` | Computes the repetition of the term between `min` and `max` times; if `max` is `None`, the repetition is unbounded. Supports `response_format` and `execution_timeout`. | -result = term1.is_subset_of(term2) -print(result) -``` +### Generate -#### Response +| Method | Return | Description | +| -------- | ------- | ------- | +| `t.generate_strings(count: int)` | `List[str]` | Generate up to `count` unique example strings matched by `t`. Supports `execution_timeout`. | -``` -True -``` +## Cross-Language Support -### Details +If you want to use this library with other programming languages, we provide a wide range of wrappers: +- [regexsolver-java](https://github.com/RegexSolver/regexsolver-java) +- [regexsolver-js](https://github.com/RegexSolver/regexsolver-js) -Compute the details of the provided term. - -The computed details are: - -- **Cardinality:** the number of possible values. -- **Length:** the minimum and maximum length of possible values. -- **Empty:** true if is an empty set (does not contain any value), false otherwise. -- **Total:** true if is a total set (contains all values), false otherwise. - -#### Request - -```python -term = Term.regex(r"(abc|de)") - -details = term.get_details() -print(details) -``` +For more information about how to use the wrappers, you can refer to our [guide](https://docs.regexsolver.com/getting-started.html). -#### Response +If you want to run the engine yourself you can also take a look at [regexsolver](https://github.com/RegexSolver/regexsolver). -``` -Details[cardinality=Integer(2), length=Length[minimum=2, maximum=3], empty=false, total=false] -``` - -### Generate Strings - -Generate the given number of strings that can be matched by the provided term. - -The maximum number of strings to generate is currently limited to 200. 
- -#### Request - -```python -term = Term.regex(r"(abc|de){2}") - -strings = term.generate_strings(3) -print(strings) -``` - -#### Response - -``` -['deabc', 'abcde', 'dede'] -``` +## License +This project is licensed under the MIT License. diff --git a/regexsolver/__init__.py b/regexsolver/__init__.py index a107e23..31c9169 100644 --- a/regexsolver/__init__.py +++ b/regexsolver/__init__.py @@ -97,6 +97,9 @@ def _analyze_empty_string(self, term: 'Term') -> bool: def _analyze_dot(self, term: 'Term') -> str: return self._request('api/analyze/dot', term).get('value') + def _analyze_pattern(self, term: 'Term') -> str: + return self._request('api/analyze/pattern', term).get('value') + # Compute def _compute_repeat(self, request: 'RepeatRequest') -> 'Term': @@ -140,9 +143,13 @@ class Term(BaseModel): type: TermType value: str _details: Optional['Details'] = None + _cardinality: Optional[Cardinality] = None + _length: Optional[Length] = None _empty: Optional[bool] = None _total: Optional[bool] = None _empty_string: Optional[bool] = None + _dot: Optional[str] = None + _pattern: Optional[str] = None model_config = {"use_enum_values": True} @@ -171,10 +178,17 @@ def get_fair(self) -> Optional[str]: def get_pattern(self) -> Optional[str]: """ Return the regular expression pattern. + + If the term is not a regex the pattern will be resolved. + Results are cached on the instance to avoid repeated API calls. 
""" if self.type == TermType.REGEX: return self.value - return None + elif self._pattern: + return self._pattern + else: + self._pattern = RegexSolver.get_instance()._analyze_pattern(self) + return self._pattern def get_details(self) -> Details: """ @@ -299,6 +313,8 @@ def is_empty(self) -> bool: """ if self._empty: return self._empty + elif self._details: + return self._details.empty else: self._empty = RegexSolver.get_instance()._analyze_empty(self) return self._empty @@ -311,6 +327,8 @@ def is_total(self) -> bool: """ if self._total: return self._total + elif self._details: + return self._details.total else: self._total = RegexSolver.get_instance()._analyze_total(self) return self._total @@ -330,31 +348,54 @@ def is_empty_string(self) -> bool: def get_dot(self) -> str: """ Get the GraphViz DOT representation of this term. + + Results are cached on the instance to avoid repeated API calls. Returns: A DOT language string describing the automaton for this term. """ - return RegexSolver.get_instance()._analyze_dot(self) + if self._dot: + return self._dot + else: + self._dot = RegexSolver.get_instance()._analyze_dot(self) + return self._dot def get_cardinality(self) -> Cardinality: """ Get the cardinality of this term. + + Results are cached on the instance to avoid repeated API calls. Returns: A `Cardinality` object describing how many distinct strings are matched. """ - return RegexSolver.get_instance()._analyze_cardinality(self) + + if self._cardinality: + return self._cardinality + elif self._details: + return self._details.cardinality + else: + self._cardinality = RegexSolver.get_instance()._analyze_cardinality(self) + return self._cardinality def get_length(self) -> Length: """ Get the length bounds of this term. + + Results are cached on the instance to avoid repeated API calls. Returns: A `Length` object with the minimum and maximum string length matched by this term. 
""" - return RegexSolver.get_instance()._analyze_length(self) + if self._length: + return self._length + elif self._length: + return self._details.length + else: + self._length = RegexSolver.get_instance()._analyze_length(self) + return self._length def serialize(self) -> str: """ diff --git a/tests/assets/response_analyze_dot.json b/tests/assets/response_analyze_dot.json new file mode 100644 index 0000000..5b71397 --- /dev/null +++ b/tests/assets/response_analyze_dot.json @@ -0,0 +1,4 @@ +{ + "type": "string", + "value": "digraph G { ... }" +} \ No newline at end of file diff --git a/tests/assets/response_analyze_pattern.json b/tests/assets/response_analyze_pattern.json new file mode 100644 index 0000000..42cb41f --- /dev/null +++ b/tests/assets/response_analyze_pattern.json @@ -0,0 +1,4 @@ +{ + "type": "string", + "value": "abc.*" +} \ No newline at end of file diff --git a/tests/term_operation_test.py b/tests/term_operation_test.py index da997da..d814a9e 100644 --- a/tests/term_operation_test.py +++ b/tests/term_operation_test.py @@ -77,6 +77,23 @@ def test_analyze_details_empty(self): str(details) ) + def test_analyze_dot(self): + with open('tests/assets/response_analyze_dot.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/analyze/dot", + json=json_response, status_code=200 + ) + + term = Term.regex(r"(abc|de)") + dot = term.get_dot() + + self.assertEqual( + "digraph G { ... 
}", + str(dot) + ) + def test_analyze_empty_string(self): with open('tests/assets/response_analyze_empty_string.json') as response: json_response = json.load(response) @@ -172,6 +189,23 @@ def test_analyze_length(self): "Length[minimum=0, maximum=3]", str(length) ) + + def test_analyze_pattern(self): + with open('tests/assets/response_analyze_pattern.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/analyze/pattern", + json=json_response, status_code=200 + ) + + term = Term.regex(r"abc.*") + pattern = term.get_pattern() + + self.assertEqual( + "abc.*", + str(pattern) + ) def test_analyze_subset(self): with open('tests/assets/response_analyze_subset.json') as response: From 41a01b37d634f30de0aa9dec4f29487589737244 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Wed, 8 Oct 2025 21:49:31 +0200 Subject: [PATCH 03/20] readme wip --- README.md | 54 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index ee6f636..cfef2b0 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # RegexSolver Python API Client [Homepage](https://regexsolver.com) | [Online Demo](https://regexsolver.com/demo) | [Documentation](https://docs.regexsolver.com) | [Developer Console](https://console.regexsolver.com) -This repository contains the source code of the Python library for [RegexSolver](https://regexsolver.com) API. +Python client for the RegexSolver API. RegexSolver is a powerful regular expression manipulation toolkit, that gives you the power to manipulate regex as if they were sets. @@ -19,7 +19,7 @@ Requirements: Python >= 3.7 2. 
Initialize the client and start working with terms: ```python -from regexsolver import RegexSolver, ResponseFormat, Term +from regexsolver import RegexSolver, Term # Initialize with your API token RegexSolver.initialize("YOUR_API_TOKEN") @@ -30,11 +30,11 @@ term2 = Term.regex(r"de.*") term3 = Term.regex(r".*abc") # Compute intersection and difference -result = term1.intersection(term2, term3, response_format="regex").difference( - Term.regex(r".+(abc|de).+"), response_format=ResponseFormat.REGEX +result = term1.intersection(term2, term3).difference( + Term.regex(r".+(abc|de).+") ) -print(result) # regex=deabc +print(result.get_pattern()) # de(fg)*abc ``` ## Key Concepts & Limitations @@ -47,41 +47,49 @@ RegexSolver supports a subset of regular expressions that adhere to the principl - **Line Feed and Dot:** RegexSolver handles all characters the same way. The dot `.` matches any Unicode character including line feed (`\n`). - **Empty Regular Expressions:** The empty language (matches no string) is represented by constructs like `[]` (empty character class). This is distinct from the empty string. -RegexSolver is based on the [regex-syntax](https://docs.rs/regex-syntax/0.8.5/regex_syntax/) library for parsing patterns. Unsupported features are parsed but ignored; they do not raise an error unless they affect semantics that cannot be represented (e.g., backreferences). This allows for some flexibility in writing regular expressions, but it is important to be aware of the unsupported features to avoid unexpected behavior. - ## Response formats The API can handle terms in two formats: - `regex`: a regular expression pattern -- `fair`: FAIR (Fast Automaton Internal Representation); a representation used internally by the RegexSolver engine. +- `fair`: FAIR (Fast Automaton Internal Representation); a representation used internally by the RegexSolver engine + +FAIR is a stable, versioned internal format intended for programmatic use. 
-For some operations returning a FAIR is cheaper for the engine. If you do not force a format, it will choose the most suitable one. To control the output, pass `response_format`: +For some operations, returning FAIR is cheaper. If you do not force a format, it will choose the most suitable one. To control the output, pass `response_format`: ```python from regexsolver import RegexSolver, ResponseFormat, Term -term = Term.regex(r"(ab|c){2}") -u = term.union(Term.regex(r"de"), response_format=ResponseFormat.REGEX) -print(u) # regex=((c|ab){2}|de) +term = Term.regex(r"abcde") +result = term.union(Term.regex(r"de"), response_format=ResponseFormat.REGEX) +print(result) # regex=(abc)?de -i = term.intersection(Term.regex(r"de.*"), response_format=ResponseFormat.FAIR) -print(i) # fair=... +result = term.intersection(Term.regex(r"de.*"), response_format=ResponseFormat.FAIR) +print(result) # fair=... ``` If the response format does not matter the argument `response_format` can be omitted or its value can be set to `ResponseFormat.ANY`. +Regardless of a term's internal format, call `get_pattern()` to obtain a regex string. + ## Bounding execution time -Long computations can be bounded with `execution_timeout` (milliseconds). Most methods on Term accepts it: +Set a server-side compute timeout in milliseconds with `execution_timeout`: ```python -# Limit the server-side compute time to 300 ms -res = Term.regex(r"(a|b){100}").intersection( - Term.regex(r"a+"), - execution_timeout=300 -) +from regexsolver import ApiError, RegexSolver, Term + +# Limit the server-side compute time to 5 ms +try: + res = Term.regex(r".*ab.*c(de|fg).*dab.*c(de|fg).*ab.*c(de|fg).*dab.*c").difference( + Term.regex(r".*abc.*"), + execution_timeout=5 + ) +except ApiError as error: + print(error) # The API returned the following error: The operation took too much time. ``` -If time is exceeded, the API will return an error. Catch `ApiError` to handle it. 
+ +There is no guarantee that the exact time will be respected. ## API Overview @@ -120,13 +128,13 @@ The client exposes three main groups of operations: ## Cross-Language Support -If you want to use this library with other programming languages, we provide a wide range of wrappers: +If you want to use this library with other programming languages, we provide: - [regexsolver-java](https://github.com/RegexSolver/regexsolver-java) - [regexsolver-js](https://github.com/RegexSolver/regexsolver-js) For more information about how to use the wrappers, you can refer to our [guide](https://docs.regexsolver.com/getting-started.html). -If you want to run the engine yourself you can also take a look at [regexsolver](https://github.com/RegexSolver/regexsolver). +You can also take a look at [regexsolver](https://github.com/RegexSolver/regexsolver) which contains the source code of the engine. ## License From 96626c6450e942ad8cf676447b4f0befd02cfd18 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 20:49:30 +0200 Subject: [PATCH 04/20] Add dotenv --- regexsolver/__init__.py | 52 ++++++++++++++++++++++++++--------------- requirements.txt | 3 ++- 2 files changed, 35 insertions(+), 20 deletions(-) diff --git a/regexsolver/__init__.py b/regexsolver/__init__.py index 31c9169..2bea193 100644 --- a/regexsolver/__init__.py +++ b/regexsolver/__init__.py @@ -1,11 +1,13 @@ from enum import Enum +from importlib import metadata +import os from regexsolver.details import Details, Cardinality, Length from typing import List, Optional -from pydantic import Field, BaseModel +from pydantic import BaseModel import requests - +from dotenv import load_dotenv class ApiError(Exception): """ @@ -24,12 +26,18 @@ def __init__(self): raise Exception("This class is a singleton.") else: RegexSolver._instance = self - self.base_url = "https://api.regexsolver.com/" - self.api_token = None - self.headers = { + + load_dotenv() + + 
self._base_url = os.environ.get("REGEXSOLVER_BASE_URL", "https://api.regexsolver.com") + self._api_token = os.environ.get("REGEXSOLVER_API_TOKEN") or None + + self._headers = { 'User-Agent': 'RegexSolver Python / 1.1.0', 'Content-Type': 'application/json' } + if self._api_token: + self._headers['Authorization'] = f'Bearer {self._api_token}' @classmethod def get_instance(cls): @@ -40,22 +48,22 @@ def get_instance(cls): @classmethod def initialize(cls, api_token: str, base_url: str = None): instance = cls.get_instance() - instance.api_token = api_token + instance._api_token = api_token if base_url: - instance.base_url = base_url + instance._base_url = base_url - instance.headers['Authorization'] = f'Bearer {instance.api_token}' + instance._headers['Authorization'] = f'Bearer {instance._api_token}' def _get_request_url(self, endpoint: str) -> str: - if self.base_url.endswith('/'): - return self.base_url + endpoint + if self._base_url.endswith('/'): + return self._base_url + endpoint else: - return self.base_url + '/' + endpoint + return self._base_url + '/' + endpoint def _request(self, endpoint: str, request: BaseModel) -> dict: response = requests.post( self._get_request_url(endpoint), - headers=self.headers, + headers=self._headers, json=request.model_dump(exclude_none=True) ) @@ -457,15 +465,21 @@ class ExecutionOptions(BaseModel): class RequestOptions(BaseModel): schema_version: int = 1 - response: ResponseOptions = Field(default_factory=ResponseOptions) - execution: ExecutionOptions = Field(default_factory=ExecutionOptions) + response: Optional[ResponseOptions] = None + execution: Optional[ExecutionOptions] = None @classmethod - def from_args(cls, response_format: ResponseFormat = None, execution_timeout: int = None): - return cls( - response=ResponseOptions(format=response_format), - execution=ExecutionOptions(timeout=execution_timeout), - ) + def from_args(cls, response_format: ResponseFormat = None, execution_timeout: int = None) -> "RequestOptions | None": + 
response = None + if response_format: + response=ResponseOptions(format=response_format) + execution = None + if execution_timeout: + execution=ExecutionOptions(timeout=execution_timeout) + if response or execution: + return cls(response=response, execution=execution) + else: + return None class MultiTermsRequest(BaseModel): terms: List[Term] diff --git a/requirements.txt b/requirements.txt index 63b3919..767ba20 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ requests>=2.20.0 pydantic<=2.5.3, >2.4.0; python_version<"3.8" -pydantic>=2.6.0; python_version>="3.8" \ No newline at end of file +pydantic>=2.6.0; python_version>="3.8" +python-dotenv==1.1.1 \ No newline at end of file From 8cb340e203de18d2f9fc9d4bb504d4b8c2ade3bd Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 21:34:44 +0200 Subject: [PATCH 05/20] Env variables should be read in initialize --- README.md | 25 ++++++++++++------------- regexsolver/__init__.py | 20 +++++++++----------- requirements.txt | 3 +-- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index cfef2b0..6f15048 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,9 @@ Requirements: Python >= 3.7 ```python from regexsolver import RegexSolver, Term -# Initialize with your API token -RegexSolver.initialize("YOUR_API_TOKEN") +# Set REGEXSOLVER_API_TOKEN in your env and call initialize(), +# or pass the token directly: +RegexSolver.initialize() # or RegexSolver.initialize("YOUR_API_TOKEN") # Create terms term1 = Term.regex(r"(abc|de|fg){2,}") @@ -47,15 +48,13 @@ RegexSolver supports a subset of regular expressions that adhere to the principl - **Line Feed and Dot:** RegexSolver handles all characters the same way. The dot `.` matches any Unicode character including line feed (`\n`). - **Empty Regular Expressions:** The empty language (matches no string) is represented by constructs like `[]` (empty character class). 
This is distinct from the empty string. -## Response formats +## Response Formats The API can handle terms in two formats: - `regex`: a regular expression pattern -- `fair`: FAIR (Fast Automaton Internal Representation); a representation used internally by the RegexSolver engine +- `fair`: FAIR (Fast Automaton Internal Representation), a stable, versioned programmatic format -FAIR is a stable, versioned internal format intended for programmatic use. - -For some operations, returning FAIR is cheaper. If you do not force a format, it will choose the most suitable one. To control the output, pass `response_format`: +If you do not force a format, the server picks the most efficient one. Control it with `response_format`: ```python from regexsolver import RegexSolver, ResponseFormat, Term @@ -68,9 +67,9 @@ result = term.intersection(Term.regex(r"de.*"), response_format=ResponseFormat.F print(result) # fair=... ``` -If the response format does not matter the argument `response_format` can be omitted or its value can be set to `ResponseFormat.ANY`. +If the format does not matter, omit `response_format` or set `ResponseFormat.ANY`. -Regardless of a term's internal format, call `get_pattern()` to obtain a regex string. +Regardless of internal format, use `get_pattern()` to obtain a regex string. ## Bounding execution time @@ -89,7 +88,7 @@ except ApiError as error: print(error) # The API returned the following error: The operation took too much time. ``` -There is no guarantee that the exact time will be respected. +Timeout is best effort. The exact time is not guaranteed. ## API Overview @@ -100,14 +99,14 @@ The client exposes three main groups of operations: | Method | Return | Description | | -------- | ------- | ------- | | `t.get_details()` | `Details` | Return cardinality, length bounds, and if it is empty or total. | -| `t.get_cardinality()` | `Cardinality` | Returns the cardinality of the term (i.e., the number of possible matched strings). 
| -| `t.get_length()` | `Length` | Returns the minimum and maximum length of matched strings. | +| `t.get_cardinality()` | `Cardinality` | Return the cardinality of the term (i.e., the number of possible matched strings). | +| `t.get_length()` | `Length` | Return the minimum and maximum length of matched strings. | | `t.is_empty()` | `bool` | `True` if the term matches no string. | | `t.is_total()` | `bool` | `True` if the term matches all possible strings. | | `t.is_empty_string()` | `bool` | `True` if the term matches only the empty string. | | `t.equivalent(term: Term)` | `bool` | `True` if `t` and `term` accept exactly the same language. Supports `execution_timeout`. | | `t.subset(term: Term)` | `bool` | `True` if every string matched by `t` is also matched by `term`. Supports `execution_timeout`. | -| `t.get_dot()` | `str` | Return a GraphViz DOT representation of the automaton for the term. | +| `t.get_dot()` | `str` | Return a Graphviz DOT representation of the automaton for the term. | | `t.get_pattern()` | `str` | Return a regular expression pattern for the term. 
| ### Compute diff --git a/regexsolver/__init__.py b/regexsolver/__init__.py index 2bea193..71e95cd 100644 --- a/regexsolver/__init__.py +++ b/regexsolver/__init__.py @@ -7,7 +7,6 @@ from typing import List, Optional from pydantic import BaseModel import requests -from dotenv import load_dotenv class ApiError(Exception): """ @@ -26,18 +25,11 @@ def __init__(self): raise Exception("This class is a singleton.") else: RegexSolver._instance = self - - load_dotenv() - - self._base_url = os.environ.get("REGEXSOLVER_BASE_URL", "https://api.regexsolver.com") - self._api_token = os.environ.get("REGEXSOLVER_API_TOKEN") or None - + self._headers = { 'User-Agent': 'RegexSolver Python / 1.1.0', 'Content-Type': 'application/json' } - if self._api_token: - self._headers['Authorization'] = f'Bearer {self._api_token}' @classmethod def get_instance(cls): @@ -46,11 +38,17 @@ def get_instance(cls): return cls._instance @classmethod - def initialize(cls, api_token: str, base_url: str = None): + def initialize(cls, api_token: str = None, base_url: str = None): instance = cls.get_instance() - instance._api_token = api_token + if api_token: + instance._api_token = api_token + else: + instance._api_token = os.environ.get("REGEXSOLVER_API_TOKEN") or None + if base_url: instance._base_url = base_url + else: + instance._base_url = os.environ.get("REGEXSOLVER_BASE_URL", "https://api.regexsolver.com") instance._headers['Authorization'] = f'Bearer {instance._api_token}' diff --git a/requirements.txt b/requirements.txt index 767ba20..63b3919 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,3 @@ requests>=2.20.0 pydantic<=2.5.3, >2.4.0; python_version<"3.8" -pydantic>=2.6.0; python_version>="3.8" -python-dotenv==1.1.1 \ No newline at end of file +pydantic>=2.6.0; python_version>="3.8" \ No newline at end of file From da3bdf78d6e522e0ec6718e54a650ec8d3878569 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 
21:36:42 +0200 Subject: [PATCH 06/20] Update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6f15048..255b9e4 100644 --- a/README.md +++ b/README.md @@ -117,7 +117,7 @@ The client exposes three main groups of operations: | `t.union(*terms: Term)` | `Term` | Compute the union of `t` with the given terms. Supports `response_format` and `execution_timeout`. | | `t.intersection(*terms: Term)` | `Term` | Compute the intersection of `t` with the given terms. Supports `response_format` and `execution_timeout`. | | `t.difference(term: Term)` | `Term` | Compute the difference `t - term`. Supports `response_format` and `execution_timeout`. | -| `t.repeat(min: int, max: Optional[int])` | `Term` | Computes the repetition of the term between `min` and `max` times; if `max` is `None`, the repetition is unbounded. Supports `response_format` and `execution_timeout`. | +| `t.repeat(min: int, max: Optional[int])` | `Term` | Compute the repetition of the term between `min` and `max` times; if `max` is `None`, the repetition is unbounded. Supports `response_format` and `execution_timeout`. | ### Generate From 360cbd40e124f841d549fe5b9a51626b72c0dc7e Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 21:59:03 +0200 Subject: [PATCH 07/20] Update readme --- README.md | 43 ++++++++++++++++++++++++++++--------------- 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 255b9e4..da56ac3 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ print(result) # fair=... If the format does not matter, omit `response_format` or set `ResponseFormat.ANY`. -Regardless of internal format, use `get_pattern()` to obtain a regex string. +Regardless of internal format, call `get_pattern()` to obtain a regex string. ## Bounding execution time @@ -92,38 +92,51 @@ Timeout is best effort. The exact time is not guaranteed. 
## API Overview -The client exposes three main groups of operations: +`Term` exposes the following methods. + +### Build +| Method | Return | Description | +| -------- | ------- | ------- | +| `Term.fair(fair: str)` | `Term` | Creates a term from FAIR. | +| `Term.regex(regex: str)` | `Term` | Creates a term from a regex pattern. | ### Analyze | Method | Return | Description | | -------- | ------- | ------- | -| `t.get_details()` | `Details` | Return cardinality, length bounds, and if it is empty or total. | -| `t.get_cardinality()` | `Cardinality` | Return the cardinality of the term (i.e., the number of possible matched strings). | -| `t.get_length()` | `Length` | Return the minimum and maximum length of matched strings. | +| `t.equivalent(term: Term)` | `bool` | `True` if `t` and `term` accept exactly the same language. Supports `execution_timeout`. | +| `t.get_cardinality()` | `Cardinality` | Returns the cardinality of the term (i.e., the number of possible matched strings). | +| `t.get_details()` | `Details` | Returns cardinality, length bounds, and if it is empty or total. | +| `t.get_dot()` | `str` | Returns a Graphviz DOT representation of the automaton for the term. | +| `t.get_fair()` | `str` | Returns the FAIR of the term if defined. | +| `t.get_length()` | `Length` | Returns the minimum and maximum length of matched strings. | +| `t.get_pattern()` | `str` | Returns a regular expression pattern for the term. | | `t.is_empty()` | `bool` | `True` if the term matches no string. | -| `t.is_total()` | `bool` | `True` if the term matches all possible strings. | | `t.is_empty_string()` | `bool` | `True` if the term matches only the empty string. | -| `t.equivalent(term: Term)` | `bool` | `True` if `t` and `term` accept exactly the same language. Supports `execution_timeout`. | +| `t.is_total()` | `bool` | `True` if the term matches all possible strings. | | `t.subset(term: Term)` | `bool` | `True` if every string matched by `t` is also matched by `term`. 
Supports `execution_timeout`. | -| `t.get_dot()` | `str` | Return a Graphviz DOT representation of the automaton for the term. | -| `t.get_pattern()` | `str` | Return a regular expression pattern for the term. | ### Compute | Method | Return | Description | | -------- | ------- | ------- | -| `t.concat(*terms: Term)` | `Term` | Concatenate `t` with the given terms. Supports `response_format` and `execution_timeout`. | -| `t.union(*terms: Term)` | `Term` | Compute the union of `t` with the given terms. Supports `response_format` and `execution_timeout`. | -| `t.intersection(*terms: Term)` | `Term` | Compute the intersection of `t` with the given terms. Supports `response_format` and `execution_timeout`. | -| `t.difference(term: Term)` | `Term` | Compute the difference `t - term`. Supports `response_format` and `execution_timeout`. | -| `t.repeat(min: int, max: Optional[int])` | `Term` | Compute the repetition of the term between `min` and `max` times; if `max` is `None`, the repetition is unbounded. Supports `response_format` and `execution_timeout`. | +| `t.concat(*terms: Term)` | `Term` | Concatenates `t` with the given terms. Supports `response_format` and `execution_timeout`. | +| `t.difference(term: Term)` | `Term` | Computes the difference `t - term`. Supports `response_format` and `execution_timeout`. | +| `t.intersection(*terms: Term)` | `Term` | Computes the intersection of `t` with the given terms. Supports `response_format` and `execution_timeout`. | +| `t.repeat(min: int, max: Optional[int])` | `Term` | Computes the repetition of the term between `min` and `max` times; if `max` is `None`, the repetition is unbounded. Supports `response_format` and `execution_timeout`. | +| `t.union(*terms: Term)` | `Term` | Computes the union of `t` with the given terms. Supports `response_format` and `execution_timeout`. 
| ### Generate | Method | Return | Description | | -------- | ------- | ------- | -| `t.generate_strings(count: int)` | `List[str]` | Generate up to `count` unique example strings matched by `t`. Supports `execution_timeout`. | +| `t.generate_strings(count: int)` | `List[str]` | Generates up to `count` unique example strings matched by `t`. Supports `execution_timeout`. | + +### Other +| Method | Return | Description | +| -------- | ------- | ------- | +| `t.serialize()` | `str` | Returns a serialized form of `t`. | +| `Term.deserialize(string: str)` | `Term` | Returns a deserialized term. | ## Cross-Language Support From 6ad21e7dd530c80a8f0278257cbc4c12b2d090e3 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 22:02:08 +0200 Subject: [PATCH 08/20] update readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index da56ac3..3091bb9 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ RegexSolver supports a subset of regular expressions that adhere to the principl The API can handle terms in two formats: - `regex`: a regular expression pattern -- `fair`: FAIR (Fast Automaton Internal Representation), a stable, versioned programmatic format +- `fair`: FAIR (Fast Automaton Internal Representation), a stable, versioned programmatic format used internally by the engine If you do not force a format, the server picks the most efficient one. Control it with `response_format`: @@ -67,7 +67,7 @@ result = term.intersection(Term.regex(r"de.*"), response_format=ResponseFormat.F print(result) # fair=... ``` -If the format does not matter, omit `response_format` or set `ResponseFormat.ANY`. +If the format does not matter, omit `response_format` or set it to `ResponseFormat.ANY`. Regardless of internal format, call `get_pattern()` to obtain a regex string. @@ -136,7 +136,7 @@ Timeout is best effort. The exact time is not guaranteed. 
| Method | Return | Description | | -------- | ------- | ------- | | `t.serialize()` | `str` | Returns a serialized form of `t`. | -| `Term.deserialize(string: str)` | `Term` | Returns a deserialized term. | +| `Term.deserialize(string: str)` | `Term` | Returns a deserialized term from the given `string`. | ## Cross-Language Support From 0a7b773f108388f941f14fab231b1f2e918b0827 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 22:05:50 +0200 Subject: [PATCH 09/20] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3091bb9..b2a291c 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ Timeout is best effort. The exact time is not guaranteed. ### Build | Method | Return | Description | | -------- | ------- | ------- | -| `Term.fair(fair: str)` | `Term` | Creates a term from FAIR. | +| `Term.fair(fair: str)` | `Term` | Creates a term from a FAIR. | | `Term.regex(regex: str)` | `Term` | Creates a term from a regex pattern. | ### Analyze From 63ff2b641aed57f292521ae303971d468878ee96 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 22:19:54 +0200 Subject: [PATCH 10/20] update readme --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b2a291c..05e3b4d 100644 --- a/README.md +++ b/README.md @@ -52,9 +52,9 @@ RegexSolver supports a subset of regular expressions that adhere to the principl The API can handle terms in two formats: - `regex`: a regular expression pattern -- `fair`: FAIR (Fast Automaton Internal Representation), a stable, versioned programmatic format used internally by the engine +- `fair`: FAIR (Fast Automaton Internal Representation), a stable, signed format used internally by the engine -If you do not force a format, the server picks the most efficient one. 
Control it with `response_format`: +By default, the server returns whatever the operation produces, with no extra conversion. Override with `response_format`: ```python from regexsolver import RegexSolver, ResponseFormat, Term @@ -69,7 +69,7 @@ print(result) # fair=... If the format does not matter, omit `response_format` or set it to `ResponseFormat.ANY`. -Regardless of internal format, call `get_pattern()` to obtain a regex string. +Regardless of internal format, you can call `get_pattern()` to obtain a regex string. ## Bounding execution time From 58a8146238721cfbb64b417c510d36e191118b84 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 22:21:23 +0200 Subject: [PATCH 11/20] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 05e3b4d..727d68b 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ The API can handle terms in two formats: - `regex`: a regular expression pattern - `fair`: FAIR (Fast Automaton Internal Representation), a stable, signed format used internally by the engine -By default, the server returns whatever the operation produces, with no extra conversion. Override with `response_format`: +By default, the engine returns whatever the operation produces, with no extra conversion. Override with `response_format`: ```python from regexsolver import RegexSolver, ResponseFormat, Term From 8415d288e0b97bd5952453eb6ba92db75fc479f8 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 22:23:35 +0200 Subject: [PATCH 12/20] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 727d68b..87a80f9 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ print(result) # fair=... If the format does not matter, omit `response_format` or set it to `ResponseFormat.ANY`.
-Regardless of internal format, you can call `get_pattern()` to obtain a regex string. +Regardless of internal format, you can always call `get_pattern()` to obtain the regex pattern of a term. ## Bounding execution time From b4e45141f5e64fb57339683b3e4784eb2cb321ea Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Mon, 13 Oct 2025 22:24:50 +0200 Subject: [PATCH 13/20] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 87a80f9..0e128ae 100644 --- a/README.md +++ b/README.md @@ -69,7 +69,7 @@ print(result) # fair=... If the format does not matter, omit `response_format` or set it to `ResponseFormat.ANY`. -Regardless of internal format, you can always call `get_pattern()` to obtain the regex pattern of a term. +Regardless of the format, you can always call `get_pattern()` to obtain the regex pattern of a term. ## Bounding execution time From bee91137bfb8fe8d95187c82e97da148122d2321 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Tue, 14 Oct 2025 08:55:42 +0200 Subject: [PATCH 14/20] Update README.md --- README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/README.md b/README.md index 0e128ae..71457be 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,7 @@ # RegexSolver Python API Client [Homepage](https://regexsolver.com) | [Online Demo](https://regexsolver.com/demo) | [Documentation](https://docs.regexsolver.com) | [Developer Console](https://console.regexsolver.com) -Python client for the RegexSolver API. - -RegexSolver is a powerful regular expression manipulation toolkit, that gives you the power to manipulate regex as if -they were sets. +**RegexSolver** is a powerful regular expression manipulation toolkit that lets you manipulate regular expressions as if they were sets. 
It provides a powerful API to perform operations like union, intersection, and difference on regex patterns, enabling advanced regex analysis and transformation. ## Installation From b8fa9946df54683680f053aa894aa8bb1a750aab Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Tue, 14 Oct 2025 10:08:21 +0200 Subject: [PATCH 15/20] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 71457be..d7cb56e 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # RegexSolver Python API Client [Homepage](https://regexsolver.com) | [Online Demo](https://regexsolver.com/demo) | [Documentation](https://docs.regexsolver.com) | [Developer Console](https://console.regexsolver.com) -**RegexSolver** is a powerful regular expression manipulation toolkit that lets you manipulate regular expressions as if they were sets. It provides a powerful API to perform operations like union, intersection, and difference on regex patterns, enabling advanced regex analysis and transformation. +**RegexSolver** is a powerful toolkit for building, combining, and analyzing regular expressions. It is designed for constraint solvers, test generators, and other systems that need advanced regex operations. 
## Installation From 585c8bd3f906f0c6ea7f09119c25100fe3b327cd Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Tue, 14 Oct 2025 21:13:57 +0200 Subject: [PATCH 16/20] Update project --- pyproject.toml | 6 +++--- setup.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 796bf24..6367854 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,15 +8,15 @@ version = "1.1.0" authors = [ { name = "RegexSolver", email = "contact@regexsolver.com" } ] -description = "RegexSolver allows you to manipulate regular expressions as sets, enabling operations such as intersection, union, and subtraction." +description = "RegexSolver is a powerful toolkit for building, combining, and analyzing regular expressions." keywords = [ "Regular Expression", "regex", "regexp", - "set", + "pattern", "intersection", "union", - "subtraction", + "concat", "difference", "equivalence", "subset", diff --git a/setup.py b/setup.py index 10a66e1..9db4e61 100644 --- a/setup.py +++ b/setup.py @@ -3,14 +3,14 @@ setup( name="regexsolver", version="1.1.0", - description="RegexSolver allows you to manipulate regular expressions as sets, enabling operations such as intersection, union, and subtraction.", + description="RegexSolver is a powerful toolkit for building, combining, and analyzing regular expressions.", long_description=open('README.md').read(), long_description_content_type='text/markdown', author="RegexSolver", author_email="contact@regexsolver.com", url="https://github.com/RegexSolver/regexsolver-python", license="MIT", - keywords="regex regexp set intersection union subtraction difference equivalence subset nfa dfa", + keywords="regex regexp pattern intersection union difference concat equivalence subset nfa dfa", packages=find_packages(exclude=["tests", "tests.*"]), install_requires=[ From 5a5ef35322ef930be3138d1cb5c4d57e86cec82a Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden 
<1949482+alexvbrdn@users.noreply.github.com> Date: Fri, 17 Oct 2025 19:29:44 +0200 Subject: [PATCH 17/20] Add missing repeat --- regexsolver/__init__.py | 291 ++++++++++++---------- tests/assets/response_compute_repeat.json | 4 + tests/term_operation_test.py | 23 +- 3 files changed, 184 insertions(+), 134 deletions(-) create mode 100644 tests/assets/response_compute_repeat.json diff --git a/regexsolver/__init__.py b/regexsolver/__init__.py index 71e95cd..22be8b3 100644 --- a/regexsolver/__init__.py +++ b/regexsolver/__init__.py @@ -173,6 +173,69 @@ def regex(cls, pattern: str) -> 'Term': """ return cls(type=TermType.REGEX, value=pattern) + # Analyze + + def equivalent(self, term: 'Term', execution_timeout=None) -> bool: + """ + Check whether this term is equivalent to another. + + Parameters: + term: The term to compare against. + execution_timeout: Timeout in milliseconds for the server. + + Returns: + True if both terms accept exactly the same language. + """ + request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._analyze_equivalent(request) + + def get_cardinality(self) -> Cardinality: + """ + Get the cardinality of this term. + + Results are cached on the instance to avoid repeated API calls. + + Returns: + A `Cardinality` object describing how many distinct strings + are matched. + """ + + if self._cardinality: + return self._cardinality + elif self._details: + return self._details.cardinality + else: + self._cardinality = RegexSolver.get_instance()._analyze_cardinality(self) + return self._cardinality + + def get_details(self) -> Details: + """ + Analyze this term and return detailed information including cardinality, + length, and whether it is empty or total. + + Results are cached on the instance to avoid repeated API calls. 
+ """ + if self._details: + return self._details + else: + self._details = RegexSolver.get_instance()._analyze_details(self) + return self._details + + def get_dot(self) -> str: + """ + Get the GraphViz DOT representation of this term. + + Results are cached on the instance to avoid repeated API calls. + + Returns: + A DOT language string describing the automaton for this term. + """ + if self._dot: + return self._dot + else: + self._dot = RegexSolver.get_instance()._analyze_dot(self) + return self._dot + def get_fair(self) -> Optional[str]: """ Return the Fast Automaton Internal Representation (FAIR). @@ -180,6 +243,24 @@ def get_fair(self) -> Optional[str]: if self.type == TermType.FAIR: return self.value return None + + def get_length(self) -> Length: + """ + Get the length bounds of this term. + + Results are cached on the instance to avoid repeated API calls. + + Returns: + A `Length` object with the minimum and maximum string length + matched by this term. + """ + if self._length: + return self._length + elif self._details: + return self._details.length + else: + self._length = RegexSolver.get_instance()._analyze_length(self) + return self._length def get_pattern(self) -> Optional[str]: """ @@ -195,64 +276,78 @@ def get_pattern(self) -> Optional[str]: else: self._pattern = RegexSolver.get_instance()._analyze_pattern(self) return self._pattern - - def get_details(self) -> Details: + + def is_empty(self) -> bool: """ - Analyze this term and return detailed information including cardinality, - length, and whether it is empty or total. + Check whether this term matches no string. Results are cached on the instance to avoid repeated API calls.
""" - if self._details: - return self._details + if self._empty: + return self._empty + elif self._details: + return self._details.empty else: - self._details = RegexSolver.get_instance()._analyze_details(self) - return self._details - - def generate_strings(self, count: int, execution_timeout=None) -> List[str]: + self._empty = RegexSolver.get_instance()._analyze_empty(self) + return self._empty + + def is_empty_string(self) -> bool: """ - Generate up to `count` example strings that match this term. - - Parameters: - count: Maximum number of unique strings to generate. - execution_timeout: Timeout in milliseconds for the server. + Check whether this term matches only the empty string. - Returns: - A list of strings matched by this term. + Results are cached on the instance to avoid repeated API calls. """ - request = GenerateStringsRequest(term=self, count=count, options=RequestOptions.from_args(execution_timeout=execution_timeout)) - return RegexSolver.get_instance()._generate_strings(request) + if self._empty_string: + return self._empty_string + else: + self._empty_string = RegexSolver.get_instance()._analyze_empty_string(self) + return self._empty_string + + def is_total(self) -> bool: + """ + Check whether this term matches all possible strings. - def intersection(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': + Results are cached on the instance to avoid repeated API calls. """ - Compute the intersection of this term with one or more other terms. + if self._total: + return self._total + elif self._details: + return self._details.total + else: + self._total = RegexSolver.get_instance()._analyze_total(self) + return self._total + + def subset(self, term: 'Term', execution_timeout=None) -> bool: + """ + Check whether this term is a subset of another. Parameters: - terms: Additional terms to intersect with. - response_format: Output format (`regex`, `fair`, or `any`). + term: The term to compare against. 
execution_timeout: Timeout in milliseconds for the server. Returns: - A new term representing the intersection. + True if every string matched by this term is also matched by `term`. """ - request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) - return RegexSolver.get_instance()._compute_intersection(request) + request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._analyze_subset(request) - def union(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': + # Compute + + def concat(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - Compute the union of this term with one or more other terms. + Concatenate this term with one or more other terms. Parameters: - terms: Terms to combine with this one. + terms: Additional terms to append in sequence. response_format: Output format (`regex`, `fair`, or `any`). execution_timeout: Timeout in milliseconds for the server. Returns: - A new term representing the union. + A new term representing the concatenation. """ request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) - return RegexSolver.get_instance()._compute_union(request) - + return RegexSolver.get_instance()._compute_concat(request) + def difference(self, term: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ Compute the difference between this term and another. 
@@ -268,141 +363,71 @@ def difference(self, term: 'Term', response_format=None, execution_timeout=None) request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) return RegexSolver.get_instance()._compute_difference(request) - def concat(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': + def intersection(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - Concatenate this term with one or more other terms. + Compute the intersection of this term with one or more other terms. Parameters: - terms: Additional terms to append in sequence. + terms: Additional terms to intersect with. response_format: Output format (`regex`, `fair`, or `any`). execution_timeout: Timeout in milliseconds for the server. Returns: - A new term representing the concatenation. + A new term representing the intersection. """ request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) - return RegexSolver.get_instance()._compute_concat(request) + return RegexSolver.get_instance()._compute_intersection(request) - def equivalent(self, term: 'Term', execution_timeout=None) -> bool: + def repeat(self, min: int, max: Optional[int], response_format=None, execution_timeout=None) -> 'Term': """ - Check whether this term is equivalent to another. + Computes the repetition of the term between `min` and `max` times; if `max` is `None`, the repetition is unbounded. Parameters: - term: The term to compare against. + min: The lower bound of the repetition. + max: The upper bound of the repetition, if `None` the repetition is unbounded. + response_format: Output format (`regex`, `fair`, or `any`). execution_timeout: Timeout in milliseconds for the server. Returns: - True if both terms accept exactly the same language. 
+ A new term representing the repetition. """ - request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(execution_timeout=execution_timeout)) - return RegexSolver.get_instance()._analyze_equivalent(request) + request = RepeatRequest(term=self, min=min, max=max, options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_repeat(request) - def subset(self, term: 'Term', execution_timeout=None) -> bool: + + def union(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - Check whether this term is a subset of another. + Compute the union of this term with one or more other terms. Parameters: - term: The term to compare against. + terms: Terms to combine with this one. + response_format: Output format (`regex`, `fair`, or `any`). execution_timeout: Timeout in milliseconds for the server. Returns: - True if every string matched by this term is also matched by `term`. - """ - request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(execution_timeout=execution_timeout)) - return RegexSolver.get_instance()._analyze_subset(request) - - def is_empty(self) -> bool: + A new term representing the union. """ - Check whether this term matches no string. + request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_union(request) - Results are cached on the instance to avoid repeated API calls. - """ - if self._empty: - return self._empty - elif self._details: - return self._details.empty - else: - self._empty = RegexSolver.get_instance()._analyze_empty(self) - return self._empty - - def is_total(self) -> bool: - """ - Check whether this term matches all possible strings. + # Generate - Results are cached on the instance to avoid repeated API calls. 
- """ - if self._total: - return self._total - elif self._details: - return self._details.total - else: - self._total = RegexSolver.get_instance()._analyze_total(self) - return self._total - - def is_empty_string(self) -> bool: + def generate_strings(self, count: int, execution_timeout=None) -> List[str]: """ - Check whether this term matches only the empty string. + Generate up to `count` example strings that match this term. - Results are cached on the instance to avoid repeated API calls. - """ - if self._empty_string: - return self._empty_string - else: - self._empty_string = RegexSolver.get_instance()._analyze_empty_string(self) - return self._empty_string - - def get_dot(self) -> str: - """ - Get the GraphViz DOT representation of this term. - - Results are cached on the instance to avoid repeated API calls. + Parameters: + count: Maximum number of unique strings to generate. + execution_timeout: Timeout in milliseconds for the server. Returns: - A DOT language string describing the automaton for this term. + A list of strings matched by this term. """ - if self._dot: - return self._dot - else: - self._dot = RegexSolver.get_instance()._analyze_dot(self) - return self._dot + request = GenerateStringsRequest(term=self, count=count, options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._generate_strings(request) - def get_cardinality(self) -> Cardinality: - """ - Get the cardinality of this term. - - Results are cached on the instance to avoid repeated API calls. - - Returns: - A `Cardinality` object describing how many distinct strings - are matched. - """ - - if self._cardinality: - return self._cardinality - elif self._details: - return self._details.cardinality - else: - self._cardinality = RegexSolver.get_instance()._analyze_cardinality(self) - return self._cardinality + # Other - def get_length(self) -> Length: - """ - Get the length bounds of this term. 
- - Results are cached on the instance to avoid repeated API calls. - - Returns: - A `Length` object with the minimum and maximum string length - matched by this term. - """ - if self._length: - return self._length - elif self._length: - return self._details.length - else: - self._length = RegexSolver.get_instance()._analyze_length(self) - return self._length - def serialize(self) -> str: """ Return a string representation of this term in the format diff --git a/tests/assets/response_compute_repeat.json b/tests/assets/response_compute_repeat.json new file mode 100644 index 0000000..6043d14 --- /dev/null +++ b/tests/assets/response_compute_repeat.json @@ -0,0 +1,4 @@ +{ + "type": "regex", + "value": "abc{3,5}" +} \ No newline at end of file diff --git a/tests/term_operation_test.py b/tests/term_operation_test.py index d814a9e..930a82a 100644 --- a/tests/term_operation_test.py +++ b/tests/term_operation_test.py @@ -8,7 +8,9 @@ class TermsOperationTest(unittest.TestCase): def setUp(self): RegexSolver.initialize("TOKEN") - + + # Analyze + def test_analyze_cardinality(self): with open('tests/assets/response_analyze_cardinality.json') as response: json_response = json.load(response) @@ -223,6 +225,8 @@ def test_analyze_subset(self): self.assertEqual(True, result) + # Compute + def test_compute_concat(self): with open('tests/assets/response_compute_concat.json') as response: json_response = json.load(response) @@ -271,6 +275,21 @@ def test_compute_intersection(self): result = term1.intersection(term2, term3, response_format=ResponseFormat.REGEX) self.assertEqual("regex=deabc", str(result)) + + def test_compute_repeat(self): + with open('tests/assets/response_compute_repeat.json') as response: + json_response = json.load(response) + with requests_mock.Mocker() as mock: + mock.post( + "https://api.regexsolver.com/api/compute/repeat", + json=json_response, status_code=200 + ) + + term = Term.regex(r"abc") + + result = term.repeat(3, 5, response_format=ResponseFormat.REGEX) + 
+ self.assertEqual("regex=abc{3,5}", str(result)) def test_compute_union(self): with open('tests/assets/response_compute_union.json') as response: @@ -288,6 +307,8 @@ def test_compute_union(self): result = term1.union(term2, term3, response_format=ResponseFormat.REGEX) self.assertEqual("regex=(abc|de|fghi)", str(result)) + + # Generate def test_generate_strings(self): with open('tests/assets/response_generate_strings.json') as response: From 03a5bca8d548fa10a0cd477866a52759d400e785 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Sun, 19 Oct 2025 13:52:11 +0200 Subject: [PATCH 18/20] Improve testing --- .github/workflows/python.yml | 2 + test-requirements.txt | 3 +- .../assets/response_analyze_cardinality.json | 4 - tests/assets/response_analyze_details.json | 13 - .../response_analyze_details_empty.json | 13 - .../response_analyze_details_infinite.json | 12 - tests/assets/response_analyze_dot.json | 4 - tests/assets/response_analyze_empty.json | 4 - .../assets/response_analyze_empty_string.json | 4 - tests/assets/response_analyze_equivalent.json | 4 - tests/assets/response_analyze_length.json | 5 - .../assets/response_analyze_length_empty.json | 3 - tests/assets/response_analyze_pattern.json | 4 - tests/assets/response_analyze_subset.json | 4 - tests/assets/response_analyze_total.json | 4 - tests/assets/response_compute_concat.json | 4 - tests/assets/response_compute_difference.json | 4 - .../assets/response_compute_intersection.json | 4 - tests/assets/response_compute_repeat.json | 4 - tests/assets/response_compute_union.json | 4 - tests/assets/response_generate_strings.json | 9 - tests/integration_test.py | 200 +++++++++++ tests/term_operation_test.py | 317 +----------------- 23 files changed, 205 insertions(+), 424 deletions(-) delete mode 100644 tests/assets/response_analyze_cardinality.json delete mode 100644 tests/assets/response_analyze_details.json delete mode 100644 
tests/assets/response_analyze_details_empty.json delete mode 100644 tests/assets/response_analyze_details_infinite.json delete mode 100644 tests/assets/response_analyze_dot.json delete mode 100644 tests/assets/response_analyze_empty.json delete mode 100644 tests/assets/response_analyze_empty_string.json delete mode 100644 tests/assets/response_analyze_equivalent.json delete mode 100644 tests/assets/response_analyze_length.json delete mode 100644 tests/assets/response_analyze_length_empty.json delete mode 100644 tests/assets/response_analyze_pattern.json delete mode 100644 tests/assets/response_analyze_subset.json delete mode 100644 tests/assets/response_analyze_total.json delete mode 100644 tests/assets/response_compute_concat.json delete mode 100644 tests/assets/response_compute_difference.json delete mode 100644 tests/assets/response_compute_intersection.json delete mode 100644 tests/assets/response_compute_repeat.json delete mode 100644 tests/assets/response_compute_union.json delete mode 100644 tests/assets/response_generate_strings.json create mode 100644 tests/integration_test.py diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a9a10d5..4ab8f0b 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -29,4 +29,6 @@ jobs: pip install pytest - name: Run tests + env: + REGEXSOLVER_API_TOKEN: ${{ secrets.REGEXSOLVER_API_TOKEN }} run: pytest diff --git a/test-requirements.txt b/test-requirements.txt index 7a9c72b..606d9d3 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1 +1,2 @@ -requests_mock>=1.9.0 \ No newline at end of file +requests_mock>=1.9.0 +python-dotenv==1.1.1 \ No newline at end of file diff --git a/tests/assets/response_analyze_cardinality.json b/tests/assets/response_analyze_cardinality.json deleted file mode 100644 index 157edb5..0000000 --- a/tests/assets/response_analyze_cardinality.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "integer", - "value": 5 -} \ No newline at end 
of file diff --git a/tests/assets/response_analyze_details.json b/tests/assets/response_analyze_details.json deleted file mode 100644 index 07ce803..0000000 --- a/tests/assets/response_analyze_details.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "type": "details", - "cardinality": { - "type": "integer", - "value": 2 - }, - "length": [ - 2, - 3 - ], - "empty": false, - "total": false -} \ No newline at end of file diff --git a/tests/assets/response_analyze_details_empty.json b/tests/assets/response_analyze_details_empty.json deleted file mode 100644 index f50bf22..0000000 --- a/tests/assets/response_analyze_details_empty.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "type": "details", - "cardinality": { - "type": "integer", - "value": 0 - }, - "length": [ - null, - null - ], - "empty": true, - "total": false -} \ No newline at end of file diff --git a/tests/assets/response_analyze_details_infinite.json b/tests/assets/response_analyze_details_infinite.json deleted file mode 100644 index fe08178..0000000 --- a/tests/assets/response_analyze_details_infinite.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "type": "details", - "cardinality": { - "type": "infinite" - }, - "length": [ - 0, - null - ], - "empty": false, - "total": true -} \ No newline at end of file diff --git a/tests/assets/response_analyze_dot.json b/tests/assets/response_analyze_dot.json deleted file mode 100644 index 5b71397..0000000 --- a/tests/assets/response_analyze_dot.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "string", - "value": "digraph G { ... 
}" -} \ No newline at end of file diff --git a/tests/assets/response_analyze_empty.json b/tests/assets/response_analyze_empty.json deleted file mode 100644 index 84ed493..0000000 --- a/tests/assets/response_analyze_empty.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "boolean", - "value": true -} \ No newline at end of file diff --git a/tests/assets/response_analyze_empty_string.json b/tests/assets/response_analyze_empty_string.json deleted file mode 100644 index 84ed493..0000000 --- a/tests/assets/response_analyze_empty_string.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "boolean", - "value": true -} \ No newline at end of file diff --git a/tests/assets/response_analyze_equivalent.json b/tests/assets/response_analyze_equivalent.json deleted file mode 100644 index 25147f3..0000000 --- a/tests/assets/response_analyze_equivalent.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "boolean", - "value": false -} \ No newline at end of file diff --git a/tests/assets/response_analyze_length.json b/tests/assets/response_analyze_length.json deleted file mode 100644 index 0109dd3..0000000 --- a/tests/assets/response_analyze_length.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "type": "length", - "min": 0, - "max": 3 -} \ No newline at end of file diff --git a/tests/assets/response_analyze_length_empty.json b/tests/assets/response_analyze_length_empty.json deleted file mode 100644 index eb3a50f..0000000 --- a/tests/assets/response_analyze_length_empty.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "type": "length" -} \ No newline at end of file diff --git a/tests/assets/response_analyze_pattern.json b/tests/assets/response_analyze_pattern.json deleted file mode 100644 index 42cb41f..0000000 --- a/tests/assets/response_analyze_pattern.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "string", - "value": "abc.*" -} \ No newline at end of file diff --git a/tests/assets/response_analyze_subset.json b/tests/assets/response_analyze_subset.json deleted file mode 100644 index 84ed493..0000000 --- 
a/tests/assets/response_analyze_subset.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "boolean", - "value": true -} \ No newline at end of file diff --git a/tests/assets/response_analyze_total.json b/tests/assets/response_analyze_total.json deleted file mode 100644 index 25147f3..0000000 --- a/tests/assets/response_analyze_total.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "boolean", - "value": false -} \ No newline at end of file diff --git a/tests/assets/response_compute_concat.json b/tests/assets/response_compute_concat.json deleted file mode 100644 index c316789..0000000 --- a/tests/assets/response_compute_concat.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "regex", - "value": "abcde" -} \ No newline at end of file diff --git a/tests/assets/response_compute_difference.json b/tests/assets/response_compute_difference.json deleted file mode 100644 index 478ac72..0000000 --- a/tests/assets/response_compute_difference.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "regex", - "value": "abc" -} \ No newline at end of file diff --git a/tests/assets/response_compute_intersection.json b/tests/assets/response_compute_intersection.json deleted file mode 100644 index e6b1a7a..0000000 --- a/tests/assets/response_compute_intersection.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "regex", - "value": "deabc" -} \ No newline at end of file diff --git a/tests/assets/response_compute_repeat.json b/tests/assets/response_compute_repeat.json deleted file mode 100644 index 6043d14..0000000 --- a/tests/assets/response_compute_repeat.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "regex", - "value": "abc{3,5}" -} \ No newline at end of file diff --git a/tests/assets/response_compute_union.json b/tests/assets/response_compute_union.json deleted file mode 100644 index 27dae5e..0000000 --- a/tests/assets/response_compute_union.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "regex", - "value": "(abc|de|fghi)" -} \ No newline at end of file diff --git 
a/tests/assets/response_generate_strings.json b/tests/assets/response_generate_strings.json deleted file mode 100644 index 9ee8883..0000000 --- a/tests/assets/response_generate_strings.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "type": "strings", - "value": [ - "abcde", - "dede", - "deabc", - "abcabc" - ] -} \ No newline at end of file diff --git a/tests/integration_test.py b/tests/integration_test.py new file mode 100644 index 0000000..91013c6 --- /dev/null +++ b/tests/integration_test.py @@ -0,0 +1,200 @@ +import unittest +from dotenv import load_dotenv +from regexsolver import RegexSolver, ResponseFormat, Term + + +class IntegrationTest(unittest.TestCase): + def setUp(self): + load_dotenv() + RegexSolver.initialize() + + # Analyze + + def test_analyze_cardinality(self): + term = Term.regex(r"[0-4]") + cardinality = term.get_cardinality() + + self.assertEqual( + "Integer(5)", + str(cardinality) + ) + + def test_analyze_details(self): + term = Term.regex(r"(abc|de)") + details = term.get_details() + + self.assertEqual( + "Details[cardinality=Integer(2), length=Length[minimum=2, maximum=3], empty=False, total=False]", + str(details) + ) + + def test_analyze_details_infinite(self): + term = Term.regex(r".*") + details = term.get_details() + + self.assertEqual( + "Details[cardinality=Infinite, length=Length[minimum=0, maximum=None], empty=False, total=True]", + str(details) + ) + + def test_analyze_details_empty(self): + term = Term.regex(r"[]") + details = term.get_details() + + self.assertEqual( + "Details[cardinality=Integer(0), length=Length[minimum=None, maximum=None], empty=True, total=False]", + str(details) + ) + + def test_analyze_dot(self): + term = Term.regex(r"(abc|de)") + dot = term.get_dot() + + self.assertTrue(dot.startswith("digraph ")) + + def test_analyze_empty_string(self): + term = Term.regex(r"") + + result = term.is_empty_string() + + self.assertTrue(result) + + def test_analyze_empty(self): + term = Term.regex(r"[]") + + result = term.is_empty() + 
+ self.assertTrue(result) + + def test_analyze_total(self): + term = Term.regex(r".*") + + result = term.is_total() + + self.assertTrue(result) + + def test_analyze_equivalent(self): + term1 = Term.regex(r"(abc|de)") + term2 = Term.fair("sLc#w-!No&(oq@Sf>X).?lI3{uh{80qWEH[#0.pHq@B-9o[LpP-a#fYI+") - - result = term1.equivalent(term2) - - self.assertEqual(False, result) - - def test_analyze_length_empty(self): - with open('tests/assets/response_analyze_length_empty.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/analyze/length", - json=json_response, status_code=200 - ) - - term = Term.regex(r"[]") - length = term.get_length() - - self.assertEqual( - "Length[minimum=None, maximum=None]", - str(length) - ) - - def test_analyze_length(self): - with open('tests/assets/response_analyze_length.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/analyze/length", - json=json_response, status_code=200 - ) - - term = Term.regex(r"(abc)?") - length = term.get_length() - - self.assertEqual( - "Length[minimum=0, maximum=3]", - str(length) - ) - - def test_analyze_pattern(self): - with open('tests/assets/response_analyze_pattern.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/analyze/pattern", - json=json_response, status_code=200 - ) - - term = Term.regex(r"abc.*") - pattern = term.get_pattern() - - self.assertEqual( - "abc.*", - str(pattern) - ) - - def test_analyze_subset(self): - with open('tests/assets/response_analyze_subset.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/analyze/subset", - json=json_response, status_code=200 - ) - - term1 = Term.regex(r"de") - term2 = Term.regex(r"(abc|de)") - - 
result = term1.subset(term2) - - self.assertEqual(True, result) - - # Compute - - def test_compute_concat(self): - with open('tests/assets/response_compute_concat.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/compute/concat", - json=json_response, status_code=200 - ) - - term1 = Term.regex(r"abc") - term2 = Term.regex(r"de") - - result = term1.concat(term2, response_format=ResponseFormat.REGEX) - - self.assertEqual("regex=abcde", str(result)) - - def test_compute_difference(self): - with open('tests/assets/response_compute_difference.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/compute/difference", - json=json_response, status_code=200 - ) - - term1 = Term.regex(r"(abc|de)") - term2 = Term.regex(r"de") - - result = term1.difference(term2, response_format=ResponseFormat.REGEX) - - self.assertEqual("regex=abc", str(result)) - - def test_compute_intersection(self): - with open('tests/assets/response_compute_intersection.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/compute/intersection", - json=json_response, status_code=200 - ) - - term1 = Term.regex(r"(abc|de){2}") - term2 = Term.regex(r"de.*") - term3 = Term.regex(r".*abc") - - result = term1.intersection(term2, term3, response_format=ResponseFormat.REGEX) - - self.assertEqual("regex=deabc", str(result)) - - def test_compute_repeat(self): - with open('tests/assets/response_compute_repeat.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/compute/repeat", - json=json_response, status_code=200 - ) - - term = Term.regex(r"abc") - - result = term.repeat(3, 5, response_format=ResponseFormat.REGEX) - - self.assertEqual("regex=abc{3,5}", 
str(result)) - - def test_compute_union(self): - with open('tests/assets/response_compute_union.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/compute/union", - json=json_response, status_code=200 - ) - - term1 = Term.regex(r"abc") - term2 = Term.regex(r"de") - term3 = Term.regex(r"fghi") - - result = term1.union(term2, term3, response_format=ResponseFormat.REGEX) - - self.assertEqual("regex=(abc|de|fghi)", str(result)) - - # Generate - - def test_generate_strings(self): - with open('tests/assets/response_generate_strings.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: - mock.post( - "https://api.regexsolver.com/api/generate/strings", - json=json_response, status_code=200 - ) - - term = Term.regex(r"(abc|de){2}") - strings = term.generate_strings(10) - - self.assertEqual(4, len(strings)) def test_error_response(self): with open('tests/assets/response_error.json') as response: From a150a829417e35da4492c8ebc21ce784bfb64c30 Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Tue, 21 Oct 2025 16:46:02 +0200 Subject: [PATCH 19/20] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d7cb56e..d0c4199 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ term = Term.regex(r"abcde") result = term.union(Term.regex(r"de"), response_format=ResponseFormat.REGEX) print(result) # regex=(abc)?de -result = term.intersection(Term.regex(r"de.*"), response_format=ResponseFormat.FAIR) +result = term.union(Term.regex(r"de"), response_format=ResponseFormat.FAIR) print(result) # fair=... 
``` From 85f3604ef1a9bf612969ce9982ead86a596cd9de Mon Sep 17 00:00:00 2001 From: Alexandre van Beurden <1949482+alexvbrdn@users.noreply.github.com> Date: Sun, 26 Oct 2025 15:16:11 +0100 Subject: [PATCH 20/20] Remove get_details --- README.md | 1 - regexsolver/__init__.py | 172 ++++++++++++++++++++--------------- regexsolver/details.py | 74 --------------- tests/integration_test.py | 27 ------ tests/term_operation_test.py | 2 +- 5 files changed, 101 insertions(+), 175 deletions(-) delete mode 100644 regexsolver/details.py diff --git a/README.md b/README.md index d0c4199..f9ec186 100644 --- a/README.md +++ b/README.md @@ -103,7 +103,6 @@ Timeout is best effort. The exact time is not guaranteed. | -------- | ------- | ------- | | `t.equivalent(term: Term)` | `bool` | `True` if `t` and `term` accept exactly the same language. Supports `execution_timeout`. | | `t.get_cardinality()` | `Cardinality` | Returns the cardinality of the term (i.e., the number of possible matched strings). | -| `t.get_details()` | `Details` | Returns cardinality, length bounds, and if it is empty or total. | | `t.get_dot()` | `str` | Returns a Graphviz DOT representation of the automaton for the term. | | `t.get_fair()` | `str` | Returns the FAIR of the term if defined. | | `t.get_length()` | `Length` | Returns the minimum and maximum length of matched strings. | diff --git a/regexsolver/__init__.py b/regexsolver/__init__.py index 22be8b3..e43f256 100644 --- a/regexsolver/__init__.py +++ b/regexsolver/__init__.py @@ -1,13 +1,92 @@ from enum import Enum -from importlib import metadata +from typing import Any, Optional import os -from regexsolver.details import Details, Cardinality, Length - from typing import List, Optional -from pydantic import BaseModel +from pydantic import BaseModel, model_validator import requests +class Cardinality(BaseModel): + """ + Class that represent the number of possible values. 
+ """ + type: str + value: Optional[int] = None + + def is_infinite(self) -> bool: + """ + True if it has a infinite number of values, False otherwise. + """ + return self.type == 'infinite' + + def __str__(self): + if self.type == 'infinite': + return "Infinite" + elif self.type == 'bigInteger': + return 'BigInteger' + elif self.type == 'integer': + return "Integer({})".format(self.value) + else: + return 'Unknown' + + +class Length(BaseModel): + """ + Contains the minimum and maximum length of possible values. + """ + + minimum: Optional[int] + maximum: Optional[int] + + @model_validator(mode="before") + def from_list(cls, values: Any): + if isinstance(values, dict): + return {'minimum': values.get('min'), 'maximum': values.get('max')} + + if isinstance(values, list): + if len(values) != 2: + raise ValueError("List must contain exactly two elements") + return {'minimum': values[0], 'maximum': values[1]} + + return values + + def __str__(self): + return "Length[minimum={}, maximum={}]".format( + self.minimum, + self.maximum + ) + +class ResponseFormat(str, Enum): + ANY = "any" + REGEX = "regex" + FAIR = "fair" + +class ResponseOptions(BaseModel): + format: Optional[ResponseFormat] = None + + model_config = {"use_enum_values": True} + +class ExecutionOptions(BaseModel): + timeout: Optional[int] = None + +class RequestOptions(BaseModel): + schema_version: int = 1 + response: Optional[ResponseOptions] = None + execution: Optional[ExecutionOptions] = None + + @classmethod + def from_args(cls, response_format: ResponseFormat = None, execution_timeout: int = None) -> "RequestOptions | None": + response = None + if response_format: + response=ResponseOptions(format=response_format) + execution = None + if execution_timeout: + execution=ExecutionOptions(timeout=execution_timeout) + if response or execution: + return cls(response=response, execution=execution) + else: + return None + class ApiError(Exception): """ Exception raised when the API returns an error. 
@@ -48,7 +127,7 @@ def initialize(cls, api_token: str = None, base_url: str = None): if base_url: instance._base_url = base_url else: - instance._base_url = os.environ.get("REGEXSOLVER_BASE_URL", "https://api.regexsolver.com") + instance._base_url = os.environ.get("REGEXSOLVER_BASE_URL", "https://api.regexsolver.com/v1/") instance._headers['Authorization'] = f'Bearer {instance._api_token}' @@ -76,57 +155,54 @@ def _request(self, endpoint: str, request: BaseModel) -> dict: # Analyze - def _analyze_details(self, term: 'Term') -> Details: - return Details(**self._request('api/analyze/details', term)) - def _analyze_cardinality(self, term: 'Term') -> Cardinality: - return Cardinality(**self._request('api/analyze/cardinality', term)) + return Cardinality(**self._request('analyze/cardinality', term)) def _analyze_length(self, term: 'Term') -> Length: - return Length(**self._request('api/analyze/length', term)) + return Length(**self._request('analyze/length', term)) def _analyze_equivalent(self, request: 'MultiTermsRequest') -> bool: - return self._request('api/analyze/equivalent', request).get('value') + return self._request('analyze/equivalent', request).get('value') def _analyze_subset(self, request: 'MultiTermsRequest') -> bool: - return self._request('api/analyze/subset', request).get('value') + return self._request('analyze/subset', request).get('value') def _analyze_empty(self, term: 'Term') -> bool: - return self._request('api/analyze/empty', term).get('value') + return self._request('analyze/empty', term).get('value') def _analyze_total(self, term: 'Term') -> bool: - return self._request('api/analyze/total', term).get('value') + return self._request('analyze/total', term).get('value') def _analyze_empty_string(self, term: 'Term') -> bool: - return self._request('api/analyze/empty_string', term).get('value') + return self._request('analyze/empty_string', term).get('value') def _analyze_dot(self, term: 'Term') -> str: - return self._request('api/analyze/dot', 
term).get('value') + return self._request('analyze/dot', term).get('value') def _analyze_pattern(self, term: 'Term') -> str: - return self._request('api/analyze/pattern', term).get('value') + return self._request('analyze/pattern', term).get('value') # Compute def _compute_repeat(self, request: 'RepeatRequest') -> 'Term': - return Term(**self._request('api/compute/repeat', request)) + return Term(**self._request('compute/repeat', request)) def _compute_intersection(self, request: 'MultiTermsRequest') -> 'Term': - return Term(**self._request('api/compute/intersection', request)) + return Term(**self._request('compute/intersection', request)) def _compute_union(self, request: 'MultiTermsRequest') -> 'Term': - return Term(**self._request('api/compute/union', request)) + return Term(**self._request('compute/union', request)) def _compute_difference(self, request: 'MultiTermsRequest') -> 'Term': - return Term(**self._request('api/compute/difference', request)) + return Term(**self._request('compute/difference', request)) def _compute_concat(self, request: 'MultiTermsRequest') -> 'Term': - return Term(**self._request('api/compute/concat', request)) + return Term(**self._request('compute/concat', request)) # Generate def _generate_strings(self, request: 'GenerateStringsRequest') -> List[str]: - return self._request('api/generate/strings', request).get('value') + return self._request('generate/strings', request).get('value') class TermType(str, Enum): @@ -148,7 +224,6 @@ class Term(BaseModel): type: TermType value: str - _details: Optional['Details'] = None _cardinality: Optional[Cardinality] = None _length: Optional[Length] = None _empty: Optional[bool] = None @@ -202,24 +277,10 @@ def get_cardinality(self) -> Cardinality: if self._cardinality: return self._cardinality - elif self._details: - return self._details.cardinality else: self._cardinality = RegexSolver.get_instance()._analyze_cardinality(self) return self._cardinality - def get_details(self) -> Details: - """ - 
Analyze this term and return detailed information including cardinality, - length, and whether it is empty or total. - - Results are cached on the instance to avoid repeated API calls. - """ - if self._details: - return self._details - else: - self._details = RegexSolver.get_instance()._analyze_details(self) - return self._details def get_dot(self) -> str: """ @@ -285,8 +346,6 @@ def is_empty(self) -> bool: """ if self._empty: return self._empty - elif self._details: - return self._details.empty else: self._empty = RegexSolver.get_instance()._analyze_empty(self) return self._empty @@ -311,8 +370,6 @@ def is_total(self) -> bool: """ if self._total: return self._total - elif self._details: - return self._details.total else: self._total = RegexSolver.get_instance()._analyze_total(self) return self._total @@ -473,37 +530,8 @@ def __eq__(self, other): def __hash__(self): return hash(self.serialize()) -class ResponseFormat(str, Enum): - ANY = "any" - REGEX = "regex" - FAIR = "fair" - -class ResponseOptions(BaseModel): - format: Optional[ResponseFormat] = None - - model_config = {"use_enum_values": True} -class ExecutionOptions(BaseModel): - timeout: Optional[int] = None - -class RequestOptions(BaseModel): - schema_version: int = 1 - response: Optional[ResponseOptions] = None - execution: Optional[ExecutionOptions] = None - - @classmethod - def from_args(cls, response_format: ResponseFormat = None, execution_timeout: int = None) -> "RequestOptions | None": - response = None - if response_format: - response=ResponseOptions(format=response_format) - execution = None - if execution_timeout: - execution=ExecutionOptions(timeout=execution_timeout) - if response or execution: - return cls(response=response, execution=execution) - else: - return None - + class MultiTermsRequest(BaseModel): terms: List[Term] options: Optional[RequestOptions] = None diff --git a/regexsolver/details.py b/regexsolver/details.py deleted file mode 100644 index f7741d0..0000000 --- 
a/regexsolver/details.py +++ /dev/null @@ -1,74 +0,0 @@ -from typing import Any, Optional - -from pydantic import BaseModel, model_validator - - -class Cardinality(BaseModel): - """ - Class that represent the number of possible values. - """ - type: str - value: Optional[int] = None - - def is_infinite(self) -> bool: - """ - True if it has a infinite number of values, False otherwise. - """ - return self.type == 'infinite' - - def __str__(self): - if self.type == 'infinite': - return "Infinite" - elif self.type == 'bigInteger': - return 'BigInteger' - elif self.type == 'integer': - return "Integer({})".format(self.value) - else: - return 'Unknown' - - -class Length(BaseModel): - """ - Contains the minimum and maximum length of possible values. - """ - - minimum: Optional[int] - maximum: Optional[int] - - @model_validator(mode="before") - def from_list(cls, values: Any): - if isinstance(values, dict): - return {'minimum': values.get('min'), 'maximum': values.get('max')} - - if isinstance(values, list): - if len(values) != 2: - raise ValueError("List must contain exactly two elements") - return {'minimum': values[0], 'maximum': values[1]} - - return values - - def __str__(self): - return "Length[minimum={}, maximum={}]".format( - self.minimum, - self.maximum - ) - - -class Details(BaseModel): - """ - Contains details about the requested Term. 
- """ - type: str = 'details' - - cardinality: Cardinality - length: Length - empty: bool - total: bool - - def __str__(self): - return "Details[cardinality={}, length={}, empty={}, total={}]".format( - self.cardinality, - self.length, - self.empty, - self.total - ) diff --git a/tests/integration_test.py b/tests/integration_test.py index 91013c6..c601c19 100644 --- a/tests/integration_test.py +++ b/tests/integration_test.py @@ -18,33 +18,6 @@ def test_analyze_cardinality(self): "Integer(5)", str(cardinality) ) - - def test_analyze_details(self): - term = Term.regex(r"(abc|de)") - details = term.get_details() - - self.assertEqual( - "Details[cardinality=Integer(2), length=Length[minimum=2, maximum=3], empty=False, total=False]", - str(details) - ) - - def test_analyze_details_infinite(self): - term = Term.regex(r".*") - details = term.get_details() - - self.assertEqual( - "Details[cardinality=Infinite, length=Length[minimum=0, maximum=None], empty=False, total=True]", - str(details) - ) - - def test_analyze_details_empty(self): - term = Term.regex(r"[]") - details = term.get_details() - - self.assertEqual( - "Details[cardinality=Integer(0), length=Length[minimum=None, maximum=None], empty=True, total=False]", - str(details) - ) def test_analyze_dot(self): term = Term.regex(r"(abc|de)") diff --git a/tests/term_operation_test.py b/tests/term_operation_test.py index b9826bb..bae8bb6 100644 --- a/tests/term_operation_test.py +++ b/tests/term_operation_test.py @@ -14,7 +14,7 @@ def test_error_response(self): json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( - "https://api.regexsolver.com/api/compute/intersection", + "https://api.regexsolver.com/v1/compute/intersection", json=json_response, status_code=400 )