diff --git a/.github/workflows/python.yml b/.github/workflows/python.yml index a9a10d5..4ab8f0b 100644 --- a/.github/workflows/python.yml +++ b/.github/workflows/python.yml @@ -29,4 +29,6 @@ jobs: pip install pytest - name: Run tests + env: + REGEXSOLVER_API_TOKEN: ${{ secrets.REGEXSOLVER_API_TOKEN }} run: pytest diff --git a/README.md b/README.md index 7335535..f9ec186 100644 --- a/README.md +++ b/README.md @@ -1,202 +1,149 @@ # RegexSolver Python API Client [Homepage](https://regexsolver.com) | [Online Demo](https://regexsolver.com/demo) | [Documentation](https://docs.regexsolver.com) | [Developer Console](https://console.regexsolver.com) -This repository contains the source code of the Python library for [RegexSolver](https://regexsolver.com) API. - -RegexSolver is a powerful regular expression manipulation toolkit, that gives you the power to manipulate regex as if -they were sets. +**RegexSolver** is a powerful toolkit for building, combining, and analyzing regular expressions. It is designed for constraint solvers, test generators, and other systems that need advanced regex operations. ## Installation ```sh pip install --upgrade regexsolver ``` +Requirements: Python >= 3.7 -### Requirements - -- Python >=3.7 +## Quick Start -## Usage - -In order to use the library you need to generate an API Token on our [Developer Console](https://console.regexsolver.com/). +1. Create an API token in the [Developer Console](https://console.regexsolver.com/). +2. 
Initialize the client and start working with terms: ```python from regexsolver import RegexSolver, Term -RegexSolver.initialize("YOUR TOKEN HERE") +# Set REGEXSOLVER_API_TOKEN in your env and call initialize(), +# or pass the token directly: +RegexSolver.initialize() # or RegexSolver.initialize("YOUR_API_TOKEN") +# Create terms term1 = Term.regex(r"(abc|de|fg){2,}") term2 = Term.regex(r"de.*") term3 = Term.regex(r".*abc") -term4 = Term.regex(r".+(abc|de).+") - -result = term1.intersection(term2, term3)\ - .subtraction(term4) +# Compute intersection and difference +result = term1.intersection(term2, term3).difference( + Term.regex(r".+(abc|de).+") +) -print(result) +print(result.get_pattern()) # de(fg)*abc ``` -## Features +## Key Concepts & Limitations -- [Intersection](#intersection) -- [Union](#union) -- [Subtraction / Difference](#subtraction--difference) -- [Equivalence](#equivalence) -- [Subset](#subset) -- [Details](#details) -- [Generate Strings](#generate-strings) +RegexSolver supports a subset of regular expressions that adhere to the principles of regular languages. Here are the key characteristics and limitations of the regular expressions supported by RegexSolver: +- **Anchored Expressions:** All regular expressions in RegexSolver are anchored. This means that the expressions are treated as if they start and end at the boundaries of the input text. For example, the expression `abc` will match the string "abc" but not "xabc" or "abcx". +- **Lookahead/Lookbehind:** RegexSolver does not support lookahead (`(?=...)`) or lookbehind (`(?<=...)`) assertions. Using them returns an error. +- **Pure Regular Expressions:** RegexSolver focuses on pure regular expressions as defined in regular language theory. This means features that extend beyond regular languages, such as backreferences (`\1`, `\2`, etc.), are not supported. Using a backreference returns an error. 
+- **Greedy/Ungreedy Quantifiers:** The concept of ungreedy (`*?`, `+?`, `??`) quantifiers is not supported. All quantifiers are treated as greedy. For example, `a*` or `a*?` will match the longest possible sequence of "a"s. +- **Line Feed and Dot:** RegexSolver handles all characters the same way. The dot `.` matches any Unicode character including line feed (`\n`). +- **Empty Regular Expressions:** The empty language (matches no string) is represented by constructs like `[]` (empty character class). This is distinct from the empty string. -### Intersection +## Response Formats -#### Request +The API can handle terms in two formats: +- `regex`: a regular expression pattern +- `fair`: FAIR (Fast Automaton Internal Representation), a stable, signed format used internally by the engine -Compute the intersection of the provided terms and return the resulting term. - -The maximum number of terms is currently limited to 10. +By default, the engine returns whatever the operation produces, with no extra conversion. Override with `response_format`: ```python -term1 = Term.regex(r"(abc|de){2}") -term2 = Term.regex(r"de.*") -term3 = Term.regex(r".*abc") - -result = term1.intersection(term2, term3) -print(result) -``` - -#### Response - -``` -regex=deabc -``` - -### Union +from regexsolver import RegexSolver, ResponseFormat, Term -Compute the union of the provided terms and return the resulting term. +term = Term.regex(r"abcde") +result = term.union(Term.regex(r"de"), response_format=ResponseFormat.REGEX) +print(result) # regex=(abc)?de -The maximum number of terms is currently limited to 10. - -#### Request - -```python -term1 = Term.regex(r"abc") -term2 = Term.regex(r"de") -term3 = Term.regex(r"fghi") - -result = term1.union(term2, term3) -print(result) +result = term.union(Term.regex(r"de"), response_format=ResponseFormat.FAIR) +print(result) # fair=... 
``` -#### Response - -``` -regex=(abc|de|fghi) -``` +If the format does not matter, omit `response_format` or set it to `ResponseFormat.ANY`. -### Subtraction / Difference +Regardless of the format, you can always call `get_pattern()` to obtain the regex pattern of a term. -Compute the first term minus the second and return the resulting term. +## Bounding execution time -#### Request +Set a server-side compute timeout in milliseconds with `execution_timeout`: ```python -term1 = Term.regex(r"(abc|de)") -term2 = Term.regex(r"de") +from regexsolver import ApiError, RegexSolver, Term -result = term1.subtraction(term2) -print(result) +# Limit the server-side compute time to 5 ms +try: + res = Term.regex(r".*ab.*c(de|fg).*dab.*c(de|fg).*ab.*c(de|fg).*dab.*c").difference( + Term.regex(r".*abc.*"), + execution_timeout=5 + ) +except ApiError as error: + print(error) # The API returned the following error: The operation took too much time. ``` -#### Response +Timeout is best effort. The exact time is not guaranteed. -``` -regex=abc -``` +## API Overview -### Equivalence +`Term` exposes the following methods. -Analyze if the two provided terms are equivalent. +### Build +| Method | Return | Description | +| -------- | ------- | ------- | +| `Term.fair(fair: str)` | `Term` | Creates a term from a FAIR. | +| `Term.regex(regex: str)` | `Term` | Creates a term from a regex pattern. | -#### Request +### Analyze -```python -term1 = Term.regex(r"(abc|de)") -term2 = Term.regex(r"(abc|de)*") +| Method | Return | Description | +| -------- | ------- | ------- | +| `t.equivalent(term: Term)` | `bool` | `True` if `t` and `term` accept exactly the same language. Supports `execution_timeout`. | +| `t.get_cardinality()` | `Cardinality` | Returns the cardinality of the term (i.e., the number of possible matched strings). | +| `t.get_dot()` | `str` | Returns a Graphviz DOT representation of the automaton for the term. | +| `t.get_fair()` | `str` | Returns the FAIR of the term if defined. 
| +| `t.get_length()` | `Length` | Returns the minimum and maximum length of matched strings. | +| `t.get_pattern()` | `str` | Returns a regular expression pattern for the term. | +| `t.is_empty()` | `bool` | `True` if the term matches no string. | +| `t.is_empty_string()` | `bool` | `True` if the term matches only the empty string. | +| `t.is_total()` | `bool` | `True` if the term matches all possible strings. | +| `t.subset(term: Term)` | `bool` | `True` if every string matched by `t` is also matched by `term`. Supports `execution_timeout`. | -result = term1.is_equivalent_to(term2) -print(result) -``` +### Compute -#### Response +| Method | Return | Description | +| -------- | ------- | ------- | +| `t.concat(*terms: Term)` | `Term` | Concatenates `t` with the given terms. Supports `response_format` and `execution_timeout`. | +| `t.difference(term: Term)` | `Term` | Computes the difference `t - term`. Supports `response_format` and `execution_timeout`. | +| `t.intersection(*terms: Term)` | `Term` | Computes the intersection of `t` with the given terms. Supports `response_format` and `execution_timeout`. | +| `t.repeat(min: int, max: Optional[int])` | `Term` | Computes the repetition of the term between `min` and `max` times; if `max` is `None`, the repetition is unbounded. Supports `response_format` and `execution_timeout`. | +| `t.union(*terms: Term)` | `Term` | Computes the union of `t` with the given terms. Supports `response_format` and `execution_timeout`. | -``` -False -``` +### Generate -### Subset +| Method | Return | Description | +| -------- | ------- | ------- | +| `t.generate_strings(count: int)` | `List[str]` | Generates up to `count` unique example strings matched by `t`. Supports `execution_timeout`. | -Analyze if the second term is a subset of the first. +### Other +| Method | Return | Description | +| -------- | ------- | ------- | +| `t.serialize()` | `str` | Returns a serialized form of `t`. 
| +| `Term.deserialize(string: str)` | `Term` | Returns a deserialized term from the given `string`. | -#### Request +## Cross-Language Support -```java -term1 = Term.regex(r"de") -term2 = Term.regex(r"(abc|de)") +If you want to use this library with other programming languages, we provide: +- [regexsolver-java](https://github.com/RegexSolver/regexsolver-java) +- [regexsolver-js](https://github.com/RegexSolver/regexsolver-js) -result = term1.is_subset_of(term2) -print(result) -``` +For more information about how to use the wrappers, you can refer to our [guide](https://docs.regexsolver.com/getting-started.html). -#### Response +You can also take a look at [regexsolver](https://github.com/RegexSolver/regexsolver) which contains the source code of the engine. -``` -True -``` - -### Details - -Compute the details of the provided term. - -The computed details are: - -- **Cardinality:** the number of possible values. -- **Length:** the minimum and maximum length of possible values. -- **Empty:** true if is an empty set (does not contain any value), false otherwise. -- **Total:** true if is a total set (contains all values), false otherwise. - -#### Request - -```python -term = Term.regex(r"(abc|de)") - -details = term.get_details() -print(details) -``` - -#### Response - -``` -Details[cardinality=Integer(2), length=Length[minimum=2, maximum=3], empty=false, total=false] -``` - -### Generate Strings - -Generate the given number of strings that can be matched by the provided term. - -The maximum number of strings to generate is currently limited to 200. - -#### Request - -```python -term = Term.regex(r"(abc|de){2}") - -strings = term.generate_strings(3) -print(strings) -``` - -#### Response - -``` -['deabc', 'abcde', 'dede'] -``` +## License +This project is licensed under the MIT License. 
diff --git a/pyproject.toml b/pyproject.toml index bda1679..6367854 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,19 +4,19 @@ build-backend = "setuptools.build_meta" [project] name = "regexsolver" -version = "1.0.3" +version = "1.1.0" authors = [ { name = "RegexSolver", email = "contact@regexsolver.com" } ] -description = "RegexSolver allows you to manipulate regular expressions as sets, enabling operations such as intersection, union, and subtraction." +description = "RegexSolver is a powerful toolkit for building, combining, and analyzing regular expressions." keywords = [ "Regular Expression", "regex", "regexp", - "set", + "pattern", "intersection", "union", - "subtraction", + "concat", "difference", "equivalence", "subset", diff --git a/regexsolver/__init__.py b/regexsolver/__init__.py index ae744d6..e43f256 100644 --- a/regexsolver/__init__.py +++ b/regexsolver/__init__.py @@ -1,12 +1,91 @@ -from regexsolver.details import Details, Cardinality, Length - +from enum import Enum +from typing import Any, Optional +import os from typing import List, Optional -from pydantic import BaseModel +from pydantic import BaseModel, model_validator import requests -from regexsolver.details import Details +class Cardinality(BaseModel): + """ + Class that represent the number of possible values. + """ + type: str + value: Optional[int] = None + + def is_infinite(self) -> bool: + """ + True if it has a infinite number of values, False otherwise. + """ + return self.type == 'infinite' + + def __str__(self): + if self.type == 'infinite': + return "Infinite" + elif self.type == 'bigInteger': + return 'BigInteger' + elif self.type == 'integer': + return "Integer({})".format(self.value) + else: + return 'Unknown' + +class Length(BaseModel): + """ + Contains the minimum and maximum length of possible values. 
+ """ + + minimum: Optional[int] + maximum: Optional[int] + + @model_validator(mode="before") + def from_list(cls, values: Any): + if isinstance(values, dict): + return {'minimum': values.get('min'), 'maximum': values.get('max')} + + if isinstance(values, list): + if len(values) != 2: + raise ValueError("List must contain exactly two elements") + return {'minimum': values[0], 'maximum': values[1]} + + return values + + def __str__(self): + return "Length[minimum={}, maximum={}]".format( + self.minimum, + self.maximum + ) + +class ResponseFormat(str, Enum): + ANY = "any" + REGEX = "regex" + FAIR = "fair" + +class ResponseOptions(BaseModel): + format: Optional[ResponseFormat] = None + + model_config = {"use_enum_values": True} + +class ExecutionOptions(BaseModel): + timeout: Optional[int] = None + +class RequestOptions(BaseModel): + schema_version: int = 1 + response: Optional[ResponseOptions] = None + execution: Optional[ExecutionOptions] = None + + @classmethod + def from_args(cls, response_format: ResponseFormat = None, execution_timeout: int = None) -> "RequestOptions | None": + response = None + if response_format: + response=ResponseOptions(format=response_format) + execution = None + if execution_timeout: + execution=ExecutionOptions(timeout=execution_timeout) + if response or execution: + return cls(response=response, execution=execution) + else: + return None class ApiError(Exception): """ @@ -25,10 +104,9 @@ def __init__(self): raise Exception("This class is a singleton.") else: RegexSolver._instance = self - self.base_url = "https://api.regexsolver.com/" - self.api_token = None - self.headers = { - 'User-Agent': 'RegexSolver Python / 1.0.3', + + self._headers = { + 'User-Agent': 'RegexSolver Python / 1.1.0', 'Content-Type': 'application/json' } @@ -39,177 +117,407 @@ def get_instance(cls): return cls._instance @classmethod - def initialize(cls, api_token: str, base_url: str = None): + def initialize(cls, api_token: str = None, base_url: str = None): 
instance = cls.get_instance() - instance.api_token = api_token + if api_token: + instance._api_token = api_token + else: + instance._api_token = os.environ.get("REGEXSOLVER_API_TOKEN") or None + if base_url: - instance.base_url = base_url + instance._base_url = base_url + else: + instance._base_url = os.environ.get("REGEXSOLVER_BASE_URL", "https://api.regexsolver.com/v1/") - instance.headers['Authorization'] = f'Bearer {instance.api_token}' + instance._headers['Authorization'] = f'Bearer {instance._api_token}' def _get_request_url(self, endpoint: str) -> str: - if self.base_url.endswith('/'): - return self.base_url + endpoint + if self._base_url.endswith('/'): + return self._base_url + endpoint else: - return self.base_url + '/' + endpoint + return self._base_url + '/' + endpoint def _request(self, endpoint: str, request: BaseModel) -> dict: response = requests.post( self._get_request_url(endpoint), - headers=self.headers, - json=request.model_dump() + headers=self._headers, + json=request.model_dump(exclude_none=True) ) if response.ok: return response.json() - else: - raise ApiError(response.json().get('message')) - - def compute_intersection(self, request: 'MultiTermsRequest') -> 'Term': - return Term(**self._request('api/compute/intersection', request)) - - def compute_union(self, request: 'MultiTermsRequest') -> 'Term': - return Term(**self._request('api/compute/union', request)) - - def compute_subtraction(self, request: 'MultiTermsRequest') -> 'Term': - return Term(**self._request('api/compute/subtraction', request)) - - def get_details(self, term: 'Term') -> Details: - return Details(**self._request('api/analyze/details', term)) - - def equivalence(self, request: 'MultiTermsRequest') -> bool: - return self._request('api/analyze/equivalence', request).get('value') - - def subset(self, request: 'MultiTermsRequest') -> bool: - return self._request('api/analyze/subset', request).get('value') - - def generate_strings(self, request: 'GenerateStringsRequest') -> 
List[str]: - return self._request('api/generate/strings', request).get('value') - - -_REGEX_PREFIX = "regex" -_FAIR_PREFIX = "fair" -_UNKNOWN_PREFIX = "unknown" + try: + data = response.json() + msg = data.get("message", response.text) + except Exception: + msg = response.text + raise ApiError(msg) + + # Analyze + + def _analyze_cardinality(self, term: 'Term') -> Cardinality: + return Cardinality(**self._request('analyze/cardinality', term)) + + def _analyze_length(self, term: 'Term') -> Length: + return Length(**self._request('analyze/length', term)) + + def _analyze_equivalent(self, request: 'MultiTermsRequest') -> bool: + return self._request('analyze/equivalent', request).get('value') + + def _analyze_subset(self, request: 'MultiTermsRequest') -> bool: + return self._request('analyze/subset', request).get('value') + + def _analyze_empty(self, term: 'Term') -> bool: + return self._request('analyze/empty', term).get('value') + + def _analyze_total(self, term: 'Term') -> bool: + return self._request('analyze/total', term).get('value') + + def _analyze_empty_string(self, term: 'Term') -> bool: + return self._request('analyze/empty_string', term).get('value') + + def _analyze_dot(self, term: 'Term') -> str: + return self._request('analyze/dot', term).get('value') + + def _analyze_pattern(self, term: 'Term') -> str: + return self._request('analyze/pattern', term).get('value') + + # Compute + + def _compute_repeat(self, request: 'RepeatRequest') -> 'Term': + return Term(**self._request('compute/repeat', request)) + + def _compute_intersection(self, request: 'MultiTermsRequest') -> 'Term': + return Term(**self._request('compute/intersection', request)) + + def _compute_union(self, request: 'MultiTermsRequest') -> 'Term': + return Term(**self._request('compute/union', request)) + + def _compute_difference(self, request: 'MultiTermsRequest') -> 'Term': + return Term(**self._request('compute/difference', request)) + + def _compute_concat(self, request: 
'MultiTermsRequest') -> 'Term': + return Term(**self._request('compute/concat', request)) + + # Generate + + def _generate_strings(self, request: 'GenerateStringsRequest') -> List[str]: + return self._request('generate/strings', request).get('value') + + +class TermType(str, Enum): + FAIR = "fair" + REGEX = "regex" class Term(BaseModel): """ - This class represents a term on which it is possible to perform operations. - It can either be a regular expression (regex) or a FAIR (Fast Automaton Internal Representation). + Represents a term on which operations can be performed. + A term can be either: + - A regular expression (`regex`) + - A FAIR (Fast Automaton Internal Representation, `fair`) + + Convenience constructors: + - `Term.regex(pattern: str)` + - `Term.fair(fair: str)` """ - type: str + type: TermType value: str - _details: Optional['Details'] = None + _cardinality: Optional[Cardinality] = None + _length: Optional[Length] = None + _empty: Optional[bool] = None + _total: Optional[bool] = None + _empty_string: Optional[bool] = None + _dot: Optional[str] = None + _pattern: Optional[str] = None + + model_config = {"use_enum_values": True} @classmethod def fair(cls, fair: str) -> 'Term': """ Initialize a Fast Automaton Internal Representation (FAIR). """ - return cls(type=_FAIR_PREFIX, value=fair) + return cls(type=TermType.FAIR, value=fair) @classmethod def regex(cls, pattern: str) -> 'Term': """ Initialize a regex. """ - return cls(type=_REGEX_PREFIX, value=pattern) + return cls(type=TermType.REGEX, value=pattern) + + # Analyze + + def equivalent(self, term: 'Term', execution_timeout=None) -> bool: + """ + Check whether this term is equivalent to another. + + Parameters: + term: The term to compare against. + execution_timeout: Timeout in milliseconds for the server. + + Returns: + True if both terms accept exactly the same language. 
+ """ + request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._analyze_equivalent(request) + + def get_cardinality(self) -> Cardinality: + """ + Get the cardinality of this term. + + Results are cached on the instance to avoid repeated API calls. + + Returns: + A `Cardinality` object describing how many distinct strings + are matched. + """ + + if self._cardinality: + return self._cardinality + else: + self._cardinality = RegexSolver.get_instance()._analyze_cardinality(self) + return self._cardinality + + + def get_dot(self) -> str: + """ + Get the GraphViz DOT representation of this term. + + Results are cached on the instance to avoid repeated API calls. + + Returns: + A DOT language string describing the automaton for this term. + """ + if self._dot: + return self._dot + else: + self._dot = RegexSolver.get_instance()._analyze_dot(self) + return self._dot def get_fair(self) -> Optional[str]: """ Return the Fast Automaton Internal Representation (FAIR). """ - if type == _FAIR_PREFIX: + if self.type == TermType.FAIR: return self.value return None + + def get_length(self) -> Length: + """ + Get the length bounds of this term. + + Results are cached on the instance to avoid repeated API calls. + + Returns: + A `Length` object with the minimum and maximum string length + matched by this term. + """ + if self._length: + return self._length + elif self._length: + return self._details.length + else: + self._length = RegexSolver.get_instance()._analyze_length(self) + return self._length def get_pattern(self) -> Optional[str]: """ Return the regular expression pattern. + + If the term is not a regex the pattern will be resolved. + Results are cached on the instance to avoid repeated API calls. 
""" - if type == _REGEX_PREFIX: + if self.type == TermType.REGEX: return self.value - return None + elif self._pattern: + return self._pattern + else: + self._pattern = RegexSolver.get_instance()._analyze_pattern(self) + return self._pattern + + def is_empty(self) -> bool: + """ + Check whether this term matches no string. + + Results are cached on the instance to avoid repeated API calls. + """ + if self._empty: + return self._empty + else: + self._empty = RegexSolver.get_instance()._analyze_empty(self) + return self._empty + + def is_empty_string(self) -> bool: + """ + Check whether this term matches only the empty string. - def get_details(self) -> Details: + Results are cached on the instance to avoid repeated API calls. """ - Get the details of this term. - Cache the result to avoid calling the API again if this method is called multiple times. + if self._empty_string: + return self._empty_string + else: + self._empty_string = RegexSolver.get_instance()._analyze_empty_string(self) + return self._empty_string + + def is_total(self) -> bool: """ - if self._details: - return self._details + Check whether this term matches all possible strings. + + Results are cached on the instance to avoid repeated API calls. + """ + if self._total: + return self._total else: - self._details = RegexSolver.get_instance().get_details(self) - return self._details + self._total = RegexSolver.get_instance()._analyze_total(self) + return self._total + + def subset(self, term: 'Term', execution_timeout=None) -> bool: + """ + Check whether this term is a subset of another. - def generate_strings(self, count: int) -> List[str]: + Parameters: + term: The term to compare against. + execution_timeout: Timeout in milliseconds for the server. + + Returns: + True if every string matched by this term is also matched by `term`. """ - Generate the given number of unique strings matched by this term. 
+ request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._analyze_subset(request) + + # Compute + + def concat(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - request = GenerateStringsRequest(term=self, count=count) - return RegexSolver.get_instance().generate_strings(request) + Concatenate this term with one or more other terms. + + Parameters: + terms: Additional terms to append in sequence. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. - def intersection(self, *terms: 'Term') -> 'Term': + Returns: + A new term representing the concatenation. """ - Compute the intersection with the given terms and return the resulting term. + request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_concat(request) + + def difference(self, term: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - request = MultiTermsRequest(terms=[self] + list(terms)) - return RegexSolver.get_instance().compute_intersection(request) + Compute the difference between this term and another. - def union(self, *terms: 'Term') -> 'Term': + Parameters: + term: The term to subtract from this one. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. + + Returns: + A new term representing the set difference (this - term). """ - Compute the union with the given terms and return the resulting term. 
+ request = MultiTermsRequest(terms=[self, term], options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_difference(request) + + def intersection(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - request = MultiTermsRequest(terms=[self] + list(terms)) - return RegexSolver.get_instance().compute_union(request) + Compute the intersection of this term with one or more other terms. + + Parameters: + terms: Additional terms to intersect with. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. - def subtraction(self, term: 'Term') -> 'Term': + Returns: + A new term representing the intersection. """ - Compute the subtraction with the given term and return the resulting term. + request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_intersection(request) + + def repeat(self, min: int, max: Optional[int], response_format=None, execution_timeout=None) -> 'Term': """ - request = MultiTermsRequest(terms=[self, term]) - return RegexSolver.get_instance().compute_subtraction(request) + Computes the repetition of the term between `min` and `max` times; if `max` is `None`, the repetition is unbounded. + + Parameters: + min: The lower bound of the repetition. + max: The upper bound of the repetition, if `None` the repetition is unbounded. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. - def is_equivalent_to(self, term: 'Term') -> bool: + Returns: + A new term representing the repetition. """ - Check equivalence with the given term. 
+ request = RepeatRequest(term=self, min=min, max=max, options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_repeat(request) + + + def union(self, *terms: 'Term', response_format=None, execution_timeout=None) -> 'Term': """ - request = MultiTermsRequest(terms=[self, term]) - return RegexSolver.get_instance().equivalence(request) + Compute the union of this term with one or more other terms. - def is_subset_of(self, term: 'Term') -> bool: + Parameters: + terms: Terms to combine with this one. + response_format: Output format (`regex`, `fair`, or `any`). + execution_timeout: Timeout in milliseconds for the server. + + Returns: + A new term representing the union. """ - Check if is a subset of the given term. + request = MultiTermsRequest(terms=[self] + list(terms), options=RequestOptions.from_args(response_format=response_format, execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._compute_union(request) + + # Generate + + def generate_strings(self, count: int, execution_timeout=None) -> List[str]: """ - request = MultiTermsRequest(terms=[self, term]) - return RegexSolver.get_instance().subset(request) + Generate up to `count` example strings that match this term. + Parameters: + count: Maximum number of unique strings to generate. + execution_timeout: Timeout in milliseconds for the server. + + Returns: + A list of strings matched by this term. + """ + request = GenerateStringsRequest(term=self, count=count, options=RequestOptions.from_args(execution_timeout=execution_timeout)) + return RegexSolver.get_instance()._generate_strings(request) + + # Other + def serialize(self) -> str: """ - Generate a string representation that can be parsed by deserialize(). + Return a string representation of this term in the format + `=`, which can later be parsed by `deserialize()`. 
""" - prefix = _UNKNOWN_PREFIX - if self.type == _FAIR_PREFIX: - prefix = _FAIR_PREFIX - elif self.type == _REGEX_PREFIX: - prefix = _REGEX_PREFIX - + if self.type == TermType.FAIR: + prefix = TermType.FAIR + elif self.type == TermType.REGEX: + prefix = TermType.REGEX + else: + raise ValueError(f"Unknown type: {self.type}") + return prefix + "=" + self.value + @staticmethod def deserialize(string: str) -> Optional['Term']: """ - Parse a string representation of a Term produced by serialize(). - """ - if not string: - return None + Parse a string representation produced by `serialize()`. - if string.startswith(_REGEX_PREFIX): - return Term.regex(string[len(_REGEX_PREFIX)+1:]) - elif string.startswith(_FAIR_PREFIX): - return Term.fair(string[len(_FAIR_PREFIX)+1:]) - else: + Parameters: + string: The serialized term, e.g. `"regex=abc"`. + + Returns: + A Term instance, or None if the input is empty or invalid. + """ + if not string or "=" not in string: return None + prefix, value = string.split("=", 1) + if prefix == TermType.REGEX: + return Term.regex(value) + elif prefix == TermType.FAIR: + return Term.fair(value) + return None def __str__(self): return self.serialize() @@ -223,10 +531,18 @@ def __hash__(self): return hash(self.serialize()) + class MultiTermsRequest(BaseModel): terms: List[Term] + options: Optional[RequestOptions] = None +class RepeatRequest(BaseModel): + term: Term + min: int + max: Optional[int] + options: Optional[RequestOptions] = None class GenerateStringsRequest(BaseModel): term: Term count: int + options: Optional[RequestOptions] = None \ No newline at end of file diff --git a/regexsolver/details.py b/regexsolver/details.py deleted file mode 100644 index ec799c1..0000000 --- a/regexsolver/details.py +++ /dev/null @@ -1,71 +0,0 @@ -from typing import Optional - -from pydantic import BaseModel, model_validator - - -class Cardinality(BaseModel): - """ - Class that represent the number of possible values. 
- """ - type: str - value: Optional[int] = None - - def is_infinite(self) -> bool: - """ - True if it has a finite number of values, False otherwise. - """ - if self.type == 'Infinite': - return True - else: - return False - - def __str__(self): - if self.type == 'Infinite': - return "Infinite" - elif self.type == 'BigInteger': - return 'BigInteger' - elif self.type == 'Integer': - return "Integer({})".format(self.value) - else: - return 'Unknown' - - -class Length(BaseModel): - """ - Contains the minimum and maximum length of possible values. - """ - - minimum: Optional[int] - maximum: Optional[int] - - @model_validator(mode="before") - def from_list(cls, values: list): - if len(values) != 2: - raise ValueError("List must contain exactly two elements") - return {'minimum': values[0], 'maximum': values[1]} - - def __str__(self): - return "Length[minimum={}, maximum={}]".format( - self.minimum, - self.maximum - ) - - -class Details(BaseModel): - """ - Contains details about the requested Term. 
- """ - type: str = 'details' - - cardinality: Cardinality - length: Length - empty: bool - total: bool - - def __str__(self): - return "Details[cardinality={}, length={}, empty={}, total={}]".format( - self.cardinality, - self.length, - self.empty, - self.total - ) diff --git a/setup.py b/setup.py index d6b4483..9db4e61 100644 --- a/setup.py +++ b/setup.py @@ -2,15 +2,15 @@ setup( name="regexsolver", - version="1.0.3", - description="RegexSolver allows you to manipulate regular expressions as sets, enabling operations such as intersection, union, and subtraction.", + version="1.1.0", + description="RegexSolver is a powerful toolkit for building, combining, and analyzing regular expressions.", long_description=open('README.md').read(), long_description_content_type='text/markdown', author="RegexSolver", author_email="contact@regexsolver.com", url="https://github.com/RegexSolver/regexsolver-python", license="MIT", - keywords="regex regexp set intersection union subtraction difference equivalence subset nfa dfa", + keywords="regex regexp pattern intersection union difference concat equivalence subset nfa dfa", packages=find_packages(exclude=["tests", "tests.*"]), install_requires=[ diff --git a/test-requirements.txt b/test-requirements.txt index 7a9c72b..606d9d3 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -1 +1,2 @@ -requests_mock>=1.9.0 \ No newline at end of file +requests_mock>=1.9.0 +python-dotenv==1.1.1 \ No newline at end of file diff --git a/tests/assets/response_generateStrings.json b/tests/assets/response_generateStrings.json deleted file mode 100644 index 9ee8883..0000000 --- a/tests/assets/response_generateStrings.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "type": "strings", - "value": [ - "abcde", - "dede", - "deabc", - "abcabc" - ] -} \ No newline at end of file diff --git a/tests/assets/response_getDetails.json b/tests/assets/response_getDetails.json deleted file mode 100644 index 65e0539..0000000 --- 
a/tests/assets/response_getDetails.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "type": "details", - "cardinality": { - "type": "Integer", - "value": 2 - }, - "length": [ - 2, - 3 - ], - "empty": false, - "total": false -} \ No newline at end of file diff --git a/tests/assets/response_getDetails_empty.json b/tests/assets/response_getDetails_empty.json deleted file mode 100644 index d33b6f3..0000000 --- a/tests/assets/response_getDetails_empty.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "type": "details", - "cardinality": { - "type": "Integer", - "value": 0 - }, - "length": [ - null, - null - ], - "empty": true, - "total": false -} \ No newline at end of file diff --git a/tests/assets/response_getDetails_infinite.json b/tests/assets/response_getDetails_infinite.json deleted file mode 100644 index ae72fc8..0000000 --- a/tests/assets/response_getDetails_infinite.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "type": "details", - "cardinality": { - "type": "Infinite" - }, - "length": [ - 0, - null - ], - "empty": false, - "total": true -} \ No newline at end of file diff --git a/tests/assets/response_intersection.json b/tests/assets/response_intersection.json deleted file mode 100644 index e6b1a7a..0000000 --- a/tests/assets/response_intersection.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "regex", - "value": "deabc" -} \ No newline at end of file diff --git a/tests/assets/response_isEquivalentTo.json b/tests/assets/response_isEquivalentTo.json deleted file mode 100644 index 25147f3..0000000 --- a/tests/assets/response_isEquivalentTo.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "boolean", - "value": false -} \ No newline at end of file diff --git a/tests/assets/response_isSubsetOf.json b/tests/assets/response_isSubsetOf.json deleted file mode 100644 index 84ed493..0000000 --- a/tests/assets/response_isSubsetOf.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "boolean", - "value": true -} \ No newline at end of file diff --git a/tests/assets/response_subtraction.json 
b/tests/assets/response_subtraction.json deleted file mode 100644 index 478ac72..0000000 --- a/tests/assets/response_subtraction.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "regex", - "value": "abc" -} \ No newline at end of file diff --git a/tests/assets/response_union.json b/tests/assets/response_union.json deleted file mode 100644 index 27dae5e..0000000 --- a/tests/assets/response_union.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "type": "regex", - "value": "(abc|de|fghi)" -} \ No newline at end of file diff --git a/tests/integration_test.py b/tests/integration_test.py new file mode 100644 index 0000000..c601c19 --- /dev/null +++ b/tests/integration_test.py @@ -0,0 +1,173 @@ +import unittest +from dotenv import load_dotenv +from regexsolver import RegexSolver, ResponseFormat, Term + + +class IntegrationTest(unittest.TestCase): + def setUp(self): + load_dotenv() + RegexSolver.initialize() + + # Analyze + + def test_analyze_cardinality(self): + term = Term.regex(r"[0-4]") + cardinality = term.get_cardinality() + + self.assertEqual( + "Integer(5)", + str(cardinality) + ) + + def test_analyze_dot(self): + term = Term.regex(r"(abc|de)") + dot = term.get_dot() + + self.assertTrue(dot.startswith("digraph ")) + + def test_analyze_empty_string(self): + term = Term.regex(r"") + + result = term.is_empty_string() + + self.assertTrue(result) + + def test_analyze_empty(self): + term = Term.regex(r"[]") + + result = term.is_empty() + + self.assertTrue(result) + + def test_analyze_total(self): + term = Term.regex(r".*") + + result = term.is_total() + + self.assertTrue(result) + + def test_analyze_equivalent(self): + term1 = Term.regex(r"(abc|de)") + term2 = Term.fair("sLc#w-!No&(oq@Sf>X).?lI3{uh{80qWEH[#0.pHq@B-9o[LpP-a#fYI+") - - result = term1.is_equivalent_to(term2) - - self.assertEqual(False, result) - - def test_is_subset_of(self): - with open('tests/assets/response_isSubsetOf.json') as response: - json_response = json.load(response) - with requests_mock.Mocker() as mock: 
- mock.post( - "https://api.regexsolver.com/api/analyze/subset", - json=json_response, status_code=200 - ) - - term1 = Term.regex(r"de") - term2 = Term.regex(r"(abc|de)") - - result = term1.is_subset_of(term2) - - self.assertEqual(True, result) + RegexSolver.initialize("TOKEN") def test_error_response(self): with open('tests/assets/response_error.json') as response: json_response = json.load(response) with requests_mock.Mocker() as mock: mock.post( - "https://api.regexsolver.com/api/compute/intersection", + "https://api.regexsolver.com/v1/compute/intersection", json=json_response, status_code=400 )