From 0e28f5ffe983b09eaa9dc721038a483ba77b4f33 Mon Sep 17 00:00:00 2001 From: Sai Medhini Reddy Maryada <117196660+saimedhi@users.noreply.github.com> Date: Wed, 17 Apr 2024 14:22:10 -0700 Subject: [PATCH] Updated code generator to use new version of OpenAPI specification (#721) * Updated code generator to use new version of OpenAPI specification Signed-off-by: saimedhi * Updated code generator to use native OpenAPI specification Signed-off-by: saimedhi --------- Signed-off-by: saimedhi --- .github/workflows/update_api.yml | 2 +- CHANGELOG.md | 9 +- DEVELOPER_GUIDE.md | 2 +- test_opensearchpy/test_server/__init__.py | 14 +- utils/changelog_updater.py | 117 ++++------- utils/generate_api.py | 232 +++++++++++----------- utils/templates/base | 2 +- 7 files changed, 174 insertions(+), 204 deletions(-) diff --git a/.github/workflows/update_api.yml b/.github/workflows/update_api.yml index 7c130d0b..2cc4f298 100644 --- a/.github/workflows/update_api.yml +++ b/.github/workflows/update_api.yml @@ -44,7 +44,7 @@ jobs: commit-message: Updated opensearch-py to reflect the latest OpenSearch API spec (${{ steps.date.outputs.date }}) title: Updated opensearch-py to reflect the latest OpenSearch API spec body: | - Updated [opensearch-py](https://github.com/opensearch-project/opensearch-py) to reflect the latest [OpenSearch API spec](https://github.com/opensearch-project/opensearch-api-specification/blob/main/OpenSearch.openapi.json) + Updated [opensearch-py](https://github.com/opensearch-project/opensearch-py) to reflect the latest [OpenSearch API spec](https://github.com/opensearch-project/opensearch-api-specification/releases/download/main/opensearch-openapi.yaml) Date: ${{ steps.date.outputs.date }} branch: automated-api-update base: main diff --git a/CHANGELOG.md b/CHANGELOG.md index 06fd1386..342ba1c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ### Removed - Removed support for Python 3.6, 3.7 ([#717](https://github.com/opensearch-project/opensearch-py/pull/717)) ### Fixed +- Updated code generator to use native OpenAPI specification ([#721](https://github.com/opensearch-project/opensearch-py/pull/721)) ### Updated APIs ### Security ### Dependencies @@ -18,10 +19,10 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) ## [2.5.0] ### Added -- Added pylint `assignment-from-no-return` and `unused-variable` (([#658](https://github.com/opensearch-project/opensearch-py/pull/658)) -- Added pylint `unnecessary-dunder-calls` (([#655](https://github.com/opensearch-project/opensearch-py/pull/655)) -- Changed to use .pylintrc files in root and any directory with override requirements (([#654](https://github.com/opensearch-project/opensearch-py/pull/654)) -- Added pylint `unspecified-encoding` and `missing-function-docstring` and ignored opensearchpy for lints (([#643](https://github.com/opensearch-project/opensearch-py/pull/643))) +- Added pylint `assignment-from-no-return` and `unused-variable` ([#658](https://github.com/opensearch-project/opensearch-py/pull/658)) +- Added pylint `unnecessary-dunder-calls` ([#655](https://github.com/opensearch-project/opensearch-py/pull/655)) +- Changed to use .pylintrc files in root and any directory with override requirements ([#654](https://github.com/opensearch-project/opensearch-py/pull/654)) +- Added pylint `unspecified-encoding` and `missing-function-docstring` and ignored opensearchpy for lints ([#643](https://github.com/opensearch-project/opensearch-py/pull/643)) - Added pylint `line-too-long` and `invalid-name` ([#590](https://github.com/opensearch-project/opensearch-py/pull/590)) - Added pylint `pointless-statement` ([#611](https://github.com/opensearch-project/opensearch-py/pull/611)) - Added a log collection guide ([#579](https://github.com/opensearch-project/opensearch-py/pull/579)) diff --git a/DEVELOPER_GUIDE.md b/DEVELOPER_GUIDE.md index 7a51a6cf..cf566e20 100644 --- a/DEVELOPER_GUIDE.md +++ b/DEVELOPER_GUIDE.md @@ -125,7 +125,7 @@ Open `docs/build/html/index.html` to see results. ## Client Code Generator -OpenSearch publishes an [OpenAPI specification](https://github.com/opensearch-project/opensearch-api-specification/blob/main/OpenSearch.openapi.json) in the [opensearch-api-specification](https://github.com/opensearch-project/opensearch-api-specification) repository, which is used to auto-generate the less interesting parts of the client. +OpenSearch publishes an [OpenAPI specification](https://github.com/opensearch-project/opensearch-api-specification/releases/download/main/opensearch-openapi.yaml) in the [opensearch-api-specification](https://github.com/opensearch-project/opensearch-api-specification) repository, which is used to auto-generate the less interesting parts of the client. ``` nox -rs generate diff --git a/test_opensearchpy/test_server/__init__.py b/test_opensearchpy/test_server/__init__.py index 36b548b5..0e4a974b 100644 --- a/test_opensearchpy/test_server/__init__.py +++ b/test_opensearchpy/test_server/__init__.py @@ -31,24 +31,24 @@ from opensearchpy.helpers import test from opensearchpy.helpers.test import OpenSearchTestCase as BaseTestCase -client: Any = None +CLIENT: Any = None def get_client(**kwargs: Any) -> Any: - global client - if client is False: + global CLIENT + if CLIENT is False: raise SkipTest("No client is available") - if client is not None and not kwargs: - return client + if CLIENT is not None and not kwargs: + return CLIENT try: new_client = test.get_test_client(**kwargs) except SkipTest: - client = False + CLIENT = False raise if not kwargs: - client = new_client + CLIENT = new_client return new_client diff --git a/utils/changelog_updater.py b/utils/changelog_updater.py index fde4947f..7021c10c 100644 --- a/utils/changelog_updater.py +++ b/utils/changelog_updater.py @@ -7,9 +7,7 @@ # Modifications Copyright OpenSearch Contributors. See # GitHub history for details. -import filecmp -import os -import shutil +import subprocess import requests @@ -18,82 +16,51 @@ def main() -> None: """ Update CHANGELOG.md when API generator produces new code differing from existing. """ - root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - after_paths = [ - os.path.join(root_dir, f"opensearchpy/{folder}") - for folder in ["client", "_async/client"] - ] - - before_paths = [ - os.path.join(root_dir, f"before_generate/{folder}") - for folder in ["client", "async_client"] - ] - - # Compare only .py files and take their union for client and async_client directories - before_files_client = set( - file for file in os.listdir(before_paths[0]) if file.endswith(".py") - ) - after_files_client = set( - file for file in os.listdir(after_paths[0]) if file.endswith(".py") - ) - - before_files_async_client = set( - file for file in os.listdir(before_paths[1]) if file.endswith(".py") - ) - after_files_async_client = set( - file for file in os.listdir(after_paths[1]) if file.endswith(".py") - ) - - all_files_union_client = before_files_client.union(after_files_client) - all_files_union_async_client = before_files_async_client.union( - after_files_async_client - ) + git_command = "git status" + try: + git_status = subprocess.check_output( + git_command, shell=True, stderr=subprocess.STDOUT + ) + if ( + "Changes to be committed:" in git_status.decode() + or "Changes not staged for commit:" in git_status.decode() + or "Untracked files:" in git_status.decode() + ): + print("Changes detected; updating changelog.") - # Compare files and check for mismatches or errors for client and async_client directories - mismatch_client, errors_client = filecmp.cmpfiles( - before_paths[0], after_paths[0], all_files_union_client, shallow=True - )[1:] - mismatch_async_client, errors_async_client = filecmp.cmpfiles( - before_paths[1], after_paths[1], all_files_union_async_client, shallow=True - )[1:] + base_url = "https://api.github.com/repos/opensearch-project/opensearch-api-specification/commits" + url_with_per_page = base_url + "?per_page=1" + response = requests.get(url_with_per_page) + if response.ok: + commit_info = response.json()[0] + commit_url = commit_info["html_url"] + latest_commit_sha = commit_info.get("sha") + else: + raise Exception( + f"Failed to fetch opensearch-api-specification commit information. Status code: {response.status_code}" + ) - if mismatch_client or errors_client or mismatch_async_client or errors_async_client: - print("Changes detected") - response = requests.get( - "https://api.github.com/repos/opensearch-project/opensearch-api-specification/commits" - ) - if response.ok: - commit_info = response.json()[0] - commit_url = commit_info["html_url"] - latest_commit_sha = commit_info.get("sha") + with open("CHANGELOG.md", "r+", encoding="utf-8") as file: + content = file.read() + if commit_url not in content: + if "### Updated APIs" in content: + file_content = content.replace( + "### Updated APIs", + f"### Updated APIs\n- Updated opensearch-py APIs to reflect [opensearch-api-specification@{latest_commit_sha[:7]}]({commit_url})", + 1, + ) + file.seek(0) + file.write(file_content) + file.truncate() + else: + raise Exception( + "'Updated APIs' section is not present in CHANGELOG.md" + ) else: - raise Exception( - f"Failed to fetch opensearch-api-specification commit information. Status code: {response.status_code}" - ) - - with open("CHANGELOG.md", "r+", encoding="utf-8") as file: - content = file.read() - if commit_url not in content: - if "### Updated APIs" in content: - file_content = content.replace( - "### Updated APIs", - f"### Updated APIs\n- Updated opensearch-py APIs to reflect [opensearch-api-specification@{latest_commit_sha[:7]}]({commit_url})", - 1, - ) - file.seek(0) - file.write(file_content) - file.truncate() - else: - raise Exception( - "'Updated APIs' section is not present in CHANGELOG.md" - ) - else: - print("No changes detected") + print("No changes detected") - # Clean up - for path in before_paths: - shutil.rmtree(path) + except subprocess.CalledProcessError as e: + print(f"Error occurred while checking Git status: {e}") if __name__ == "__main__": diff --git a/utils/generate_api.py b/utils/generate_api.py index 26ea90c9..6d6fa4ea 100644 --- a/utils/generate_api.py +++ b/utils/generate_api.py @@ -33,7 +33,6 @@ import json import os import re -import shutil from functools import lru_cache from itertools import chain, groupby from operator import itemgetter @@ -45,6 +44,7 @@ import requests import unasync import urllib3 +import yaml from click.testing import CliRunner from jinja2 import Environment, FileSystemLoader, TemplateNotFound, select_autoescape @@ -119,13 +119,7 @@ def parse_orig(self) -> None: reads the written module and updates with important code specific to this client """ self.orders = [] - if self.is_plugin: - self.header = "from typing import Any\n\n" - else: - self.header = ( - "from typing import Any, Collection, Optional, Tuple, Union\n\n" - ) - + self.header = "from typing import Any\n\n" self.namespace_new = "".join( word.capitalize() for word in self.namespace.split("_") ) @@ -200,7 +194,7 @@ def dump(self) -> None: content = file.read() file_content = content.replace( "# namespaced clients for compatibility with API names", - f"# namespaced clients for compatibility with API names\n self.{self.namespace} = {self.namespace_new}Client(client)", # pylint: disable=line-too-long + f"# namespaced clients for compatibility with API names\n self.{self.namespace} = {self.namespace_new}Client(self)", # pylint: disable=line-too-long 1, ) new_file_content = file_content.replace( @@ -392,6 +386,22 @@ def all_parts(self) -> Dict[str, str]: if self.namespace == "tasks" and self.name == "get": parts["task_id"]["required"] = False + # Workaround to prevent lint error: invalid escape sequence '\`' + if ( + self.namespace == "indices" + and self.name == "create_data_stream" + and part == "name" + ): + replace_str = r"`\`, " + # Replace the string in the description + parts["name"]["description"] = parts["name"]["description"].replace( + replace_str, "" + ) + if "backslash" not in parts["name"]["description"]: + parts["name"]["description"] = parts["name"]["description"].replace( + "`:`", "`:`, backslash" + ) + for k, sub in SUBSTITUTIONS.items(): if k in parts: parts[sub] = parts.pop(k) @@ -540,7 +550,7 @@ def to_python(self) -> Any: def read_modules() -> Any: """ checks the opensearch-api spec at - https://raw.githubusercontent.com/opensearch-project/opensearch-api-specification/main/OpenSearch.openapi.json + https://github.com/opensearch-project/opensearch-api-specification/releases/download/main/opensearch-openapi.yaml and parses it into one or more API modules :return: a dict of API objects """ @@ -548,32 +558,45 @@ def read_modules() -> Any: # Load the OpenAPI specification file response = requests.get( - "https://raw.githubusercontent.com/opensearch-project/opensearch-api-" - "specification/main/OpenSearch.openapi.json" + "https://github.com/opensearch-project/opensearch-api-specification/releases/download/main/opensearch-openapi.yaml" ) - data = response.json() + data = yaml.safe_load(response.text) list_of_dicts = [] for path in data["paths"]: - for param in data["paths"][path]: # pylint: disable=invalid-name - if data["paths"][path][param]["x-operation-group"] == "nodes.hot_threads": - if "deprecated" in data["paths"][path][param]: + for method in data["paths"][path]: + # Workaround for excluding deprecated path of 'nodes.hot_threads' + if data["paths"][path][method]["x-operation-group"] == "nodes.hot_threads": + if "deprecated" in data["paths"][path][method]: continue - data["paths"][path][param].update({"path": path, "method": param}) - list_of_dicts.append(data["paths"][path][param]) + + data["paths"][path][method].update({"path": path, "method": method}) + list_of_dicts.append(data["paths"][path][method]) + + # 'list_of_dicts' contains dictionaries, each representing a possible API endpoint # Update parameters in each endpoint - for param_dict in list_of_dicts: - if "parameters" in param_dict: + for endpoint in list_of_dicts: + if "parameters" in endpoint: params = [] parts = [] # Iterate over the list of parameters and update them - for param in param_dict["parameters"]: + for param_ref in endpoint["parameters"]: + param = data["components"]["parameters"][ + param_ref["$ref"].split("/")[-1] + ] if "schema" in param and "$ref" in param["schema"]: schema_path_ref = param["schema"]["$ref"].split("/")[-1] param["schema"] = data["components"]["schemas"][schema_path_ref] + if "oneOf" in param["schema"]: + for element in param["schema"]["oneOf"]: + if "$ref" in element: + common_schema_path_ref = element["$ref"].split("/")[-1] + param["schema"] = data["components"]["schemas"][ + common_schema_path_ref + ] params.append(param) else: params.append(param) @@ -589,68 +612,68 @@ def read_modules() -> Any: params_new = {} parts_new = {} - for m in params: # pylint: disable=invalid-name - a = dict( # pylint: disable=invalid-name - type=m["schema"]["type"], description=m["description"] - ) # pylint: disable=invalid-name + for param in params: + param_dict: Dict[str, Any] = {} + if "description" in param: + param_dict.update( + description=param["description"].replace("\n", "") + ) + + if "type" in param["schema"]: + param_dict.update({"type": param["schema"]["type"]}) - if "default" in m["schema"]: - a.update({"default": m["schema"]["default"]}) + if "default" in param["schema"]: + param_dict.update({"default": param["schema"]["default"]}) - if "enum" in m["schema"]: - a.update({"type": "enum"}) - a.update({"options": m["schema"]["enum"]}) + if "enum" in param["schema"]: + param_dict.update({"type": "enum"}) + param_dict.update({"options": param["schema"]["enum"]}) - if "deprecated" in m["schema"]: - a.update({"deprecated": m["schema"]["deprecated"]}) - a.update( - {"deprecation_message": m["schema"]["x-deprecation-message"]} + if "deprecated" in param: + param_dict.update({"deprecated": param["deprecated"]}) + + if "x-deprecation-message" in param: + param_dict.update( + {"deprecation_message": param["x-deprecation-message"]} ) - params_new.update({m["name"]: a}) + params_new.update({param["name"]: param_dict}) # Removing the deprecated "type" if ( - param_dict["x-operation-group"] != "nodes.hot_threads" + endpoint["x-operation-group"] != "nodes.hot_threads" and "type" in params_new ): params_new.pop("type") if ( - param_dict["x-operation-group"] == "cluster.health" + endpoint["x-operation-group"] == "cluster.health" and "ensure_node_commissioned" in params_new ): params_new.pop("ensure_node_commissioned") if bool(params_new): - param_dict.update({"params": params_new}) - - param_dict.pop("parameters") - - for n in parts: # pylint: disable=invalid-name - b = dict(type=n["schema"]["type"]) # pylint: disable=invalid-name - - if "description" in n: - b.update({"description": n["description"]}) + endpoint.update({"params": params_new}) - if "x-enum-options" in n["schema"]: - b.update({"options": n["schema"]["x-enum-options"]}) + for part in parts: + parts_dict: Dict[str, Any] = {} + if "type" in part["schema"]: + parts_dict.update(type=part["schema"]["type"]) - deprecated_new = {} - if "deprecated" in n: - b.update({"deprecated": n["deprecated"]}) + if "description" in part: + parts_dict.update( + {"description": part["description"].replace("\n", " ")} + ) - if "x-deprecation-version" in n: - deprecated_new.update({"version": n["x-deprecation-version"]}) + if "x-enum-options" in part["schema"]: + parts_dict.update({"options": part["schema"]["x-enum-options"]}) - if "x-deprecation-description" in n: - deprecated_new.update( - {"description": n["x-deprecation-description"]} - ) + if "deprecated" in part: + parts_dict.update({"deprecated": part["deprecated"]}) - parts_new.update({n["name"]: b}) + parts_new.update({part["name"]: parts_dict}) if bool(parts_new): - param_dict.update({"parts": parts_new}) + endpoint.update({"parts": parts_new}) # Sort the input list by the value of the "x-operation-group" key list_of_dicts = sorted(list_of_dicts, key=itemgetter("x-operation-group")) @@ -670,54 +693,58 @@ def read_modules() -> Any: paths = [] all_paths_have_deprecation = True - for key2, value2 in groupby(value, key=itemgetter("path")): + for path, path_dicts in groupby(value, key=itemgetter("path")): # Extract the HTTP methods from the data in the current subgroup methods = [] parts_final = {} - for z in value2: # pylint: disable=invalid-name - methods.append(z["method"].upper()) + for method_dict in path_dicts: + methods.append(method_dict["method"].upper()) # Update 'api' dictionary if "documentation" not in api: - documentation = {"description": z["description"]} + documentation = {"description": method_dict["description"]} api.update({"documentation": documentation}) - if "x-deprecation-message" in z: - x_deprecation_message = z["x-deprecation-message"] + if "x-deprecation-message" in method_dict: + x_deprecation_message = method_dict["x-deprecation-message"] else: all_paths_have_deprecation = False - if "params" not in api and "params" in z: - api.update({"params": z["params"]}) + if "params" not in api and "params" in method_dict: + api.update({"params": method_dict["params"]}) - if "body" not in api and "requestBody" in z: + if ( + "body" not in api + and "requestBody" in method_dict + and "$ref" in method_dict["requestBody"] + ): + requestbody_ref = method_dict["requestBody"]["$ref"].split("/")[-1] body = {"required": False} - if "required" in z["requestBody"]: - body.update({"required": z["requestBody"]["required"]}) - q = z["requestBody"]["content"][ # pylint: disable=invalid-name - "application/json" - ]["schema"]["$ref"].split("/")[-1] - if "description" in data["components"]["schemas"][q]: + if ( + "required" + in data["components"]["requestBodies"][requestbody_ref] + ): body.update( { - "description": data["components"]["schemas"][q][ - "description" - ] - } - ) - if "x-serialize" in data["components"]["schemas"][q]: - body.update( - { - "serialize": data["components"]["schemas"][q][ - "x-serialize" - ] + "required": data["components"]["requestBodies"][ + requestbody_ref + ]["required"] } ) + q = data["components"]["requestBodies"][requestbody_ref]["content"][ + "application/json" + ][ + "schema" + ] # pylint: disable=invalid-name + if "description" in q: + body.update({"description": q["description"]}) + if "x-serialize" in q: + body.update({"serialize": q["x-serialize"]}) api.update({"body": body}) - if "parts" in z: - parts_final.update(z["parts"]) + if "parts" in method_dict: + parts_final.update(method_dict["parts"]) if "POST" in methods or "PUT" in methods: api.update( @@ -739,19 +766,10 @@ def read_modules() -> Any: } ) - if bool(deprecated_new) and bool(parts_final): - paths.append( - { - "path": key2, - "methods": methods, - "parts": parts_final, - "deprecated": deprecated_new, - } - ) - elif bool(parts_final): - paths.append({"path": key2, "methods": methods, "parts": parts_final}) + if bool(parts_final): + paths.append({"path": path, "methods": methods, "parts": parts_final}) else: - paths.append({"path": key2, "methods": methods}) + paths.append({"path": path, "methods": methods}) api.update({"url": {"paths": paths}}) if all_paths_have_deprecation and x_deprecation_message is not None: @@ -833,20 +851,4 @@ def dump_modules(modules: Any) -> None: if __name__ == "__main__": - # Store directories for comparison pre-generation vs post-generation. - root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - before_paths = [ - os.path.join(root_dir, f"before_generate/{folder}") - for folder in ["client", "async_client"] - ] - - for path in before_paths: - if os.path.exists(path): - shutil.rmtree(path) - - shutil.copytree(os.path.join(root_dir, "opensearchpy/client"), before_paths[0]) - shutil.copytree( - os.path.join(root_dir, "opensearchpy/_async/client"), before_paths[1] - ) - dump_modules(read_modules()) diff --git a/utils/templates/base b/utils/templates/base index 54db3451..d70f8d94 100644 --- a/utils/templates/base +++ b/utils/templates/base @@ -22,7 +22,7 @@ {% for p, info in api.params %} {% if info.description %} {% filter wordwrap(72, wrapstring="\n ") %} - :arg {{ p }}{% if info.deprecated %} (Deprecated: {{ info['deprecation_message'][:-1] }}.){% endif %}: {{ info.description }} {% if info.options %}Valid choices are {{ info.options|join(", ") }}.{% endif %} + :arg {{ p }}{% if info.deprecated and info.deprecation_message is defined %} (Deprecated: {{ info['deprecation_message'][:-1] }}.){% endif %}: {{ info.description }} {% if info.options and "Valid values: " not in info.description %}Valid choices are {{ info.options|join(", ") }}.{% endif %} {% if info.default is defined %}{% if info.default is not none %}{% if info.default is sameas(false) %}Default is false.{% else %}Default is {{ info.default }}.{% endif %}{% endif %}{% endif %} {% endfilter %}