Merge pull request #54 from AppThreat/feature/ruby

Ruby on Rails
AppThreat · Jan 23, 2025 · 1485507 · 1485507
2 parents 264c1d6 + a3c4270
commit 1485507
Show file tree

Hide file tree

Showing 14 changed files with 372 additions and 14 deletions.
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -1,4 +1,4 @@
-on: [push, pull_request]
+on: [workflow_dispatch]
 
 permissions:
   contents: read

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -20,14 +20,15 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        os: [ubuntu-24.04, ubuntu-24.04-arm, macos-latest, windows-latest]
+        python: ['3.10', '3.11', '3.12', '3.13']
       fail-fast: false
     steps:
     - uses: actions/checkout@v4
     - name: Set up Python
       uses: actions/setup-python@v5
       with:
-        python-version: "3.12"
+        python-version: ${{ matrix.python }}
     - name: Make sure versions are synced if PR or ref is tag
       if: ${{ github.event_name == 'pull_request' || github.ref_type == 'tag' }}
       shell: bash

diff --git a/.gitignore b/.gitignore
@@ -156,4 +156,4 @@ cython_debug/
 #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
diff --git a/README.md b/README.md
@@ -29,7 +29,7 @@ e.g. `atom-tools help
 convert`).
 
 ```
-Atom Tools (version 0.6.0)
+Atom Tools (version 0.7.0)
 
 Usage:
   command [options] [arguments]
@@ -70,7 +70,7 @@ Usage:
   convert [options]
 
 Options:
-  -f, --format=FORMAT              Destination format [default: "openapi3.0.1"]
+  -f, --format=FORMAT              Destination format [default: "openapi3.1.0"]
   -i, --input-slice=INPUT-SLICE  Usages slice file
   -t, --type=TYPE                  Origin type of source on which the atom slice was generated. [default: "java"]
   -o, --output-file=OUTPUT-FILE    Output file [default: "openapi_from_slice.json"]

diff --git a/atom_tools/__init__.py b/atom_tools/__init__.py
@@ -1,4 +1,4 @@
 """
 A cli, classes and functions for converting an atom slice to a different format
 """
-__version__ = '0.6.0'
+__version__ = '0.7.0'
diff --git a/atom_tools/cli/commands/convert.py b/atom_tools/cli/commands/convert.py
@@ -10,7 +10,6 @@
 from atom_tools.lib.converter import OpenAPI
 from atom_tools.lib.utils import export_json
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -37,7 +36,7 @@ class ConvertCommand(Command):
             'f',
             'Destination format',
             flag=False,
-            default='openapi3.0.1',
+            default='openapi3.1.0',
         ),
         option(
             'input-slice',
@@ -76,7 +75,7 @@ def handle(self):
         """
         Executes the convert command and performs the conversion.
         """
-        supported_types = {'java', 'jar', 'python', 'py', 'javascript', 'js', 'typescript', 'ts'}
+        supported_types = {'java', 'jar', 'python', 'py', 'javascript', 'js', 'typescript', 'ts', "ruby", "rb"}
         if self.option('type') not in supported_types:
             raise ValueError(f'Unknown origin type: {self.option("type")}')
         match self.option('format'):

diff --git a/atom_tools/lib/__init__.py b/atom_tools/lib/__init__.py
@@ -0,0 +1,10 @@
+"""
+Common dataclasses
+"""
+from dataclasses import dataclass
+
+
+@dataclass
+class HttpRoute:
+    """A single HTTP route: a URL pattern paired with an HTTP method."""
+    # URL path pattern of the route, e.g. "/users/:id"
+    url_pattern: str
+    # HTTP method name; the Ruby route parser in this change passes
+    # upper-case verbs such as "GET" or "POST"
+    method: str
diff --git a/atom_tools/lib/converter.py b/atom_tools/lib/converter.py
@@ -19,7 +19,7 @@
     OpenAPIRegexCollection
 )
 from atom_tools.lib.slices import AtomSlice
-
+from atom_tools.lib.ruby_converter import convert as ruby_convert
 
 logger = logging.getLogger(__name__)
 regex = OpenAPIRegexCollection()
@@ -40,7 +40,7 @@ def __init__(
     ) -> None:
         self.usages: AtomSlice = AtomSlice(usages, origin_type)
         self.openapi_version = dest_format.replace('openapi', '')
-        self.title = f'OpenAPI Specification for {Path(usages).parent.stem}'
+        self.title = f'OpenAPI Specification for {Path(usages).parent.stem}' if Path(usages).parent.stem else "OpenAPI Specification"
         self.file_endpoint_map: Dict = {}
         self.params: Dict[str, List[Dict]] = {}
         self.regex_param_count = 0
@@ -50,6 +50,8 @@ def convert_usages(self) -> Dict[str, Dict]:
         """
         Converts usages to OpenAPI.
         """
+        if self.usages.origin_type in ("rb", "ruby"):
+            return ruby_convert(self.usages)
         methods = self._process_methods()
         methods = self.methods_to_endpoints(methods)
         self.target_line_nums = self._identify_target_line_nums(methods)
@@ -488,6 +490,8 @@ def _process_methods_helper(self, pattern: str) -> Dict[str, Any]:
         for r in result:
             file_name = r['file_name']
             methods = r['resolved_methods']
+            if self.usages.origin_type in ("rb", "ruby"):
+                methods = [m for m in methods if m and not m.startswith("<operator>") and m not in ["(...)", "<body>"] and not m.startswith("<tmp-")]
             if resolved.get(file_name):
                 resolved[file_name]['resolved_methods'].extend(methods)
             else:

diff --git a/atom_tools/lib/ruby_converter.py b/atom_tools/lib/ruby_converter.py
@@ -0,0 +1,62 @@
+"""
+Ruby converter helper
+"""
+from atom_tools.lib.slices import AtomSlice
+from atom_tools.lib.ruby_semantics import code_to_routes
+
+
+def extract_params(url):
+    """
+    Build OpenAPI path-parameter objects for a colon-style URL pattern.
+
+    Every "/"-separated segment that starts with ":" becomes a required
+    path parameter named after the segment with its colons removed. The
+    ":id" segment is additionally typed as a 64-bit integer.
+
+    Args:
+        url: URL pattern such as "/users/:id"; may be empty or None.
+
+    Returns:
+        List of parameter dicts, empty when there is nothing to extract.
+    """
+    if not url or ":" not in url:
+        return []
+    found = []
+    for segment in url.split("/"):
+        if not segment.startswith(":"):
+            continue
+        entry = {"name": segment.replace(":", ""), "in": "path", "required": True}
+        if segment == ":id":
+            entry["schema"] = {"type": "integer", "format": "int64"}
+        found.append(entry)
+    return found
+
+
+def convert(usages: AtomSlice):
+    """
+    Convert the object slices of a Ruby atom slice into OpenAPI path entries.
+
+    Args:
+        usages: Atom slice whose ``content`` mapping holds ``objectSlices``.
+
+    Returns:
+        A list of single-entry dicts shaped like
+        ``{url_pattern: {http_method: operation}}``, one per detected route.
+        Empty when the slice has no object slices or no routes are found.
+    """
+    result = []
+    # "or []" also covers an explicit null in the slice JSON
+    object_slices = usages.content.get("objectSlices") or []
+    for oslice in object_slices:
+        # fullName is optional (the operationId below already falls back to
+        # fileName), so never call str methods on a missing value.
+        full_name = oslice.get("fullName") or ""
+        # Nested lambdas lack prefixes
+        if full_name.count("<lambda>") >= 3:
+            continue
+        file_name = oslice.get("fileName", "")
+        # Line numbers accumulate across the usages of one object slice, so
+        # later routes list the lines of earlier usages as well.
+        line_nums = set()
+        if oslice.get("lineNumber"):
+            line_nums.add(oslice.get("lineNumber"))
+        for usage in oslice.get("usages") or []:
+            target_obj = usage.get("targetObj") or {}
+            routes = code_to_routes(target_obj.get("name") or "")
+            if not routes:
+                continue
+            if usage.get("lineNumber"):
+                line_nums.add(usage.get("lineNumber"))
+            for route in routes:
+                amethod = {
+                    "operationId": full_name or oslice.get("fileName"),
+                    "x-atom-usages": {
+                        # sorted for a stable, readable output order
+                        "call": {file_name: sorted(line_nums)}
+                    }
+                }
+                params = extract_params(route.url_pattern)
+                if params:
+                    amethod["parameters"] = params
+                result.append({
+                    route.url_pattern: {
+                        route.method.lower(): amethod
+                    }
+                })
+    return result
diff --git a/atom_tools/lib/ruby_semantics.py b/atom_tools/lib/ruby_semantics.py
@@ -0,0 +1,113 @@
+"""
+Ruby semantic utils
+"""
+import re
+from typing import List
+
+from atom_tools.lib import HttpRoute
+
+
+def _get_dangling_routes(i, kind, code, code_parts, url_prefix="/"):
+    """
+    Expand a resource declaration into the routes it implies.
+
+    Args:
+        i: Index of the keyword token in ``code_parts``; the token at
+            ``i + 1`` is taken as the resource name.
+        kind: Keyword being expanded; ``"resources"`` adds a GET on the
+            base URL.
+        code: Full code snippet, inspected for ``match``/``via: :all``,
+            ``only: [`` and ``shallow:`` markers.
+        code_parts: Whitespace-split tokens of ``code``.
+        url_prefix: Prefix prepended to the resource name.
+
+    Returns:
+        List of HttpRoute objects for the resource.
+    """
+    resource_name = re.sub('^:', '', code_parts[i + 1])
+    base = _clean_url(f"{url_prefix}{resource_name}")
+    found = [HttpRoute(url_pattern=base, method="GET")] if kind == "resources" else []
+    matches_all = "match " in code and "via: :all" in code
+    unrestricted = "only: [" not in code and "shallow:" not in code
+    if not (matches_all or unrestricted):
+        return found
+    # (URL suffix, HTTP method) pairs emitted for an unrestricted resource
+    implied = (
+        ("/new", "GET"),
+        ("", "POST"),
+        ("/:id", "GET"),
+        ("/:id/edit", "GET"),
+        ("/:id", "PUT"),
+        ("/:id", "DELETE"),
+    )
+    for suffix, verb in implied:
+        found.append(HttpRoute(url_pattern=f"{base}{suffix}", method=verb))
+    return found
+
+
+def _clean_url(url_pattern):
+    """Remove one trailing comma or slash from the given URL pattern."""
+    trailing_separator = '[,/]$'
+    cleaned = re.sub(trailing_separator, '', url_pattern)
+    return cleaned
+
+
+def code_to_routes(code: str) -> List[HttpRoute]:
+    """
+    Convert code string to routes
+
+    The snippet is split on whitespace and scanned token by token for
+    routing keywords (``scope``, ``namespace``, ``resource``/``resources``
+    and the HTTP verb helpers), tracking a URL prefix as nested
+    declarations are seen.
+
+    Args:
+        code: Code snippet
+
+    Returns:
+        List of http routes. Empty when the snippet is empty or contains
+        none of the recognised keywords followed by a space.
+    """
+    if not code:
+        return []
+    keywords = (
+        "namespace", "scope", "concern", "resource", "resources", "get",
+        "post", "patch", "delete", "put", "head", "match",
+        "options")
+    # Cheap pre-filter: a routing keyword must be followed by a space
+    if not any(f"{keyword} " in code for keyword in keywords):
+        return []
+    routes = []
+    code_parts = code.strip().replace("...", "").split()
+    # Dangling resources - leads to many kinds of automatic routes
+    has_resources = "resources " in code or "resource " in code
+    url_prefix = ""
+    has_scope = False
+    for i, part in enumerate(code_parts):
+        if not part or len(part) < 2:
+            continue
+        # True only when a token follows this one. Guards every
+        # code_parts[i + 1] lookup below so that a keyword at the very end
+        # of the snippet is skipped instead of raising IndexError.
+        has_next = len(code_parts) > i + 1
+        if part == "scope" or part.startswith("scope("):
+            has_scope = True
+            if has_next and code_parts[i + 1].startswith('":'):
+                url_prefix = f"""/{re.sub('[:",]', '', code_parts[i + 1])}"""
+                continue
+        if (part in ("resource", "resources", "namespace", "member")
+                and has_next
+                and code_parts[i + 1].startswith(":")):
+            url_pattern = _clean_url(f"/{re.sub('^:', '', code_parts[i + 1])}")
+            # Is there an alias for this pattern
+            if len(code_parts) > i + 3 and code_parts[i + 2] in ("path:", "path", "path("):
+                url_pattern = _clean_url(code_parts[i + 3].replace('"', ""))
+                routes += _get_dangling_routes(i, part, code, code_parts,
+                                               f"{url_prefix}/{url_pattern}/")
+                continue
+            if len(code_parts) > i + 2 and code_parts[i + 2] in ("resources", "resource"):
+                routes += _get_dangling_routes(i, code_parts[i + 2], code, code_parts, f"{url_prefix}/")
+            elif i == len(code_parts) - 2 and part in ("resource", "resources"):
+                routes += _get_dangling_routes(i, part, code, code_parts, f"{url_prefix}/")
+            else:
+                # Not a leaf declaration: extend the prefix for what follows
+                url_prefix = f"{url_prefix}{url_pattern}"
+            continue
+        if part in ("collection", "member", "concern", "do", "as:", "constraints:") or part.startswith(
+                (":", '"')):
+            continue
+        if part == "end" and url_prefix:
+            # Leaving a block: drop the last segment of the prefix
+            url_prefix = "/".join(url_prefix.split("/")[:-1])
+        for m in ("get", "post", "delete", "patch", "put", "head", "options"):
+            if part == m and has_next and code_parts[i + 1].startswith('"'):
+                # NOTE(review): "patch" is reported as PUT here - confirm intended
+                routes.append(
+                    HttpRoute(url_pattern=f"""{url_prefix}/{code_parts[i + 1].replace('"', "")}""",
+                              method=m.upper() if m != "patch" else "PUT"))
+                break
+    if has_resources and not routes:
+        # Fallback: nothing was found above, expand any bare resource keyword
+        for i, part in enumerate(code_parts):
+            if part not in ("resource", "resources"):
+                continue
+            if len(code_parts) <= i + 1 or not code_parts[i + 1].startswith(':'):
+                continue
+            # NOTE(review): code_parts[i + 1] starts with ":" so it can never
+            # equal "do"; the original may have meant code_parts[i + 2].
+            # Behaviour is kept as-is - confirm before changing.
+            if i == len(code_parts) - 2 or (len(code_parts) > i + 2 and code_parts[i + 1] != "do"):
+                routes += _get_dangling_routes(i, part, code, code_parts, f"{url_prefix}/" if has_scope else "/")
+
+    return routes
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "atom-tools"
-version = "0.6.0"
+version = "0.7.0"
 description = "Collection of tools for use with AppThreat/atom."
 authors = [
   { name = "Caroline Russell", email = "[email protected]" },
@@ -13,6 +13,7 @@ classifiers = [
   "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
   "License :: OSI Approved :: Apache Software License",
   "Development Status :: 4 - Beta",
   "Intended Audience :: Developers",

diff --git a/test/data/rb-railsgoat-usages.json b/test/data/rb-railsgoat-usages.json
diff --git a/test/test_converter.py b/test/test_converter.py
@@ -2,7 +2,7 @@
 
 from atom_tools.lib.converter import filter_calls, OpenAPI
 from atom_tools.lib.utils import sort_list
-
+from atom_tools.lib.ruby_converter import convert as ruby_convert
 
 def sort_openapi_result(result):
     for k, v in result.items():
@@ -48,6 +48,9 @@ def py_usages_1():
 def py_usages_2():
     return OpenAPI('openapi3.0.1', 'py', 'test/data/py-breakable-flask-usages.json')
 
+@pytest.fixture
+def rb_usages_1():
+    """Build an OpenAPI converter (origin type 'rb') for the railsgoat usages slice in test/data."""
+    return OpenAPI('openapi3.0.1', 'rb', 'test/data/rb-railsgoat-usages.json')
 
 def test_populate_endpoints(js_usages_1, js_usages_2):
     # The populate_endpoints method is the final operation in convert_usages.
@@ -5751,3 +5754,8 @@ def test_js(js_usages_1):
                                              'schema': {'pattern': 'ftp(?!/quarantine)',
                                                         'type': 'string'}}],
                              'x-atom-usages': {'call': {'server.ts': [250]}}}}
+
+
+def test_rb(rb_usages_1):
+    """Smoke test: the Ruby converter yields a non-empty result for the railsgoat slice."""
+    # Only truthiness is asserted; no specific routes are pinned.
+    result = ruby_convert(rb_usages_1.usages)
+    assert result