From e50d13dd6893f9300e5c695ad4a1d5226a51a401 Mon Sep 17 00:00:00 2001
From: Mohamed Mehdi BEN AISSA
 <diode-ben-aissa.consultant@dgfip.finances.gouv.fr>
Date: Fri, 10 Nov 2023 15:00:32 +0100
Subject: [PATCH] fix(knox): fix webhdfs service definition for better support
 of path parameter

---
 .../services/webhdfs/2.4.0/rewrite.xml.j2     | 73 +++++++++++++++++++
 .../services/webhdfs/2.4.0/service.xml.j2     | 71 ++++++++++++++++++
 roles/knox/gateway/tasks/config.yml           | 17 +++++
 3 files changed, 161 insertions(+)
 create mode 100755 roles/knox/common/templates/services/webhdfs/2.4.0/rewrite.xml.j2
 create mode 100755 roles/knox/common/templates/services/webhdfs/2.4.0/service.xml.j2

diff --git a/roles/knox/common/templates/services/webhdfs/2.4.0/rewrite.xml.j2 b/roles/knox/common/templates/services/webhdfs/2.4.0/rewrite.xml.j2
new file mode 100755
index 00000000..3a228631
--- /dev/null
+++ b/roles/knox/common/templates/services/webhdfs/2.4.0/rewrite.xml.j2
@@ -0,0 +1,73 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<rules>
+    <!-- OUT: turn hdfs://host:port/path URLs into $frontend[url]/webhdfs/v1/path; this rule is declared with scope WEBHDFS. -->
+    <rule dir="OUT" scope="WEBHDFS" name="WEBHDFS/webhdfs/outbound" pattern="hdfs://*:*/{path=**}?{**}">
+        <rewrite template="{$frontend[url]}/webhdfs/v1/{path=**}?{**}"/>
+    </rule>
+    <!-- OUT: same rewrite for webhdfs://host:port/path URLs; reuses the rule name above but declares no scope attribute. -->
+    <rule dir="OUT" name="WEBHDFS/webhdfs/outbound" pattern="webhdfs://*:*/{path=**}?{**}">
+        <rewrite template="{$frontend[url]}/webhdfs/v1/{path=**}?{**}"/>
+    </rule>
+    <!-- OUT: rewrite scheme://host:port/path to $frontend[url]/webhdfs/data/v1/path, moving scheme, host (via $hostmap) and port into the query, followed by encrypt-query. -->
+    <rule dir="OUT" name="WEBHDFS/webhdfs/outbound/namenode/headers/location">
+        <match pattern="{scheme}://{host}:{port}/{path=**}?{**}"/>
+        <rewrite template="{$frontend[url]}/webhdfs/data/v1/{path=**}?{scheme}?host={$hostmap(host)}?{port}?{**}"/>
+        <encrypt-query/>
+    </rule>
+    <!-- IN: map hdfs:/path onto $serviceMappedUrl[NAMENODE]/path. -->
+    <rule dir="IN" name="WEBHDFS/webhdfs/inbound/hdfs" pattern="hdfs:/{path=**}?{**}">
+        <rewrite template="{$serviceMappedUrl[NAMENODE]}/{path=**}?{**}"/>
+    </rule>
+    <!-- IN: map webhdfs:/path onto $serviceUrl[WEBHDFS]/path. -->
+    <rule dir="IN" name="WEBHDFS/webhdfs/inbound/webhdfs" pattern="webhdfs:/{path=**}?{**}">
+        <rewrite template="{$serviceUrl[WEBHDFS]}/{path=**}?{**}"/>
+    </rule>
+    <!-- IN: version root (.../webhdfs/VERSION/ with no further path) is forwarded to $serviceUrl[WEBHDFS]/VERSION/. -->
+    <rule dir="IN" name="WEBHDFS/webhdfs/inbound/namenode/root" pattern="*://*:*/**/webhdfs/{version}/?{**}">
+        <rewrite template="{$serviceUrl[WEBHDFS]}/{version}/?{**}"/>
+    </rule>
+    <!-- IN: file path below the version, captured as path_from_url rather than path. NOTE(review): presumably so it cannot collide with a query parameter named path; confirm. -->
+    <rule dir="IN" name="WEBHDFS/webhdfs/inbound/namenode/file" pattern="*://*:*/**/webhdfs/{version}/{path_from_url=**}?{**}">
+        <rewrite template="{$serviceUrl[WEBHDFS]}/{version}/{path_from_url=**}?{**}"/>
+    </rule>
+    <!-- IN: "~" alone after the version is rewritten to /user/$username. -->
+    <rule dir="IN" name="WEBHDFS/webhdfs/inbound/namenode/home" pattern="*://*:*/**/webhdfs/{version}/~?{**}">
+        <rewrite template="{$serviceUrl[WEBHDFS]}/{version}/user/{$username}?{**}"/>
+    </rule>
+    <!-- IN: "~/" followed by a path is rewritten to /user/$username/ plus that path (also captured as path_from_url). -->
+    <rule dir="IN" name="WEBHDFS/webhdfs/inbound/namenode/home/file" pattern="*://*:*/**/webhdfs/{version}/~/{path_from_url=**}?{**}">
+        <rewrite template="{$serviceUrl[WEBHDFS]}/{version}/user/{$username}/{path_from_url=**}?{**}"/>
+    </rule>
+    <!-- IN: datanode URLs (.../webhdfs/data/*/path): decrypt-query is listed first, then scheme, host and port taken from the query rebuild scheme://host:port/path. -->
+    <rule dir="IN" name="WEBHDFS/webhdfs/inbound/datanode">
+        <decrypt-query/>
+        <match pattern="*://*:*/**/webhdfs/data/*/{path=**}?{scheme}?{host}?{port}?{**}"/>
+        <rewrite template="{scheme}://{host}:{port}/{path=**}?{**}"/>
+    </rule>
+    <!-- Filter applying the headers/location rule to the "Location" entry of HTTP headers and to the $.Location field of JSON content. -->
+    <filter name="WEBHDFS/webhdfs/outbound/namenode/headers">
+        <content type="application/x-http-headers">
+            <apply path="Location" rule="WEBHDFS/webhdfs/outbound/namenode/headers/location"/>
+        </content>
+        <content type="application/json">
+            <apply path="$.Location" rule="WEBHDFS/webhdfs/outbound/namenode/headers/location"/>
+        </content>
+    </filter>
+
+</rules>
diff --git a/roles/knox/common/templates/services/webhdfs/2.4.0/service.xml.j2 b/roles/knox/common/templates/services/webhdfs/2.4.0/service.xml.j2
new file mode 100755
index 00000000..48a7d416
--- /dev/null
+++ b/roles/knox/common/templates/services/webhdfs/2.4.0/service.xml.j2
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<service role="WEBHDFS" name="webhdfs" version="2.4.0"> <!-- WebHDFS service definition: UI metadata, routes, default dispatch and test URL. -->
+    <metadata>
+        <type>API</type>
+        <context>/webhdfs</context>
+        <shortDesc>Web HDFS</shortDesc>
+        <description>An HTTP REST API which supports the complete FileSystem interface for HDFS.</description>
+        <samples>
+            <sample>
+                <description>List all files under 'testPath'</description>
+                <method>GET</method>
+                <path>v1/testPath?op=LISTSTATUS</path>
+            </sample>
+            <sample>
+                <description>Rename a File/Directory under 'testPath'</description>
+                <method>PUT</method>
+                <path>v1/testPath/testFile?op=RENAME&amp;destination=testPath/renamedFile</path>
+            </sample>
+            <sample>
+                <description>Get Home Directory</description>
+                <method>GET</method>
+                <path>v1/?op=GETHOMEDIRECTORY</path>
+            </sample>
+            <sample>
+                <description>You may check out Apache WebHDFS's REST API documentation here</description>
+                <value>https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/WebHDFS.html</value>
+            </sample>
+        </samples>
+    </metadata>
+    <routes> <!-- Each route names the rewrite rule or filter applied to the request URL and, on the file routes, to the response headers (and body where listed). -->
+        <route path="/webhdfs/v1/?**">
+            <rewrite apply="WEBHDFS/webhdfs/inbound/namenode/root" to="request.url"/>
+        </route>
+        <route path="/webhdfs/v1/**?**">
+            <rewrite apply="WEBHDFS/webhdfs/inbound/namenode/file" to="request.url"/>
+            <rewrite apply="WEBHDFS/webhdfs/outbound/namenode/headers" to="response.headers"/>
+            <rewrite apply="WEBHDFS/webhdfs/outbound/namenode/headers" to="response.body"/>
+        </route>
+        <route path="/webhdfs/v1/~?**">
+            <rewrite apply="WEBHDFS/webhdfs/inbound/namenode/home" to="request.url"/>
+        </route>
+        <route path="/webhdfs/v1/~/**?**">
+            <rewrite apply="WEBHDFS/webhdfs/inbound/namenode/home/file" to="request.url"/>
+            <rewrite apply="WEBHDFS/webhdfs/outbound/namenode/headers" to="response.headers"/>
+        </route>
+        <route path="/webhdfs/data/v1/**?**"> <!-- Datanode route: the only route that declares its own dispatch element. -->
+            <rewrite apply="WEBHDFS/webhdfs/inbound/datanode" to="request.url"/>
+            <dispatch contributor-name="http-client" ha-classname="org.apache.knox.gateway.dispatch.DefaultDispatch" />
+        </route>
+    </routes>
+    <dispatch classname="org.apache.knox.gateway.hdfs.dispatch.HdfsHttpClientDispatch" ha-classname="org.apache.knox.gateway.hdfs.dispatch.WebHdfsHaDispatch"/>
+    <testURLs>
+        <testURL>/webhdfs/v1/?op=LISTSTATUS</testURL>
+    </testURLs>
+</service>
diff --git a/roles/knox/gateway/tasks/config.yml b/roles/knox/gateway/tasks/config.yml
index 7a69c11b..ed07c7e5 100644
--- a/roles/knox/gateway/tasks/config.yml
+++ b/roles/knox/gateway/tasks/config.yml
@@ -122,6 +122,23 @@
     group: "{{ knox_group }}"
     mode: "644"
 
+# Install the WebHDFS 2.4.0 service definition (service.xml and rewrite.xml) under knox_data_dir/data/services/webhdfs/2.4.0
+- name: Template Webhdfs service.xml
+  ansible.builtin.template:
+    src: services/webhdfs/2.4.0/service.xml.j2
+    dest: "{{ knox_data_dir }}/data/services/webhdfs/2.4.0/service.xml"
+    owner: "{{ knox_user }}"
+    group: "{{ knox_group }}"
+    mode: "644"
+
+- name: Template Webhdfs rewrite.xml
+  ansible.builtin.template:
+    src: services/webhdfs/2.4.0/rewrite.xml.j2
+    dest: "{{ knox_data_dir }}/data/services/webhdfs/2.4.0/rewrite.xml"
+    owner: "{{ knox_user }}"
+    group: "{{ knox_group }}"
+    mode: "644"
+
 - name: Template Knox gateway-site.xml
   ansible.builtin.template:
     src: gateway-site.xml.j2