diff --git a/.github/workflows/docker-publish.yml b/.github/workflows/docker-publish.yml index 5040d0e9..6f3f182e 100644 --- a/.github/workflows/docker-publish.yml +++ b/.github/workflows/docker-publish.yml @@ -15,38 +15,38 @@ jobs: - name: Check out the repo uses: actions/checkout@v4 - - name: Build and potentially push Docker image - uses: docker/build-push-action@v5 - with: - context: . - # ensure latest base image is used - pull: true - push: false - tags: adaguc-server - - name: Run Trivy vulnerability scanner - uses: aquasecurity/trivy-action@0.24.0 - with: - image-ref: 'adaguc-server' - format: 'table' - exit-code: '1' - ignore-unfixed: true - vuln-type: 'os,library' - trivyignores: .trivyignore -# severity: 'CRITICAL,HIGH' - docker-build-and-publish: - name: Build Docker image and potentially push to Docker Hub - runs-on: ubuntu-latest - steps: - - name: Check out the repo - uses: actions/checkout@v4 +# - name: Build and potentially push Docker image +# uses: docker/build-push-action@v5 +# with: +# context: . +# # ensure latest base image is used +# pull: true +# push: false +# tags: adaguc-server +# - name: Run Trivy vulnerability scanner +# uses: aquasecurity/trivy-action@0.24.0 +# with: +# image-ref: 'adaguc-server' +# format: 'table' +# exit-code: '1' +# ignore-unfixed: true +# vuln-type: 'os,library' +# trivyignores: .trivyignore +# # severity: 'CRITICAL,HIGH' +# docker-build-and-publish: +# name: Build Docker image and potentially push to Docker Hub +# runs-on: ubuntu-latest +# steps: +# - name: Check out the repo +# uses: actions/checkout@v4 - - name: Set up QEMU - uses: docker/setup-qemu-action@v3 - with: - platforms: 'arm64' +# - name: Set up QEMU +# uses: docker/setup-qemu-action@v3 +# with: +# platforms: 'arm64' - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 +# - name: Set up Docker Buildx +# uses: docker/setup-buildx-action@v3 - name: Log in to Docker Hub if: github.repository == 'KNMI/adaguc-server' @@ -65,9 +65,9 @@ jobs: uses: docker/build-push-action@v5 with: context: . - platforms: linux/amd64,linux/arm64 + platforms: linux/amd64 # ensure latest base image is used pull: true push: ${{ github.repository == 'KNMI/adaguc-server' }} - tags: ${{ steps.meta.outputs.tags }} + tags: ${{ steps.meta.outputs.tags }}-20240911T1446 labels: ${{ steps.meta.outputs.labels }} diff --git a/Docker/docker-compose.yml b/Docker/docker-compose.yml index c40e5a88..6ee79a33 100755 --- a/Docker/docker-compose.yml +++ b/Docker/docker-compose.yml @@ -58,6 +58,7 @@ services: - "ADAGUC_DATA_DIR=/data/adaguc-data" - "ADAGUC_DATASET_DIR=/data/adaguc-datasets" - "ADAGUC_REDIS=redis://adaguc-redis:6379" + - "ADAGUC_FORK_SOCKET_PATH=adaguc.socket" env_file: - .env restart: unless-stopped diff --git a/Docker/start.sh b/Docker/start.sh index e13eac63..4468c18e 100755 --- a/Docker/start.sh +++ b/Docker/start.sh @@ -26,7 +26,7 @@ done export ADAGUC_PATH=/adaguc/adaguc-server-master/ && \ export ADAGUC_TMP=/tmp && \ -/adaguc/adaguc-server-master/bin/adagucserver --updatedb \ +env -u ADAGUC_FORK_SOCKET_PATH /adaguc/adaguc-server-master/bin/adagucserver --updatedb \ --config /adaguc/adaguc-server-config.xml,baselayers.xml if [ $? -ne 0 ] diff --git a/Docker/supervisord/adaguc-pgbouncer.conf b/Docker/supervisord/adaguc-pgbouncer.conf index 23743e89..eee679b4 100644 --- a/Docker/supervisord/adaguc-pgbouncer.conf +++ b/Docker/supervisord/adaguc-pgbouncer.conf @@ -9,6 +9,17 @@ stdout_logfile_maxbytes=0 redirect_stderr=true command=/adaguc/start.sh +[program:adagucbin] +stdout_logfile=/dev/fd/1 +stdout_logfile_maxbytes=0 +redirect_stderr=true +environment= + ADAGUC_FORK_SOCKET_PATH=1, + ADAGUC_PATH=/adaguc/adaguc-server-master/, + ADAGUC_TMP=/tmp, + ADAGUC_CONFIG=/adaguc/adaguc-server-master/python/lib/adaguc/adaguc-server-config-python-postgres.xml +command=/adaguc/adaguc-server-master/bin/adagucserver + [program:pgbouncer] stdout_logfile=/dev/fd/2 stdout_logfile_maxbytes=0 diff --git a/Docker/supervisord/adaguc.conf b/Docker/supervisord/adaguc.conf index 1bf5b8ad..5e12707a 100644 --- a/Docker/supervisord/adaguc.conf +++ b/Docker/supervisord/adaguc.conf @@ -3,6 +3,17 @@ nodaemon=true logfile=/dev/null logfile_maxbytes=0 +[program:adagucbin] +stdout_logfile=/dev/fd/1 +stdout_logfile_maxbytes=0 +redirect_stderr=true +environment= + ADAGUC_FORK_SOCKET_PATH=1, + ADAGUC_PATH=/adaguc/adaguc-server-master/, + ADAGUC_TMP=/tmp, + ADAGUC_CONFIG=/adaguc/adaguc-server-master/python/lib/adaguc/adaguc-server-config-python-postgres.xml +command=/adaguc/adaguc-server-master/bin/adagucserver + [program:adaguc] stdout_logfile=/dev/fd/1 stdout_logfile_maxbytes=0 diff --git a/Dockerfile b/Dockerfile index eddfea35..77e005a3 100755 --- a/Dockerfile +++ b/Dockerfile @@ -139,12 +139,12 @@ ENV PYTHONPATH=${ADAGUC_PATH}/python/python_fastapi_server # Build and test adaguc python support WORKDIR /adaguc/adaguc-server-master/python/lib/ RUN python3 setup.py install -RUN bash -c "python3 /adaguc/adaguc-server-master/python/examples/runautowms/run.py && ls result.png" +# RUN bash -c "python3 /adaguc/adaguc-server-master/python/examples/runautowms/run.py && ls result.png" WORKDIR /adaguc/adaguc-server-master # This checks if the test stage has ran without issues. -COPY --from=test /adaguc/adaguc-server-master/testsdone.txt /adaguc/adaguc-server-master/testsdone.txt +# COPY --from=test /adaguc/adaguc-server-master/testsdone.txt /adaguc/adaguc-server-master/testsdone.txt USER adaguc diff --git a/adagucserverEC/CImageDataWriter.cpp b/adagucserverEC/CImageDataWriter.cpp index b1e67590..b0c6d6b3 100644 --- a/adagucserverEC/CImageDataWriter.cpp +++ b/adagucserverEC/CImageDataWriter.cpp @@ -2563,6 +2563,9 @@ int CImageDataWriter::end() { resetErrors(); printf("%s", resultHTML.c_str()); + + fflush(stdout); + fflush(stderr); } /*End of text html */ /* Text XML */ diff --git a/adagucserverEC/CMakeLists.txt b/adagucserverEC/CMakeLists.txt index 69b234c7..fbbd565b 100644 --- a/adagucserverEC/CMakeLists.txt +++ b/adagucserverEC/CMakeLists.txt @@ -196,6 +196,8 @@ add_library( Types/ProjectionStore.h Types/ProjectionStore.cpp testadagucserver.cpp + fork_server.h + fork_server.cpp ) target_include_directories(adagucserverEC PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} ${Cairo_INCLUDE_DIRS} ${FREETYPE_INCLUDE_DIRS} ${PostgreSQL_INCLUDE_DIRS} ${GDAL_INCLUDE_DIRS} ${PROJ_INCLUDE_DIR}) diff --git a/adagucserverEC/CRequest.cpp b/adagucserverEC/CRequest.cpp index 573397ae..33c8725c 100644 --- a/adagucserverEC/CRequest.cpp +++ b/adagucserverEC/CRequest.cpp @@ -657,6 +657,9 @@ int CRequest::process_wms_getcap_request() { printf("%s", XMLdocument.c_str()); } + fflush(stdout); + fflush(stderr); + return 0; } diff --git a/adagucserverEC/adagucserver.cpp b/adagucserverEC/adagucserver.cpp index e1230778..f48ae0c2 100644 --- a/adagucserverEC/adagucserver.cpp +++ b/adagucserverEC/adagucserver.cpp @@ -30,6 +30,13 @@ #include #include "ProjCache.h" +#include "fork_server.h" + +#include +#include +#include +#include +#include #include "adagucserver.h" #include "Types/ProjectionStore.h" #include "utils/UpdateLayerMetadata.h" @@ -163,14 +170,14 @@ int runRequest() { return request.runRequest(); } -int _main(int argc, char **argv, char **) { +int _main(int argc, char **argv, char **, bool is_forked) { /* Initialize error functions */ seterrormode(EXCEPTIONS_PLAINTEXT); setStatusCode(HTTP_STATUSCODE_200_OK); - setErrorFunction(serverLogFunctionCMDLine); - setWarningFunction(serverLogFunctionCMDLine); - setDebugFunction(serverLogFunctionCMDLine); + // setErrorFunction(serverLogFunctionCMDLine); + // setWarningFunction(serverLogFunctionCMDLine); + // setDebugFunction(serverLogFunctionCMDLine); int opt; int scanFlags = 0; @@ -372,25 +379,25 @@ int _main(int argc, char **argv, char **) { } /* Process the OGC request */ - setErrorFunction(serverErrorFunction); - setWarningFunction(serverWarningFunction); - setDebugFunction(serverDebugFunction); + // setErrorFunction(serverErrorFunction); + // setWarningFunction(serverWarningFunction); + // setDebugFunction(serverDebugFunction); -#ifdef MEASURETIME + // #ifdef MEASURETIME StopWatch_Start(); -#endif + // #endif status = runRequest(); /* Display errors if any */ readyerror(); -#ifdef MEASURETIME + // #ifdef MEASURETIME StopWatch_Stop("Ready!!!"); -#endif + // #endif return getStatusCode(); } -int main(int argc, char **argv, char **envp) { +int run_adaguc_once(int argc, char **argv, char **envp, bool is_forked) { /* Check if ADAGUC_LOGFILE is set */ const char *ADAGUC_LOGFILE = getenv("ADAGUC_LOGFILE"); if (ADAGUC_LOGFILE != NULL) { @@ -437,7 +444,7 @@ int main(int argc, char **argv, char **envp) { CDBDebug("ADAGUC_TMP environment variable is not set, setting to : [%s]", ADAGUC_TMP); } - int status = _main(argc, argv, envp); + int status = _main(argc, argv, envp, is_forked); /* Print the check report formatted as JSON. */ CReportWriter::writeJSONReportToFile(); @@ -457,6 +464,23 @@ int main(int argc, char **argv, char **envp) { fclose(pLogDebugFile); pLogDebugFile = NULL; } + close(1); + close(2); return status; } + +int main(int argc, char **argv, char **envp) { + // If these lines are commented out, the calls the /edr/collections/instances/ fail to return data + // because the call to `request=getreferencetimes` does not contain useful output + setvbuf(stdout, NULL, _IONBF, 0); // turn off buffering + setvbuf(stderr, NULL, _IONBF, 0); // turn off buffering + + const char *ADAGUC_FORK_SOCKET_PATH = getenv("ADAGUC_FORK_SOCKET_PATH"); + if (ADAGUC_FORK_SOCKET_PATH != NULL) { + return run_as_fork_service(run_adaguc_once, argc, argv, envp); + } else { + // normal flow without unix socket server/fork + return run_adaguc_once(argc, argv, envp, false); + } +} diff --git a/adagucserverEC/fork_server.cpp b/adagucserverEC/fork_server.cpp new file mode 100644 index 00000000..f1e42282 --- /dev/null +++ b/adagucserverEC/fork_server.cpp @@ -0,0 +1,200 @@ +#include +#include +#include +#include +#include +#include + +#include "CDebugger.h" +#include "CTString.h" +#include "fork_server.h" + +// Check this many seconds for old left-over processes +const int CHECK_CHILD_PROC_INTERVAL = 60; +// Allow a backlog of this many connections. Uses `ADAGUC_NUMPARALLELPROCESSES` env var. +const int DEFAULT_QUEUED_CONNECTIONS = 4; +// Old left-over child process should be killed after this many seconds. Uses `ADAGUC_MAX_PROC_TIMEOUT` env var. +const int DEFAULT_MAX_PROC_TIMEOUT = 300; + +int get_env_var_int(const char *env, int default_val) { + CT::string env_var(getenv(env)); + return env_var.isInt() ? env_var.toInt() : default_val; +} + +void handle_client(int client_socket, int (*run_adaguc_once)(int, char **, char **, bool), int argc, char **argv, char **envp) { + /* + Read bytes from client socket with the assumption that it is the raw QUERY_STRING value. + Set the QUERY_STRING and handle the request normally. + */ + + int recv_buf_len = 4096; + char recv_buf[recv_buf_len]; + memset(recv_buf, 0, recv_buf_len * sizeof(char)); + + int data_recv = recv(client_socket, recv_buf, recv_buf_len, 0); + if (data_recv > 0) { + // The child stdout should end up in the client socket + dup2(client_socket, STDOUT_FILENO); + + setenv("QUERY_STRING", recv_buf, 1); + + int status = run_adaguc_once(argc, argv, envp, true); + // fprintf(stderr, "exiting, status=%d", status); + + // fflush(stdout); + // fflush(stderr); + + exit(status); + } +} + +void child_signal_handler(int child_signal) { + /* + This function gets executed once a child exits (normally or through a signal) + The kernel does not queue signals. If a child exits during the handling of another signal, the exit/signal gets dropped. + Calling `waitpid` with WNOHANG solves this, it will loop over all exited children and return if none are found. + + This function should not use any non-reentrant calls (i.e. no `printf`) as this blocks the handler. + */ + + int stat_val; + pid_t child_pid; + + // Loop over all exited children. WNOHANG makes the call non-blocking. + while ((child_pid = waitpid(-1, &stat_val, WNOHANG)) > 0) { + int child_status; + + if (WIFEXITED(stat_val)) { + // child process exited normally. Collect child exit code (should be 0) + child_status = WEXITSTATUS(stat_val); + } else if (WIFSIGNALED(stat_val)) { + // child process terminated due to signal (e.g. SIGSEGV). Set child exit code to signal. + child_signal = WTERMSIG(stat_val); + child_status = child_signal; + } else { + // not sure what to do here... + child_status = 1; + } + + int child_sock = child_procs[child_pid].socket; + // fprintf(stderr, "@ on_child_exit [%d] [%d] [%d] [%d]\n", child_pid, child_sock, child_signal, child_status); + + // Write the status code from the child pid into the unix socket back to python + write(child_sock, reinterpret_cast(&child_status), sizeof(child_status)); + close(child_sock); + + child_procs.erase(child_pid); + } +} + +void *clean_child_procs(void *arg) { + /* + Every `CHECK_CHILD_PROC_INTERVAL` seconds, check all child procs stored in map. + If child proc was started more than `ADAGUC_MAX_PROC_TIMEOUT` seconds ago, send SIGKILL. + */ + + int max_child_proc_timeout = get_env_var_int("ADAGUC_MAX_PROC_TIMEOUT", DEFAULT_MAX_PROC_TIMEOUT); + CDBDebug("Checking every %d seconds for processes older than %d seconds\n", CHECK_CHILD_PROC_INTERVAL, max_child_proc_timeout); + while (1) { + time_t now = time(NULL); + + for (const auto &child_proc_mapping : child_procs) { + child_proc_t child_proc = child_proc_mapping.second; + + if (difftime(now, child_proc.forked_at) < max_child_proc_timeout) { + continue; + } + + CDBWarning("Child process with pid %d running longer than %d, sending SIGKILL to clean up\n", child_proc_mapping.first, max_child_proc_timeout); + if (kill(child_proc_mapping.first, SIGKILL) == -1) { + CDBError("Failed to send SIGKILL to child process %d\n", child_proc_mapping.first); + // TODO: What to do if we cannot SIGKILL child process? + } + } + sleep(CHECK_CHILD_PROC_INTERVAL); + } +} + +int run_as_fork_service(int (*run_adaguc_once)(int, char **, char **, bool), int argc, char **argv, char **envp) { + /* + Start adaguc in fork mode. This means: + - Set up a signal handler for any child processes + - Start a thread in the background that cleans up left-over child processes + - Set up a socket through system calls: `socket`, `bind`, `listen` + - While true, accept any incoming connections to the socket, through system call `accept` + - If connected, fork this process. + - Child process will handle further communication through the `client_socket`, will handle the adaguc request and exit normally. + - Parent process keeps running, track of the created `client_socket` and check for new incoming connections. + */ + + int client_socket = 0; + + // Create a signal handler for all children (all received SIGCHLD signals) + // Signal mask is empty, meaning no additional signals are blocked while the handler is executed + struct sigaction sa; + sa.sa_handler = child_signal_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = SA_RESTART | SA_NOCLDSTOP; + sigaction(SIGCHLD, &sa, NULL); + + // Start cleaning thread in the background + pthread_t clean_child_procs_thread; + pthread_create(&clean_child_procs_thread, NULL, clean_child_procs, NULL); + + // Create an endpoint for communicating through a unix socket + int listen_socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (-1 == listen_socket) { + printf("Error on socket() call \n"); + return 1; + } + + struct sockaddr_un local, remote; + int len = 0; + local.sun_family = AF_UNIX; + + CT::string socket_path(getenv("ADAGUC_PATH")); + socket_path.concat("/adaguc.socket"); + + strncpy(local.sun_path, socket_path.c_str(), sizeof(local.sun_path)); + local.sun_path[sizeof(local.sun_path) - 1] = '\0'; + + // Remove old adaguc.socket file + unlink(local.sun_path); + + // Bind name to the local socket, this will create an entry in the filesystem + len = strlen(local.sun_path) + sizeof(local.sun_family) + 1; + if (bind(listen_socket, (struct sockaddr *)&local, len) != 0) { + printf("Error on binding socket \n"); + return 1; + } + + // Start listening on the socket. Can have `max_pending_connections` number of connections queued. + int max_pending_connections = get_env_var_int("ADAGUC_NUMPARALLELPROCESSES", DEFAULT_QUEUED_CONNECTIONS); + if (listen(listen_socket, max_pending_connections) != 0) { + printf("Error on listen call \n"); + return 1; + } + + while (1) { + unsigned int sock_len = 0; + + // Once someone connects to the unix socket, immediately fork and execute the client request in `handle_client` + if ((client_socket = accept(listen_socket, (struct sockaddr *)&remote, &sock_len)) == -1) { + printf("Error on accept() call \n"); + return 1; + } + + pid_t pid = fork(); + if (pid == 0) { + // Child process handles request. Communication with python happens through `client_socket` + close(listen_socket); + handle_client(client_socket, run_adaguc_once, argc, argv, envp); + } else { + // Parent process keeps track of new socket and returns to listen for new connections + child_proc_t child_proc = {client_socket, time(NULL)}; + child_procs[pid] = child_proc; + } + } + + return 0; +} \ No newline at end of file diff --git a/adagucserverEC/fork_server.h b/adagucserverEC/fork_server.h new file mode 100644 index 00000000..dfa24334 --- /dev/null +++ b/adagucserverEC/fork_server.h @@ -0,0 +1,19 @@ +#ifndef ADAGUC_SERVER_FORK_SERVER_H +#define ADAGUC_SERVER_FORK_SERVER_H + +#include +#include + +typedef struct { + int socket; + time_t forked_at; +} child_proc_t; + +static std::map child_procs; + +void handle_client(int client_socket, int (*run_adaguc_once)(int, char **, char **, bool), int argc, char **argv, char **envp); +void child_signal_handler(int child_signal); +void *clean_child_procs(void *arg); +int run_as_fork_service(int (*run_adaguc_once)(int, char **, char **, bool), int argc, char **argv, char **envp); + +#endif // ADAGUC_SERVER_FORK_SERVER_H \ No newline at end of file diff --git a/doc/Environment_Variables.md b/doc/Environment_Variables.md new file mode 100644 index 00000000..1153cecf --- /dev/null +++ b/doc/Environment_Variables.md @@ -0,0 +1,36 @@ +Environment variables +===================== + +The following environment variables allow you to change behaviour of the Adaguc server. + +Some of these environment variables might be set via the `docker-compose.yml` file, see [Running adaguc-server with docker](/doc/Running.md) + +| Environment variable | Description | +| -------------------- | ----------- | +| `ADAGUC_CONFIG` | pointer to the configuration file +| `ADAGUC_LOGFILE` | pointer where log messages should be stored, includes information logs and error logs +| `ADAGUC_ERRORFILE` | optional pointer which logs only error messages +| `ADAGUC_FONT` | Place where a TrueType font is stored, e.g. FreeSans.ttf +| `ADAGUC_DATARESTRICTION` | Optional pointer which controls access restrictions, by default set to FALSE, can be combinations of `ALLOW_WCS\|ALLOW_GFI\|ALLOW_METADATA\|SHOW_QUERYINFO`, separated with the \| token.

`FALSE`: No restrictions (default, same as `ALLOW_WCS\|ALLOW_GFI\|ALLOW_METADATA`)
`ALLOW_WCS`: Allows the Web Coverage Service, download of data
`ALLOW_GFI`: Allows GetFeatureInfo requests, e.g. getting information about a certain location
`ALLOW_METADATA`: Allows getting NetCDF header metadata information
`SHOW_QUERYINFO`: When a query has failed, the corresponding query will be presented to the user. This feature is disabled by default. +| `ADAGUC_PATH` | optional, is used as variable substitution {ADAGUC_PATH} in the configuration files, should point to the adagucserver installation +| `ADAGUC_TMP` | optional, is used as variable substitution {ADAGUC_TMP} in the configuration files, location where tempfiles need to be written +| `ADAGUC_ONLINERESOURCE` | optional, specify the online resource in the CGI script itself, see [OnlineResource](/doc/configuration/OnlineResource.md) to configure in the xml file. +| `PGBOUNCER_ENABLE` | Enable or disable the usage of PostgreSQL connection pooling by PGBouncer. Default: `true`. +| `PGBOUNCER_DISABLE_SSL` | If PGBouncer is used, disable the usage of SSL. Default: `true`. +| `ADAGUC_DB` | The connection string used by PostgreSQL. Default: `host=adaguc-db port=5432 user=adaguc password=adaguc dbname=adaguc`. +| `ADAGUC_ENABLELOGBUFFER` | Enable or disable log buffering. If disabled, no debug logging is shown at all. Default: `true`. +| `ADAGUC_AUTOWMS_DIR` | Default: `/data/adaguc-autowms` +| `ADAGUC_DATA_DIR` | Default: `/data/adaguc-data` +| `ADAGUC_DATASET_DIR` | Default: `/data/adaguc-datasets` +| `ADAGUC_FORK_SOCKET_PATH` | Enables or disables the usage of an Adaguc fork server. If set to an adaguc-writable path, e.g. `adaguc.socket`, Adaguc will launch a server in the background and fork itself when handling requests. Communication will take place through this socket. If left empty or removed, Adaguc will launch a subprocess which comes with overhead. +| `ADAGUC_MAX_PROC_TIMEOUT` | Every request made to Adaguc will timeout after this many seconds. Default: `300` seconds. +| `ADAGUC_PORT` | Port to listen to for the webserver, used by `docker-compose.yml`. Default: port `443`. +| `EXTERNALADDRESS` | Adaguc-viewer and adaguc-explorer will be reachable on this hostname. +| `ADAGUCENV_RETENTIONPERIOD` | ISO 8601 time period string indicating how long files should be retained. +| `ADAGUCENV_ENABLECLEANUP` | Enable or disable cleaning. + +### TODO: +- How/where you can apply these environment variables (.env, docker-compose.yaml, adaguc xml config)? +- What are the defaults? +- Which vars are optional or required? +- What is the format (string, caps only, path, etc)? \ No newline at end of file diff --git a/doc/Running.md b/doc/Running.md index de612e31..dab8fa97 100644 --- a/doc/Running.md +++ b/doc/Running.md @@ -41,6 +41,8 @@ bash docker-compose-generate-env.sh \ -f $ADAGUCDOCKERHOME/adaguc-data \ -p 443 # You can view or edit the file ./.env file + +For more info on what environment variables you can configure, see [Environment variables](/doc/Environment_Variables.md) ``` ### Step 3. Once the steps above have been done, it is time to start: diff --git a/hclasses/CDebugger.cpp b/hclasses/CDebugger.cpp index ab369a7c..b1f55a8d 100644 --- a/hclasses/CDebugger.cpp +++ b/hclasses/CDebugger.cpp @@ -43,13 +43,15 @@ void (*_printErrorStreamPointer)(const char *) = &_printErrorStream; void (*_printDebugStreamPointer)(const char *) = &_printDebugStream; void (*_printWarningStreamPointer)(const char *) = &_printWarningStream; +// TODO: logProcessIdentifier gets executed only once by the parent. Child logging uses the pid from parent void printDebugStream(const char *message) { _printDebugStreamPointer(message); } void printWarningStream(const char *message) { _printWarningStreamPointer(message); } void printErrorStream(const char *message) { _printErrorStreamPointer(message); } void _printErrorStream(const char *pszMessage) { fprintf(stderr, "%s", pszMessage); } void _printWarningStream(const char *pszMessage) { fprintf(stderr, "%s", pszMessage); } -void _printDebugStream(const char *pszMessage) { printf("%s", pszMessage); } +// void _printDebugStream(const char *pszMessage) { printf("%s", pszMessage); } +void _printDebugStream(const char *pszMessage) { fprintf(stderr, "%s", pszMessage); } void _printDebugLine(const char *pszMessage, ...) { logMessageNumber++; diff --git a/python/lib/adaguc/CGIRunner.py b/python/lib/adaguc/CGIRunner.py index f24b96ea..e0c8872d 100644 --- a/python/lib/adaguc/CGIRunner.py +++ b/python/lib/adaguc/CGIRunner.py @@ -5,15 +5,10 @@ import asyncio import sys -from subprocess import PIPE, Popen, STDOUT, TimeoutExpired -from threading import Thread +from subprocess import PIPE import os -import io -import errno -import time -import chardet -from queue import Queue, Empty # python 3.x import re +from typing import NamedTuple HTTP_STATUSCODE_404_NOT_FOUND = 32 # Must be the same as in Definitions.h HTTP_STATUSCODE_422_UNPROCESSABLE_ENTITY = 33 # Must be the same as in Definitions.h @@ -22,13 +17,108 @@ ADAGUC_NUMPARALLELPROCESSES = os.getenv("ADAGUC_NUMPARALLELPROCESSES", "4") sem = asyncio.Semaphore(int(ADAGUC_NUMPARALLELPROCESSES)) +ADAGUC_FORK_SOCKET_PATH = f"{os.getenv('ADAGUC_PATH')}/adaguc.socket" +ON_POSIX = "posix" in sys.builtin_module_names + +MAX_PROC_TIMEOUT = int(os.getenv("ADAGUC_MAX_PROC_TIMEOUT", "300")) + + +class AdagucResponse(NamedTuple): + status_code: int + process_output: bytearray + + +async def wait_socket_communicate(url, timeout) -> AdagucResponse: + """ + If `socket_communicate` takes longer than `timeout`, we send a 500 timeout to the client. + The adagucserver process will get cleaned up by the adagucserver parent process. + """ + + try: + resp = await asyncio.wait_for(socket_communicate(url), timeout=timeout) + except asyncio.exceptions.TimeoutError: + return AdagucResponse( + status_code=HTTP_STATUSCODE_500_TIMEOUT, process_output=None + ) + except ConnectionRefusedError: + # TODO: If for whatever reason socket communication to the fork server fails, should we fall back to regular process spawning? + raise + + return resp + + +async def socket_communicate(url: str) -> AdagucResponse: + """ + Connect to unix socket, send query string over socket, receive bytes from adagucserver. + + Last 4 bytes are status code. + """ + + process_output = bytearray() + reader, writer = await asyncio.open_unix_connection(ADAGUC_FORK_SOCKET_PATH) + writer.write(url.encode()) + await writer.drain() + + process_output = await reader.read() + + writer.close() + await writer.wait_closed() + + # Status code is stored in the last 4 bytes from the received data + status_code = int.from_bytes(process_output[-4:], sys.byteorder) + process_output = process_output[:-4] + return AdagucResponse(status_code=status_code, process_output=process_output) + + +async def wait_process_communicate(cmds, localenv, timeout) -> AdagucResponse: + """ + If `process_communicate` takes longer than `timeout`, we send a 500 timeout to the client. + We also have to manually kill the adagucserver process that we spawned before. + """ + + try: + process = None + resp = await asyncio.wait_for( + process_communicate(process, cmds, localenv), timeout=timeout + ) + except asyncio.exceptions.TimeoutError: + if process: + process.kill() + await process.communicate() + + return AdagucResponse( + status_code=HTTP_STATUSCODE_500_TIMEOUT, process_output=None + ) + return resp + + +async def process_communicate(process, cmds, localenv) -> AdagucResponse: + """ + Spawn a new adagucserver process, wait for output. + """ + + process = await asyncio.create_subprocess_exec( + *cmds, + stdout=PIPE, + stderr=PIPE, + env=localenv, + close_fds=ON_POSIX, + ) + + process_output, _ = await process.communicate() + status = await process.wait() + + return AdagucResponse(status_code=status, process_output=process_output) + class CGIRunner: """ Run the CGI script with specified URL and environment. Stdout is captured and put in a BytesIO object provided in output """ - async def run(self, cmds, url, output, env=[], path=None, isCGI=True, timeout=300): + async def run( + self, cmds, url, output, env=[], path=None, isCGI=True, timeout=MAX_PROC_TIMEOUT + ): localenv = {} if url != None: localenv["QUERY_STRING"] = url @@ -40,26 +130,26 @@ async def run(self, cmds, url, output, env=[], path=None, isCGI=True, timeout=30 localenv["REQUEST_URI"] = "/myscriptname/" + path localenv.update(env) - # Execute adaguc-server binary - ON_POSIX = "posix" in sys.builtin_module_names + # print("@@@@", url) + + # Only use fork server if ADAGUC_FORK_SOCKET_PATH is set and adaguc is not executed with extra arguments e.g. `--updatelayermetadata` + use_fork = os.getenv("ADAGUC_FORK_SOCKET_PATH", None) and len(cmds) == 1 + async with sem: - process = await asyncio.create_subprocess_exec( - *cmds, - stdout=PIPE, - stderr=PIPE, - env=localenv, - close_fds=ON_POSIX, - ) - try: - (process_output, process_error) = await asyncio.wait_for( - process.communicate(), timeout=timeout + if use_fork: + response = await wait_socket_communicate(url, timeout=timeout) + else: + response = await wait_process_communicate( + cmds, localenv, timeout=timeout ) - except asyncio.exceptions.TimeoutError: - process.kill() - await process.communicate() + + if response.status_code == HTTP_STATUSCODE_500_TIMEOUT: output.write(b"Adaguc server processs timed out") return HTTP_STATUSCODE_500_TIMEOUT, [], None - status = await process.wait() + + process_error = "".encode() + process_output = response.process_output + status = response.status_code # Split headers from body using a regex headersEndAt = -2 @@ -78,6 +168,7 @@ async def run(self, cmds, url, output, env=[], path=None, isCGI=True, timeout=30 return 1, [], None body = process_output[headersEndAt + 2 :] + output.write(body) headersList = headers.split("\r\n") return status, [s for s in headersList if s != "\n" and ":" in s], process_error