From 40bc31468898b55b9aefda12df88f9b4687917ba Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 23 Oct 2023 15:20:00 +0200 Subject: [PATCH 01/52] service exposes inactivity --- .github/workflows/check-image.yml | 6 +-- .osparc/jupyter-math/runtime.yml | 5 ++ Dockerfile | 7 ++- Makefile | 4 +- docker/entrypoint.bash | 1 + docker/inactivity.py | 9 ++++ docker/kernel_cehcker.py | 90 +++++++++++++++++++++++++++++++ 7 files changed, 115 insertions(+), 7 deletions(-) create mode 100644 docker/inactivity.py create mode 100644 docker/kernel_cehcker.py diff --git a/.github/workflows/check-image.yml b/.github/workflows/check-image.yml index d64dc01..b460c9e 100644 --- a/.github/workflows/check-image.yml +++ b/.github/workflows/check-image.yml @@ -9,14 +9,14 @@ jobs: - name: Checkout repo content uses: actions/checkout@v2 - name: ooil version - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-4 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 with: args: ooil --version - name: Assemble docker-compose spec - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-4 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 with: args: ooil compose - name: Build all images if multiple - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-4 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 with: args: docker-compose build diff --git a/.osparc/jupyter-math/runtime.yml b/.osparc/jupyter-math/runtime.yml index 6f83724..32e020a 100644 --- a/.osparc/jupyter-math/runtime.yml +++ b/.osparc/jupyter-math/runtime.yml @@ -18,3 +18,8 @@ paths-mapping: outputs_path: /home/jovyan/work/outputs state_paths: - /home/jovyan/work/workspace +callbacks-mapping: + inactivity: + service: container + command: "/path/to/your/inactivity/hook" + timeout: 1 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 40bfcc9..82bf24e 100644 --- a/Dockerfile +++ 
b/Dockerfile @@ -74,8 +74,8 @@ RUN jupyter serverextension enable voila && \ # Import matplotlib the first time to build the font cache. ENV XDG_CACHE_HOME /home/$NB_USER/.cache/ RUN MPLBACKEND=Agg .venv/bin/python -c "import matplotlib.pyplot" && \ -fix-permissions /home/$NB_USER - # run fix permissions only once. This can be probably optimized, so it is faster to build + fix-permissions /home/$NB_USER +# run fix permissions only once. This can be probably optimized, so it is faster to build # copy README and CHANGELOG COPY --chown=$NB_UID:$NB_GID CHANGELOG.md ${NOTEBOOK_BASE_DIR}/CHANGELOG.md @@ -91,6 +91,9 @@ ENV JP_LSP_VIRTUAL_DIR="/home/${NB_USER}/.virtual_documents" # Copying boot scripts COPY --chown=$NB_UID:$NB_GID docker /docker +RUN chmod +x /docker/inactivity.py \ + && chmod +x /docker/kernel_cehcker.py + RUN echo 'export PATH="/home/${NB_USER}/.venv/bin:$PATH"' >> "/home/${NB_USER}/.bashrc" EXPOSE 8888 diff --git a/Makefile b/Makefile index cf4b223..3e5d796 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,7 @@ define _bumpversion # upgrades as $(subst $(1),,$@) version, commits and tags @docker run -it --rm -v $(PWD):/${DOCKER_IMAGE_NAME} \ -u $(shell id -u):$(shell id -g) \ - itisfoundation/ci-service-integration-library:v1.0.3-dev-4 \ + itisfoundation/ci-service-integration-library:v1.0.3-dev-8 \ sh -c "cd /${DOCKER_IMAGE_NAME} && bump2version --verbose --list --config-file $(1) $(subst $(2),,$@)" endef @@ -49,7 +49,7 @@ version-patch version-minor version-major: .bumpversion.cfg ## increases service compose-spec: ## runs ooil to assemble the docker-compose.yml file @docker run -it --rm -v $(PWD):/${DOCKER_IMAGE_NAME} \ -u $(shell id -u):$(shell id -g) \ - itisfoundation/ci-service-integration-library:v1.0.3-dev-4 \ + itisfoundation/ci-service-integration-library:v1.0.3-dev-8 \ sh -c "cd /${DOCKER_IMAGE_NAME} && ooil compose" build: | compose-spec ## build docker image diff --git a/docker/entrypoint.bash b/docker/entrypoint.bash index 
790a3d7..c36537a 100755 --- a/docker/entrypoint.bash +++ b/docker/entrypoint.bash @@ -74,4 +74,5 @@ chmod gu-w "/home/${NB_USER}/work" echo echo "$INFO" "Starting notebook ..." +exec gosu "$NB_USER" /docker/kernel_cehcker.py & exec gosu "$NB_USER" /docker/boot_notebook.bash diff --git a/docker/inactivity.py b/docker/inactivity.py new file mode 100644 index 0000000..c144d73 --- /dev/null +++ b/docker/inactivity.py @@ -0,0 +1,9 @@ + +#!/home/jovyan/.venv/bin/python + +# prints the result of the inactivity command + +import requests + +r = requests.get("http://localhost:9000") +print(r.text) \ No newline at end of file diff --git a/docker/kernel_cehcker.py b/docker/kernel_cehcker.py new file mode 100644 index 0000000..198cb66 --- /dev/null +++ b/docker/kernel_cehcker.py @@ -0,0 +1,90 @@ +#!/home/jovyan/.venv/bin/python + + +import asyncio +import json +import requests +from datetime import datetime +import tornado +from contextlib import suppress +from typing import Final + + +KERNEL_BUSY_CHECK_INTERVAL_S: Final[float] = 5 + + +class JupyterKernelChecker: + BASE_URL = "http://localhost:8888" + HEADERS = {"accept": "application/json"} + + def __init__(self) -> None: + self.last_busy: datetime| None = None + + def _get(self, path: str) -> dict: + r = requests.get(f'{self.BASE_URL}{path}', headers=self.HEADERS) + return r.json() + + def _are_kernels_busy(self)-> bool: + json_response = self._get("/api/kernels") + + are_kernels_busy = False + + for kernel_data in json_response: + kernel_id = kernel_data["id"] + + kernel_info = self._get(f"/api/kernels/{kernel_id}") + if kernel_info["execution_state"] != "idle": + are_kernels_busy = True + + return are_kernels_busy + + def check(self): + are_kernels_busy = self._are_kernels_busy() + print(f"{are_kernels_busy=}") + + if not are_kernels_busy: + self.last_busy = None + + if are_kernels_busy and self.last_busy is None: + self.last_busy = datetime.utcnow() + + + def get_idle_seconds(self)-> float: + if self.last_busy is None: + 
return 0 + + return (datetime.utcnow() - self.last_busy).total_seconds() + + async def run(self): + while True: + with suppress(Exception): + self.check() + await asyncio.sleep(KERNEL_BUSY_CHECK_INTERVAL_S) + + + +kernel_checker = JupyterKernelChecker() + + +class MainHandler(tornado.web.RequestHandler): + def get(self): + idle_seconds = kernel_checker.get_idle_seconds() + response = ( + {"is_inactive": True, "seconds_inactive" : idle_seconds} + if idle_seconds > 0 else + {"is_inactive": False, "seconds_inactive" : None} + ) + self.write(json.dumps(response)) + + +def make_app()-> tornado.web.Application: + return tornado.web.Application([(r"/", MainHandler)]) + +async def main(): + app = make_app() + app.listen(9000) + asyncio.create_task(kernel_checker.run()) + await asyncio.Event().wait() + +if __name__ == "__main__": + asyncio.run(main()) From 5c52d5e02537ef93ac309c35c6413217c540a804 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 23 Oct 2023 15:32:56 +0200 Subject: [PATCH 02/52] inverting logic --- docker/kernel_cehcker.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/docker/kernel_cehcker.py b/docker/kernel_cehcker.py index 198cb66..1f324c2 100644 --- a/docker/kernel_cehcker.py +++ b/docker/kernel_cehcker.py @@ -18,7 +18,7 @@ class JupyterKernelChecker: HEADERS = {"accept": "application/json"} def __init__(self) -> None: - self.last_busy: datetime| None = None + self.last_idle: datetime| None = None def _get(self, path: str) -> dict: r = requests.get(f'{self.BASE_URL}{path}', headers=self.HEADERS) @@ -40,20 +40,19 @@ def _are_kernels_busy(self)-> bool: def check(self): are_kernels_busy = self._are_kernels_busy() - print(f"{are_kernels_busy=}") - if not are_kernels_busy: - self.last_busy = None + if are_kernels_busy: + self.last_idle = None - if are_kernels_busy and self.last_busy is None: - self.last_busy = datetime.utcnow() + if not are_kernels_busy and self.last_idle is None: + self.last_idle = datetime.utcnow() 
def get_idle_seconds(self)-> float: - if self.last_busy is None: + if self.last_idle is None: return 0 - return (datetime.utcnow() - self.last_busy).total_seconds() + return (datetime.utcnow() - self.last_idle).total_seconds() async def run(self): while True: From 874d99f0c8b8595fb3dbdf87cddd26f9ef8af7c5 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 23 Oct 2023 15:35:53 +0200 Subject: [PATCH 03/52] renaming --- .osparc/jupyter-math/runtime.yml | 2 +- Dockerfile | 2 +- docker/entrypoint.bash | 2 +- docker/inactivity.py | 1 - docker/{kernel_cehcker.py => kernel_checker.py} | 0 5 files changed, 3 insertions(+), 4 deletions(-) rename docker/{kernel_cehcker.py => kernel_checker.py} (100%) diff --git a/.osparc/jupyter-math/runtime.yml b/.osparc/jupyter-math/runtime.yml index 32e020a..a12ec6e 100644 --- a/.osparc/jupyter-math/runtime.yml +++ b/.osparc/jupyter-math/runtime.yml @@ -21,5 +21,5 @@ paths-mapping: callbacks-mapping: inactivity: service: container - command: "/path/to/your/inactivity/hook" + command: ["python", "/docker/inactivity.py"] timeout: 1 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 82bf24e..9b0c8f6 100644 --- a/Dockerfile +++ b/Dockerfile @@ -92,7 +92,7 @@ ENV JP_LSP_VIRTUAL_DIR="/home/${NB_USER}/.virtual_documents" COPY --chown=$NB_UID:$NB_GID docker /docker RUN chmod +x /docker/inactivity.py \ - && chmod +x /docker/kernel_cehcker.py + && chmod +x /docker/kernel_checker.py RUN echo 'export PATH="/home/${NB_USER}/.venv/bin:$PATH"' >> "/home/${NB_USER}/.bashrc" diff --git a/docker/entrypoint.bash b/docker/entrypoint.bash index c36537a..84b5150 100755 --- a/docker/entrypoint.bash +++ b/docker/entrypoint.bash @@ -74,5 +74,5 @@ chmod gu-w "/home/${NB_USER}/work" echo echo "$INFO" "Starting notebook ..." 
-exec gosu "$NB_USER" /docker/kernel_cehcker.py & +exec gosu "$NB_USER" /docker/kernel_checker.py & exec gosu "$NB_USER" /docker/boot_notebook.bash diff --git a/docker/inactivity.py b/docker/inactivity.py index c144d73..1145924 100644 --- a/docker/inactivity.py +++ b/docker/inactivity.py @@ -1,4 +1,3 @@ - #!/home/jovyan/.venv/bin/python # prints the result of the inactivity command diff --git a/docker/kernel_cehcker.py b/docker/kernel_checker.py similarity index 100% rename from docker/kernel_cehcker.py rename to docker/kernel_checker.py From d311da0ec575164d8badbbd7afbdea3fc6821658 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 23 Oct 2023 15:43:34 +0200 Subject: [PATCH 04/52] refactor --- .osparc/jupyter-math/runtime.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.osparc/jupyter-math/runtime.yml b/.osparc/jupyter-math/runtime.yml index a12ec6e..ee7d2e0 100644 --- a/.osparc/jupyter-math/runtime.yml +++ b/.osparc/jupyter-math/runtime.yml @@ -21,5 +21,5 @@ paths-mapping: callbacks-mapping: inactivity: service: container - command: ["python", "/docker/inactivity.py"] + command: "/docker/inactivity.py" timeout: 1 \ No newline at end of file From be8989e95bd8b37351dec9aac0938ba4b1fcf4f8 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 23 Oct 2023 16:09:19 +0200 Subject: [PATCH 05/52] replacing docker compose strings --- .github/workflows/check-image.yml | 4 ++-- Makefile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/check-image.yml b/.github/workflows/check-image.yml index b460c9e..9419f33 100644 --- a/.github/workflows/check-image.yml +++ b/.github/workflows/check-image.yml @@ -12,11 +12,11 @@ jobs: uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 with: args: ooil --version - - name: Assemble docker-compose spec + - name: Assemble docker compose spec uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 with: args: ooil compose - name: Build all 
images if multiple uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 with: - args: docker-compose build + args: docker compose build diff --git a/Makefile b/Makefile index 3e5d796..3c154ea 100644 --- a/Makefile +++ b/Makefile @@ -53,12 +53,12 @@ compose-spec: ## runs ooil to assemble the docker-compose.yml file sh -c "cd /${DOCKER_IMAGE_NAME} && ooil compose" build: | compose-spec ## build docker image - docker-compose build + docker compose build # To test built service locally ------------------------------------------------------------------------- .PHONY: run-local run-local: ## runs image with local configuration - docker-compose --file docker-compose-local.yml up + docker compose --file docker-compose-local.yml up .PHONY: publish-local publish-local: ## push to local throw away registry to test integration From 5bf7f712f1848e98ff874e49528b6b048a53ca58 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 24 Oct 2023 15:14:06 +0200 Subject: [PATCH 06/52] using new API interface --- docker/kernel_checker.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/docker/kernel_checker.py b/docker/kernel_checker.py index 1f324c2..0df4a9e 100644 --- a/docker/kernel_checker.py +++ b/docker/kernel_checker.py @@ -69,9 +69,7 @@ class MainHandler(tornado.web.RequestHandler): def get(self): idle_seconds = kernel_checker.get_idle_seconds() response = ( - {"is_inactive": True, "seconds_inactive" : idle_seconds} - if idle_seconds > 0 else - {"is_inactive": False, "seconds_inactive" : None} + {"seconds_inactive" : idle_seconds if idle_seconds > 0 else None} ) self.write(json.dumps(response)) From aaf69b9094a577bdeb7f7b3a4a54814454db07f9 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 24 Oct 2023 15:18:00 +0200 Subject: [PATCH 07/52] using different random port --- docker/inactivity.py | 2 +- docker/kernel_checker.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docker/inactivity.py b/docker/inactivity.py 
index 1145924..37ea005 100644 --- a/docker/inactivity.py +++ b/docker/inactivity.py @@ -4,5 +4,5 @@ import requests -r = requests.get("http://localhost:9000") +r = requests.get("http://localhost:19597") print(r.text) \ No newline at end of file diff --git a/docker/kernel_checker.py b/docker/kernel_checker.py index 0df4a9e..0b2cec9 100644 --- a/docker/kernel_checker.py +++ b/docker/kernel_checker.py @@ -12,7 +12,6 @@ KERNEL_BUSY_CHECK_INTERVAL_S: Final[float] = 5 - class JupyterKernelChecker: BASE_URL = "http://localhost:8888" HEADERS = {"accept": "application/json"} @@ -79,7 +78,7 @@ def make_app()-> tornado.web.Application: async def main(): app = make_app() - app.listen(9000) + app.listen(19597) asyncio.create_task(kernel_checker.run()) await asyncio.Event().wait() From 712afcd18e5d4e27abcf3db0f4255a68f8385698 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 7 Mar 2024 14:12:29 +0100 Subject: [PATCH 08/52] refactor --- docker/kernel_checker.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/docker/kernel_checker.py b/docker/kernel_checker.py index 0b2cec9..52d2ab2 100644 --- a/docker/kernel_checker.py +++ b/docker/kernel_checker.py @@ -12,18 +12,19 @@ KERNEL_BUSY_CHECK_INTERVAL_S: Final[float] = 5 + class JupyterKernelChecker: BASE_URL = "http://localhost:8888" HEADERS = {"accept": "application/json"} def __init__(self) -> None: - self.last_idle: datetime| None = None - + self.last_idle: datetime | None = None + def _get(self, path: str) -> dict: - r = requests.get(f'{self.BASE_URL}{path}', headers=self.HEADERS) + r = requests.get(f"{self.BASE_URL}{path}", headers=self.HEADERS) return r.json() - def _are_kernels_busy(self)-> bool: + def _are_kernels_busy(self) -> bool: json_response = self._get("/api/kernels") are_kernels_busy = False @@ -36,29 +37,27 @@ def _are_kernels_busy(self)-> bool: are_kernels_busy = True return are_kernels_busy - + def check(self): are_kernels_busy = self._are_kernels_busy() - + if 
are_kernels_busy: self.last_idle = None if not are_kernels_busy and self.last_idle is None: self.last_idle = datetime.utcnow() - - def get_idle_seconds(self)-> float: + def get_idle_seconds(self) -> float: if self.last_idle is None: return 0 return (datetime.utcnow() - self.last_idle).total_seconds() - + async def run(self): while True: with suppress(Exception): self.check() await asyncio.sleep(KERNEL_BUSY_CHECK_INTERVAL_S) - kernel_checker = JupyterKernelChecker() @@ -67,20 +66,20 @@ async def run(self): class MainHandler(tornado.web.RequestHandler): def get(self): idle_seconds = kernel_checker.get_idle_seconds() - response = ( - {"seconds_inactive" : idle_seconds if idle_seconds > 0 else None} - ) + response = {"seconds_inactive": idle_seconds if idle_seconds > 0 else 0} self.write(json.dumps(response)) -def make_app()-> tornado.web.Application: +def make_app() -> tornado.web.Application: return tornado.web.Application([(r"/", MainHandler)]) + async def main(): app = make_app() app.listen(19597) asyncio.create_task(kernel_checker.run()) await asyncio.Event().wait() + if __name__ == "__main__": asyncio.run(main()) From 0c43510a72fd45cca9a9b7c9b45f483f02062df0 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 7 Mar 2024 14:25:06 +0100 Subject: [PATCH 09/52] rename --- .osparc/jupyter-math/runtime.yml | 2 +- Dockerfile | 2 +- docker/{inactivity.py => activity.py} | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) rename docker/{inactivity.py => activity.py} (69%) diff --git a/.osparc/jupyter-math/runtime.yml b/.osparc/jupyter-math/runtime.yml index ee7d2e0..9498aaf 100644 --- a/.osparc/jupyter-math/runtime.yml +++ b/.osparc/jupyter-math/runtime.yml @@ -21,5 +21,5 @@ paths-mapping: callbacks-mapping: inactivity: service: container - command: "/docker/inactivity.py" + command: "/docker/activity.py" timeout: 1 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 9b0c8f6..fc99631 100644 --- a/Dockerfile +++ b/Dockerfile @@ -91,7 +91,7 @@ ENV 
JP_LSP_VIRTUAL_DIR="/home/${NB_USER}/.virtual_documents" # Copying boot scripts COPY --chown=$NB_UID:$NB_GID docker /docker -RUN chmod +x /docker/inactivity.py \ +RUN chmod +x /docker/activity.py \ && chmod +x /docker/kernel_checker.py RUN echo 'export PATH="/home/${NB_USER}/.venv/bin:$PATH"' >> "/home/${NB_USER}/.bashrc" diff --git a/docker/inactivity.py b/docker/activity.py similarity index 69% rename from docker/inactivity.py rename to docker/activity.py index 37ea005..51173a6 100644 --- a/docker/inactivity.py +++ b/docker/activity.py @@ -1,6 +1,6 @@ #!/home/jovyan/.venv/bin/python -# prints the result of the inactivity command +# prints the result of the activity command import requests From 39f7156981f623dae0f3c6c13296c3c51f1c2f33 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 18 Mar 2024 15:22:07 +0100 Subject: [PATCH 10/52] update kernel checker script --- docker/kernel_checker.py | 92 +++++++++++++++++++++++++++++++++------- 1 file changed, 76 insertions(+), 16 deletions(-) diff --git a/docker/kernel_checker.py b/docker/kernel_checker.py index 52d2ab2..fd7874d 100644 --- a/docker/kernel_checker.py +++ b/docker/kernel_checker.py @@ -1,30 +1,39 @@ #!/home/jovyan/.venv/bin/python +# How does this work? +# 1. controls that the service is not busy at regular intervals +# 2a. cheks if kernels are busy +# 2b. checks total CPU usage of all children processes is >= THRESHOLD_CPU_USAGE +# 3. 
if either of the above checks if True the service will result as busy + + import asyncio import json +import psutil import requests -from datetime import datetime import tornado + +from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import suppress +from datetime import datetime from typing import Final -KERNEL_BUSY_CHECK_INTERVAL_S: Final[float] = 5 +CHECK_INTERVAL_S: Final[float] = 5 +CPU_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 +THRESHOLD_CPU_USAGE: Final[float] = 20 # percent in range [0, 100] -class JupyterKernelChecker: +class JupyterKernelMonitor: BASE_URL = "http://localhost:8888" HEADERS = {"accept": "application/json"} - def __init__(self) -> None: - self.last_idle: datetime | None = None - def _get(self, path: str) -> dict: r = requests.get(f"{self.BASE_URL}{path}", headers=self.HEADERS) return r.json() - def _are_kernels_busy(self) -> bool: + def are_kernels_busy(self) -> bool: json_response = self._get("/api/kernels") are_kernels_busy = False @@ -38,13 +47,63 @@ def _are_kernels_busy(self) -> bool: return are_kernels_busy + +class CPUUsageMonitor: + def __init__(self, threshold: float): + self.threshold = threshold + def _get_children_processes(self, pid) -> list[psutil.Process]: + try: + return psutil.Process(pid).children(recursive=True) + except psutil.NoSuchProcess: + return [] + + def _get_brother_processes(self) -> list[psutil.Process]: + # Returns the CPU usage of all processes except this one. + # ASSUMPTIONS: + # - `CURRENT_PROC` is a child of root process + # - `CURRENT_PROC` does not create any child processes + # + # It looks for its brothers (and their children) p1 to pN in order + # to compute real CPU usage. + # - CURRENT_PROC + # - p1 + # ... 
+ # - pN + current_process = psutil.Process() + parent_pid = current_process.ppid() + children = self._get_children_processes(parent_pid) + return [c for c in children if c.pid != current_process.pid] + + def _get_total_cpu_usage(self) -> float: + with ThreadPoolExecutor(max_workers=10) as executor: + futures = [ + executor.submit(x.cpu_percent, CPU_USAGE_MONITORING_INTERVAL_S) + for x in self._get_brother_processes() + ] + return sum([future.result() for future in as_completed(futures)]) + + def are_children_busy(self) -> bool: + return self._get_total_cpu_usage() >= self.threshold + + +class ActivityManager: + def __init__(self, interval: float) -> None: + self.interval = interval + self.last_idle: datetime | None = None + + self.jupyter_kernel_monitor = JupyterKernelMonitor() + self.cpu_usage_monitor = CPUUsageMonitor(THRESHOLD_CPU_USAGE) + def check(self): - are_kernels_busy = self._are_kernels_busy() + is_busy = ( + self.jupyter_kernel_monitor.are_kernels_busy() + or self.cpu_usage_monitor.are_children_busy() + ) - if are_kernels_busy: + if is_busy: self.last_idle = None - if not are_kernels_busy and self.last_idle is None: + if not is_busy and self.last_idle is None: self.last_idle = datetime.utcnow() def get_idle_seconds(self) -> float: @@ -57,17 +116,18 @@ async def run(self): while True: with suppress(Exception): self.check() - await asyncio.sleep(KERNEL_BUSY_CHECK_INTERVAL_S) + await asyncio.sleep(self.interval) -kernel_checker = JupyterKernelChecker() +activity_manager = ActivityManager(CHECK_INTERVAL_S) class MainHandler(tornado.web.RequestHandler): def get(self): - idle_seconds = kernel_checker.get_idle_seconds() - response = {"seconds_inactive": idle_seconds if idle_seconds > 0 else 0} - self.write(json.dumps(response)) + idle_seconds = activity_manager.get_idle_seconds() + seconds_inactive = idle_seconds if idle_seconds > 0 else 0 + + self.write(json.dumps({"seconds_inactive": seconds_inactive})) def make_app() -> tornado.web.Application: @@ -77,7 
+137,7 @@ def make_app() -> tornado.web.Application: async def main(): app = make_app() app.listen(19597) - asyncio.create_task(kernel_checker.run()) + asyncio.create_task(activity_manager.run()) await asyncio.Event().wait() From 40e15036582b38f89af1584dd6758c4c285bc381 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 18 Mar 2024 15:30:49 +0100 Subject: [PATCH 11/52] rename --- docker/{kernel_checker.py => activity_monitor.py} | 0 docker/entrypoint.bash | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename docker/{kernel_checker.py => activity_monitor.py} (100%) diff --git a/docker/kernel_checker.py b/docker/activity_monitor.py similarity index 100% rename from docker/kernel_checker.py rename to docker/activity_monitor.py diff --git a/docker/entrypoint.bash b/docker/entrypoint.bash index 84b5150..ac73777 100755 --- a/docker/entrypoint.bash +++ b/docker/entrypoint.bash @@ -74,5 +74,5 @@ chmod gu-w "/home/${NB_USER}/work" echo echo "$INFO" "Starting notebook ..." -exec gosu "$NB_USER" /docker/kernel_checker.py & +exec gosu "$NB_USER" /docker/activity_monitor.py & exec gosu "$NB_USER" /docker/boot_notebook.bash From 6a8d74e9d2060c8097c6d460c98dc38509fa9d0a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 19 Mar 2024 11:03:41 +0100 Subject: [PATCH 12/52] refactor lower usage percentage --- Dockerfile | 2 +- docker/activity_monitor.py | 39 +++++++++++++++++++++++++++++++++++--- 2 files changed, 37 insertions(+), 4 deletions(-) mode change 100644 => 100755 docker/activity_monitor.py diff --git a/Dockerfile b/Dockerfile index 40fcb66..4831188 100644 --- a/Dockerfile +++ b/Dockerfile @@ -93,7 +93,7 @@ ENV JP_LSP_VIRTUAL_DIR="/home/${NB_USER}/.virtual_documents" COPY --chown=$NB_UID:$NB_GID docker /docker RUN chmod +x /docker/activity.py \ - && chmod +x /docker/kernel_checker.py + && chmod +x /docker/activity_monitor.py RUN echo 'export PATH="/home/${NB_USER}/.venv/bin:$PATH"' >> "/home/${NB_USER}/.bashrc" diff --git a/docker/activity_monitor.py 
b/docker/activity_monitor.py old mode 100644 new mode 100755 index fd7874d..7919bfc --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -13,6 +13,7 @@ import psutil import requests import tornado +import subprocess from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import suppress @@ -22,7 +23,7 @@ CHECK_INTERVAL_S: Final[float] = 5 CPU_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 -THRESHOLD_CPU_USAGE: Final[float] = 20 # percent in range [0, 100] +THRESHOLD_CPU_USAGE: Final[float] = 5 # percent in range [0, 100] class JupyterKernelMonitor: @@ -51,6 +52,7 @@ def are_kernels_busy(self) -> bool: class CPUUsageMonitor: def __init__(self, threshold: float): self.threshold = threshold + def _get_children_processes(self, pid) -> list[psutil.Process]: try: return psutil.Process(pid).children(recursive=True) @@ -62,7 +64,7 @@ def _get_brother_processes(self) -> list[psutil.Process]: # ASSUMPTIONS: # - `CURRENT_PROC` is a child of root process # - `CURRENT_PROC` does not create any child processes - # + # # It looks for its brothers (and their children) p1 to pN in order # to compute real CPU usage. 
# - CURRENT_PROC @@ -74,6 +76,15 @@ def _get_brother_processes(self) -> list[psutil.Process]: children = self._get_children_processes(parent_pid) return [c for c in children if c.pid != current_process.pid] + def _get_cpu_usage(self, pid: int) -> float: + cmd = f"ps -p {pid} -o %cpu --no-headers" + output = subprocess.check_output(cmd, shell=True, universal_newlines=True) + try: + return float(output) + except ValueError: + print(f"Could not parse {pid} cpu usage: {output}") + return float(0) + def _get_total_cpu_usage(self) -> float: with ThreadPoolExecutor(max_workers=10) as executor: futures = [ @@ -122,6 +133,23 @@ async def run(self): activity_manager = ActivityManager(CHECK_INTERVAL_S) +class DebugHandler(tornado.web.RequestHandler): + def get(self): + self.write( + json.dumps( + { + "cpu_usage": { + "current": activity_manager.cpu_usage_monitor._get_total_cpu_usage(), + "busy": activity_manager.cpu_usage_monitor.are_children_busy(), + }, + "kernal_monitor": { + "busy": activity_manager.jupyter_kernel_monitor.are_kernels_busy() + }, + } + ) + ) + + class MainHandler(tornado.web.RequestHandler): def get(self): idle_seconds = activity_manager.get_idle_seconds() @@ -131,7 +159,12 @@ def get(self): def make_app() -> tornado.web.Application: - return tornado.web.Application([(r"/", MainHandler)]) + return tornado.web.Application( + [ + (r"/", MainHandler), + (r"/debug", DebugHandler), + ] + ) async def main(): From e92fe5b04604537ebbb1be4e7abdbf3b4ee7dc94 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 19 Mar 2024 12:25:15 +0100 Subject: [PATCH 13/52] bump library version --- .github/workflows/check-image.yml | 6 +++--- Makefile | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check-image.yml b/.github/workflows/check-image.yml index e17852a..168acd3 100644 --- a/.github/workflows/check-image.yml +++ b/.github/workflows/check-image.yml @@ -9,14 +9,14 @@ jobs: - name: Checkout repo content uses: actions/checkout@v2 - 
name: ooil version - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: args: ooil --version - name: Assemble docker compose spec - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: args: ooil compose - name: Build all images if multiple - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-8 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: args: docker compose build diff --git a/Makefile b/Makefile index ffd3020..9a3fbf8 100644 --- a/Makefile +++ b/Makefile @@ -35,7 +35,7 @@ define _bumpversion # upgrades as $(subst $(1),,$@) version, commits and tags @docker run -it --rm -v $(PWD):/${DOCKER_IMAGE_NAME} \ -u $(shell id -u):$(shell id -g) \ - itisfoundation/ci-service-integration-library:v1.0.3-dev-8 \ + itisfoundation/ci-service-integration-library:v1.0.3-dev-10 \ sh -c "cd /${DOCKER_IMAGE_NAME} && bump2version --verbose --list --config-file $(1) $(subst $(2),,$@)" endef @@ -49,7 +49,7 @@ version-patch version-minor version-major: .bumpversion.cfg ## increases service compose-spec: ## runs ooil to assemble the docker-compose.yml file @docker run -it --rm -v $(PWD):/${DOCKER_IMAGE_NAME} \ -u $(shell id -u):$(shell id -g) \ - itisfoundation/ci-service-integration-library:v1.0.3-dev-8 \ + itisfoundation/ci-service-integration-library:v1.0.3-dev-10 \ sh -c "cd /${DOCKER_IMAGE_NAME} && ooil compose" build: | compose-spec ## build docker image From 3ea2298749cd430cef2e31270af122495953a697 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 19 Mar 2024 12:30:16 +0100 Subject: [PATCH 14/52] updating --- kernels/python-maths/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernels/python-maths/requirements.txt b/kernels/python-maths/requirements.txt index bfd1372..b1b1d2e 
100644 --- a/kernels/python-maths/requirements.txt +++ b/kernels/python-maths/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --output-file=kernels/python-maths/requirements.txt --resolver=backtracking kernels/python-maths/requirements.in +# pip-compile --output-file=kernels/python-maths/requirements.txt kernels/python-maths/requirements.in # aiofiles==22.1.0 # via ypy-websocket From 1ef6b75b4af853906a3099b2644ed2c8e3b9f0f3 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 19 Mar 2024 16:17:38 +0100 Subject: [PATCH 15/52] wip --- Makefile | 9 ++ docker/activity_monitor.py | 181 ++++++++++++++++++++------------- requirements/test.in | 12 +++ requirements/test.txt | 41 ++++++++ tests/_import_utils.py | 13 +++ tests/conftest.py | 0 tests/test_activity_monitor.py | 159 +++++++++++++++++++++++++++++ 7 files changed, 346 insertions(+), 69 deletions(-) create mode 100644 requirements/test.in create mode 100644 requirements/test.txt create mode 100644 tests/_import_utils.py create mode 100644 tests/conftest.py create mode 100644 tests/test_activity_monitor.py diff --git a/Makefile b/Makefile index 9a3fbf8..655e9a2 100644 --- a/Makefile +++ b/Makefile @@ -29,6 +29,7 @@ devenv: .venv ## create a python virtual environment with tools to dev, run and requirements: devenv ## runs pip-tools to build requirements.txt that will be installed in the JupyterLab # freezes requirements pip-compile kernels/python-maths/requirements.in --resolver=backtracking --output-file kernels/python-maths/requirements.txt + pip-compile requirements/test.in --resolver=backtracking --output-file requirements/test.txt # Builds new service version ---------------------------------------------------------------------------- define _bumpversion @@ -66,6 +67,14 @@ publish-local: ## push to local throw away registry to test integration docker push 
registry:5000/simcore/services/dynamic/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) @curl registry:5000/v2/_catalog | jq +.PHONY: install-dev +install-dev: ## run tests in development mode + pip install -r requirements/test.txt + +.PHONY: tests-dev +tests-dev: ## run tests in development mode + .venv/bin/pytest --pdb -vvv tests + .PHONY: help help: ## this colorful help @echo "Recipes for '$(notdir $(CURDIR))':" diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 7919bfc..95b8765 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -1,24 +1,19 @@ #!/home/jovyan/.venv/bin/python -# How does this work? -# 1. controls that the service is not busy at regular intervals -# 2a. cheks if kernels are busy -# 2b. checks total CPU usage of all children processes is >= THRESHOLD_CPU_USAGE -# 3. if either of the above checks if True the service will result as busy - - import asyncio import json import psutil import requests import tornado -import subprocess +import time +from threading import Thread from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import suppress from datetime import datetime from typing import Final +from abc import abstractmethod CHECK_INTERVAL_S: Final[float] = 5 @@ -26,15 +21,86 @@ THRESHOLD_CPU_USAGE: Final[float] = 5 # percent in range [0, 100] -class JupyterKernelMonitor: +# Utilities +class AbstractIsBusyMonitor: + def __init__(self, poll_interval: float) -> None: + self._poll_interval: float = poll_interval + self._keep_running: bool = True + self._thread: Thread | None = None + + self.is_busy: bool = True + + @abstractmethod + def _check_if_busy(self) -> bool: + """Must be user defined and returns if current + metric is to be considered busy + + Returns: + bool: True if considered busy + """ + + def _worker(self) -> None: + while self._keep_running: + self.is_busy = self._check_if_busy() + time.sleep(self._poll_interval) + + def start(self) -> None: + self._thread = 
Thread(target=self._worker, daemon=True) + self._thread.start() + + def stop(self) -> None: + self._keep_running = False + if self._thread: + self._thread.join() + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.stop() + + +def __get_children_processes(pid) -> list[psutil.Process]: + try: + return psutil.Process(pid).children(recursive=True) + except psutil.NoSuchProcess: + return [] + + +def _get_brother_processes() -> list[psutil.Process]: + # Returns the CPU usage of all processes except this one. + # ASSUMPTIONS: + # - `CURRENT_PROC` is a child of root process + # - `CURRENT_PROC` does not create any child processes + # + # It looks for its brothers (and their children) p1 to pN in order + # to compute real CPU usage. + # - CURRENT_PROC + # - p1 + # ... + # - pN + current_process = psutil.Process() + parent_pid = current_process.ppid() + children = __get_children_processes(parent_pid) + return [c for c in children if c.pid != current_process.pid] + + +# Monitors + + +class JupyterKernelMonitor(AbstractIsBusyMonitor): BASE_URL = "http://localhost:8888" HEADERS = {"accept": "application/json"} + def __init__(self, poll_interval: float) -> None: + super().__init__(poll_interval=poll_interval) + def _get(self, path: str) -> dict: r = requests.get(f"{self.BASE_URL}{path}", headers=self.HEADERS) return r.json() - def are_kernels_busy(self) -> bool: + def _are_kernels_busy(self) -> bool: json_response = self._get("/api/kernels") are_kernels_busy = False @@ -48,52 +114,24 @@ def are_kernels_busy(self) -> bool: return are_kernels_busy + def _check_if_busy(self) -> bool: + return self._are_kernels_busy() -class CPUUsageMonitor: - def __init__(self, threshold: float): - self.threshold = threshold - def _get_children_processes(self, pid) -> list[psutil.Process]: - try: - return psutil.Process(pid).children(recursive=True) - except psutil.NoSuchProcess: - return [] - - def _get_brother_processes(self) -> 
list[psutil.Process]: - # Returns the CPU usage of all processes except this one. - # ASSUMPTIONS: - # - `CURRENT_PROC` is a child of root process - # - `CURRENT_PROC` does not create any child processes - # - # It looks for its brothers (and their children) p1 to pN in order - # to compute real CPU usage. - # - CURRENT_PROC - # - p1 - # ... - # - pN - current_process = psutil.Process() - parent_pid = current_process.ppid() - children = self._get_children_processes(parent_pid) - return [c for c in children if c.pid != current_process.pid] - - def _get_cpu_usage(self, pid: int) -> float: - cmd = f"ps -p {pid} -o %cpu --no-headers" - output = subprocess.check_output(cmd, shell=True, universal_newlines=True) - try: - return float(output) - except ValueError: - print(f"Could not parse {pid} cpu usage: {output}") - return float(0) +class CPUUsageMonitor(AbstractIsBusyMonitor): + def __init__(self, poll_interval: float, *, threshold: float): + super().__init__(poll_interval=poll_interval) + self.threshold = threshold def _get_total_cpu_usage(self) -> float: with ThreadPoolExecutor(max_workers=10) as executor: futures = [ executor.submit(x.cpu_percent, CPU_USAGE_MONITORING_INTERVAL_S) - for x in self._get_brother_processes() + for x in _get_brother_processes() ] return sum([future.result() for future in as_completed(futures)]) - def are_children_busy(self) -> bool: + def _check_if_busy(self) -> bool: return self._get_total_cpu_usage() >= self.threshold @@ -102,14 +140,13 @@ def __init__(self, interval: float) -> None: self.interval = interval self.last_idle: datetime | None = None - self.jupyter_kernel_monitor = JupyterKernelMonitor() - self.cpu_usage_monitor = CPUUsageMonitor(THRESHOLD_CPU_USAGE) + self.jupyter_kernel_monitor = JupyterKernelMonitor(CHECK_INTERVAL_S) + self.cpu_usage_monitor = CPUUsageMonitor( + CHECK_INTERVAL_S, threshold=THRESHOLD_CPU_USAGE + ) def check(self): - is_busy = ( - self.jupyter_kernel_monitor.are_kernels_busy() - or 
self.cpu_usage_monitor.are_children_busy() - ) + is_busy = self.jupyter_kernel_monitor.is_busy or self.cpu_usage_monitor.is_busy if is_busy: self.last_idle = None @@ -121,7 +158,8 @@ def get_idle_seconds(self) -> float: if self.last_idle is None: return 0 - return (datetime.utcnow() - self.last_idle).total_seconds() + idle_seconds = (datetime.utcnow() - self.last_idle).total_seconds() + return idle_seconds if idle_seconds > 0 else 0 async def run(self): while True: @@ -130,20 +168,21 @@ async def run(self): await asyncio.sleep(self.interval) -activity_manager = ActivityManager(CHECK_INTERVAL_S) - - class DebugHandler(tornado.web.RequestHandler): - def get(self): + def initialize(self, activity_manager: ActivityManager): + self.activity_manager: ActivityManager = activity_manager + + async def get(self): + assert self.activity_manager self.write( json.dumps( { + "seconds_inactive": self.activity_manager.get_idle_seconds(), "cpu_usage": { - "current": activity_manager.cpu_usage_monitor._get_total_cpu_usage(), - "busy": activity_manager.cpu_usage_monitor.are_children_busy(), + "is_busy": self.activity_manager.cpu_usage_monitor.is_busy, }, - "kernal_monitor": { - "busy": activity_manager.jupyter_kernel_monitor.are_kernels_busy() + "kernel_monitor": { + "is_busy": self.activity_manager.jupyter_kernel_monitor.is_busy }, } ) @@ -151,24 +190,28 @@ def get(self): class MainHandler(tornado.web.RequestHandler): - def get(self): - idle_seconds = activity_manager.get_idle_seconds() - seconds_inactive = idle_seconds if idle_seconds > 0 else 0 + def initialize(self, activity_manager: ActivityManager): + self.activity_manager: ActivityManager = activity_manager - self.write(json.dumps({"seconds_inactive": seconds_inactive})) + async def get(self): + assert self.activity_manager + self.write( + json.dumps({"seconds_inactive": self.activity_manager.get_idle_seconds()}) + ) -def make_app() -> tornado.web.Application: +def make_app(activity_manager) -> tornado.web.Application: return 
tornado.web.Application( [ - (r"/", MainHandler), - (r"/debug", DebugHandler), + (r"/", MainHandler, dict(activity_manager=activity_manager)), + (r"/debug", DebugHandler, dict(activity_manager=activity_manager)), ] ) async def main(): - app = make_app() + activity_manager = ActivityManager(CHECK_INTERVAL_S) + app = make_app(activity_manager) app.listen(19597) asyncio.create_task(activity_manager.run()) await asyncio.Event().wait() diff --git a/requirements/test.in b/requirements/test.in new file mode 100644 index 0000000..e41c02b --- /dev/null +++ b/requirements/test.in @@ -0,0 +1,12 @@ +# from jupyter + +psutil +tornado + +# testing + +pytest +pytest-asyncio +pytest-mock +requests +tenacity \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt new file mode 100644 index 0000000..6f731a3 --- /dev/null +++ b/requirements/test.txt @@ -0,0 +1,41 @@ +# +# This file is autogenerated by pip-compile with Python 3.10 +# by the following command: +# +# pip-compile --output-file=requirements/test.txt requirements/test.in +# +certifi==2024.2.2 + # via requests +charset-normalizer==3.3.2 + # via requests +exceptiongroup==1.2.0 + # via pytest +idna==3.6 + # via requests +iniconfig==2.0.0 + # via pytest +packaging==24.0 + # via pytest +pluggy==1.4.0 + # via pytest +psutil==5.9.8 + # via -r requirements/test.in +pytest==8.1.1 + # via + # -r requirements/test.in + # pytest-asyncio + # pytest-mock +pytest-asyncio==0.23.6 + # via -r requirements/test.in +pytest-mock==3.12.0 + # via -r requirements/test.in +requests==2.31.0 + # via -r requirements/test.in +tenacity==8.2.3 + # via -r requirements/test.in +tomli==2.0.1 + # via pytest +tornado==6.4 + # via -r requirements/test.in +urllib3==2.2.1 + # via requests diff --git a/tests/_import_utils.py b/tests/_import_utils.py new file mode 100644 index 0000000..33681c4 --- /dev/null +++ b/tests/_import_utils.py @@ -0,0 +1,13 @@ +import sys +from pathlib import Path + +_CURRENT_DIR = ( + Path(sys.argv[0] if 
__name__ == "__main__" else __file__).resolve().parent +) + + +def allow_imports() -> None: + path = (_CURRENT_DIR / "..." / ".." / ".." / "docker").absolute().resolve() + sys.path.append(f"{path}") + + import activity_monitor diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py new file mode 100644 index 0000000..9387183 --- /dev/null +++ b/tests/test_activity_monitor.py @@ -0,0 +1,159 @@ +import ctypes +import pytest +import socket +import threading +import psutil +import time + +from concurrent.futures import ThreadPoolExecutor, wait +from multiprocessing import Array, Process +from tempfile import NamedTemporaryFile + +from typing import Callable, Final, TYPE_CHECKING +from pytest_mock import MockFixture +from tenacity import AsyncRetrying +from tenacity.stop import stop_after_delay +from tenacity.wait import wait_fixed + + +if TYPE_CHECKING: + from ..docker import activity_monitor +else: + from _import_utils import allow_imports + + allow_imports() + import activity_monitor + + +_LOCAL_LISTEN_PORT: Final[int] = 12345 + +pytestmark = pytest.mark.asyncio + + +class _ListenSocketServer: + def __init__(self): + self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.server_socket.bind(("localhost", _LOCAL_LISTEN_PORT)) + self.server_socket.listen(100) # max number of connections + self.stop_event = threading.Event() + + def start(self): + threading.Thread(target=self._accept_clients, daemon=True).start() + + def stop(self): + self.stop_event.set() + + def _accept_clients(self): + while not self.stop_event.is_set(): + client_socket, _ = self.server_socket.accept() + threading.Thread( + target=self._handle_client, daemon=True, args=(client_socket,) + ).start() + + def _handle_client(self, client_socket): + try: + while not self.stop_event.is_set(): + data = client_socket.recv(1024) + if not data: + break + finally: + 
client_socket.close() + + +class _ActivityGenerator: + def __init__(self, *, network: bool, cpu: bool, disk: bool) -> None: + self._process: Process | None = None + + _keep_running = True + self.shared_array = Array(ctypes.c_bool, 4) + self.shared_array[0] = network + self.shared_array[1] = cpu + self.shared_array[2] = disk + self.shared_array[3] = _keep_running + + def __load_cpu(self) -> None: + for _ in range(1000000): + pass + + def __load_network(self) -> None: + client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + client_socket.connect(("localhost", _LOCAL_LISTEN_PORT)) + client_socket.sendall("mock_message_to_send".encode()) + client_socket.close() + + def __load_disk(self) -> None: + with NamedTemporaryFile() as temp_file: + temp_file.write(b"0" * 1024 * 1024) # 1MB + temp_file.read() + + def _run(self) -> None: + with ThreadPoolExecutor(max_workers=3) as executor: + while self.shared_array[3]: + futures = [] + if self.shared_array[0]: + futures.append(executor.submit(self.__load_network)) + if self.shared_array[1]: + futures.append(executor.submit(self.__load_cpu)) + if self.shared_array[2]: + futures.append(executor.submit(self.__load_disk)) + + wait(futures) + time.sleep(0.01) + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.stop() + + def start(self) -> None: + self._process = Process(target=self._run, daemon=True) + self._process.start() + + def stop(self) -> None: + self.shared_array[3] = False + if self._process: + self._process.join() + + def get_pid(self) -> int: + assert self._process + return self._process.pid + + +@pytest.fixture +def socket_server() -> None: + socket_server = _ListenSocketServer() + socket_server.start() + yield None + socket_server.stop() + + +@pytest.fixture +def mock__get_brother_processes(mocker: MockFixture) -> Callable: + def _(pids: list[int]) -> None: + mocker.patch( + "activity_monitor._get_brother_processes", + 
return_value=[psutil.Process(p) for p in pids], + ) + + return _ + + +async def test_is_working(socket_server: None, mock__get_brother_processes: Callable): + with _ActivityGenerator(network=False, cpu=False, disk=False) as activity_generator: + mock__get_brother_processes([activity_generator.get_pid()]) + + assert len(activity_monitor._get_brother_processes()) == 1 + + # some tests + with activity_monitor.CPUUsageMonitor(1, threshold=0) as cpu_usage_monitor: + # poll for it to be idle since it takes some time + async for attempt in AsyncRetrying( + stop=stop_after_delay(3), wait=wait_fixed(0.1), reraise=True + ): + with attempt: + # TODO: figure out why test is wrong here + assert cpu_usage_monitor.is_busy is False + + # now we can test whatever here From a6d8c9b399ebf5ab29b2ea128b4550daac798e55 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 09:50:33 +0100 Subject: [PATCH 16/52] cpu usage test --- tests/test_activity_monitor.py | 100 +++++++++++++++++++++++---------- 1 file changed, 71 insertions(+), 29 deletions(-) diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 9387183..0aa4c76 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -9,7 +9,7 @@ from multiprocessing import Array, Process from tempfile import NamedTemporaryFile -from typing import Callable, Final, TYPE_CHECKING +from typing import Callable, Final, TYPE_CHECKING, Iterable from pytest_mock import MockFixture from tenacity import AsyncRetrying from tenacity.stop import stop_after_delay @@ -35,16 +35,19 @@ def __init__(self): self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.server_socket.bind(("localhost", _LOCAL_LISTEN_PORT)) self.server_socket.listen(100) # max number of connections - self.stop_event = threading.Event() + self._process: Process | None = None def start(self): - threading.Thread(target=self._accept_clients, daemon=True).start() + self._process = 
Process(target=self._accept_clients, daemon=True) + self._process.start() def stop(self): - self.stop_event.set() + if self._process: + self._process.terminate() + self._process.join() def _accept_clients(self): - while not self.stop_event.is_set(): + while True: client_socket, _ = self.server_socket.accept() threading.Thread( target=self._handle_client, daemon=True, args=(client_socket,) @@ -52,7 +55,7 @@ def _accept_clients(self): def _handle_client(self, client_socket): try: - while not self.stop_event.is_set(): + while True: data = client_socket.recv(1024) if not data: break @@ -98,13 +101,13 @@ def _run(self) -> None: futures.append(executor.submit(self.__load_disk)) wait(futures) - time.sleep(0.01) + time.sleep(0.1) def __enter__(self): self.start() return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, exc_type, exc_val, exc_tb): self.stop() def start(self) -> None: @@ -112,7 +115,8 @@ def start(self) -> None: self._process.start() def stop(self) -> None: - self.shared_array[3] = False + _keep_running = False + self.shared_array[3] = _keep_running if self._process: self._process.join() @@ -130,30 +134,68 @@ def socket_server() -> None: @pytest.fixture -def mock__get_brother_processes(mocker: MockFixture) -> Callable: +def mock__get_brother_processes(mocker: MockFixture) -> Callable[[list[int]], list[psutil.Process]]: + def _get_processes(pids: list[int]) -> list[psutil.Process]: + results = [] + for pid in pids: + proc = psutil.Process(pid) + assert proc.status() + results.append(proc) + return results + def _(pids: list[int]) -> None: mocker.patch( - "activity_monitor._get_brother_processes", - return_value=[psutil.Process(p) for p in pids], + "activity_monitor._get_brother_processes", return_value=_get_processes(pids) ) return _ -async def test_is_working(socket_server: None, mock__get_brother_processes: Callable): - with _ActivityGenerator(network=False, cpu=False, disk=False) as activity_generator: - 
mock__get_brother_processes([activity_generator.get_pid()]) - - assert len(activity_monitor._get_brother_processes()) == 1 - - # some tests - with activity_monitor.CPUUsageMonitor(1, threshold=0) as cpu_usage_monitor: - # poll for it to be idle since it takes some time - async for attempt in AsyncRetrying( - stop=stop_after_delay(3), wait=wait_fixed(0.1), reraise=True - ): - with attempt: - # TODO: figure out why test is wrong here - assert cpu_usage_monitor.is_busy is False - - # now we can test whatever here +@pytest.fixture +def create_activity_generator() -> ( + Iterable[Callable[[bool, bool, bool], _ActivityGenerator]] +): + created: list[_ActivityGenerator] = [] + + def _(*, network: bool, cpu: bool, disk: bool) -> _ActivityGenerator: + instance = _ActivityGenerator(network=network, cpu=cpu, disk=disk) + instance.start() + created.append(instance) + return instance + + yield _ + + for instance in created: + instance.stop() + + +async def test_cpu_usage_monitor_not_busy( + socket_server: None, + mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], +): + activity_generator = create_activity_generator(network=False, cpu=False, disk=False) + mock__get_brother_processes([activity_generator.get_pid()]) + + with activity_monitor.CPUUsageMonitor(1, threshold=5) as cpu_usage_monitor: + async for attempt in AsyncRetrying( + stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True + ): + with attempt: + assert cpu_usage_monitor.is_busy is False + + +async def test_cpu_usage_monitor_still_busy( + socket_server: None, + mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], +): + activity_generator = create_activity_generator(network=False, cpu=True, disk=False) + mock__get_brother_processes([activity_generator.get_pid()]) + + with activity_monitor.CPUUsageMonitor(0.5, 
threshold=5) as cpu_usage_monitor: + # wait for monitor to trigger + time.sleep(1) + + # must still result busy + assert cpu_usage_monitor.is_busy is True From c16219ccc3d02a528e03cf6b3ab56afac0ae8a40 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 09:55:17 +0100 Subject: [PATCH 17/52] refactor tests --- tests/conftest.py | 135 ++++++++++++++++++++++++++++++++ tests/test_activity_monitor.py | 138 ++------------------------------- 2 files changed, 140 insertions(+), 133 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index e69de29..78984a4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -0,0 +1,135 @@ +import ctypes +import pytest +import socket +import threading +import time + +from concurrent.futures import ThreadPoolExecutor, wait +from multiprocessing import Array, Process +from tempfile import NamedTemporaryFile + +from typing import Callable, Final, Iterable + + +_LOCAL_LISTEN_PORT: Final[int] = 12345 + + +class _ListenSocketServer: + def __init__(self): + self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.server_socket.bind(("localhost", _LOCAL_LISTEN_PORT)) + self.server_socket.listen(100) # max number of connections + self._process: Process | None = None + + def start(self): + self._process = Process(target=self._accept_clients, daemon=True) + self._process.start() + + def stop(self): + if self._process: + self._process.terminate() + self._process.join() + + def _accept_clients(self): + while True: + client_socket, _ = self.server_socket.accept() + threading.Thread( + target=self._handle_client, daemon=True, args=(client_socket,) + ).start() + + def _handle_client(self, client_socket): + try: + while True: + data = client_socket.recv(1024) + if not data: + break + finally: + client_socket.close() + + +@pytest.fixture +def socket_server() -> None: + socket_server = _ListenSocketServer() + socket_server.start() + yield None + socket_server.stop() + + +class _ActivityGenerator: + def 
__init__(self, *, network: bool, cpu: bool, disk: bool) -> None: + self._process: Process | None = None + + _keep_running = True + self.shared_array = Array(ctypes.c_bool, 4) + self.shared_array[0] = network + self.shared_array[1] = cpu + self.shared_array[2] = disk + self.shared_array[3] = _keep_running + + def __load_cpu(self) -> None: + for _ in range(1000000): + pass + + def __load_network(self) -> None: + client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + client_socket.connect(("localhost", _LOCAL_LISTEN_PORT)) + client_socket.sendall("mock_message_to_send".encode()) + client_socket.close() + + def __load_disk(self) -> None: + with NamedTemporaryFile() as temp_file: + temp_file.write(b"0" * 1024 * 1024) # 1MB + temp_file.read() + + def _run(self) -> None: + with ThreadPoolExecutor(max_workers=3) as executor: + while self.shared_array[3]: + futures = [] + if self.shared_array[0]: + futures.append(executor.submit(self.__load_network)) + if self.shared_array[1]: + futures.append(executor.submit(self.__load_cpu)) + if self.shared_array[2]: + futures.append(executor.submit(self.__load_disk)) + + wait(futures) + time.sleep(0.1) + + def __enter__(self): + self.start() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.stop() + + def start(self) -> None: + self._process = Process(target=self._run, daemon=True) + self._process.start() + + def stop(self) -> None: + _keep_running = False + self.shared_array[3] = _keep_running + if self._process: + self._process.join() + + def get_pid(self) -> int: + assert self._process + return self._process.pid + + +@pytest.fixture +def create_activity_generator() -> ( + Iterable[Callable[[bool, bool, bool], _ActivityGenerator]] +): + created: list[_ActivityGenerator] = [] + + def _(*, network: bool, cpu: bool, disk: bool) -> _ActivityGenerator: + instance = _ActivityGenerator(network=network, cpu=cpu, disk=disk) + instance.start() + created.append(instance) + return instance + + yield _ + + for 
instance in created: + instance.stop() diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 0aa4c76..6e20c6d 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -1,20 +1,14 @@ -import ctypes import pytest -import socket -import threading import psutil import time -from concurrent.futures import ThreadPoolExecutor, wait -from multiprocessing import Array, Process -from tempfile import NamedTemporaryFile -from typing import Callable, Final, TYPE_CHECKING, Iterable +from typing import Callable, TYPE_CHECKING from pytest_mock import MockFixture from tenacity import AsyncRetrying from tenacity.stop import stop_after_delay from tenacity.wait import wait_fixed - +from conftest import _ActivityGenerator if TYPE_CHECKING: from ..docker import activity_monitor @@ -24,117 +18,13 @@ allow_imports() import activity_monitor - -_LOCAL_LISTEN_PORT: Final[int] = 12345 - pytestmark = pytest.mark.asyncio -class _ListenSocketServer: - def __init__(self): - self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.server_socket.bind(("localhost", _LOCAL_LISTEN_PORT)) - self.server_socket.listen(100) # max number of connections - self._process: Process | None = None - - def start(self): - self._process = Process(target=self._accept_clients, daemon=True) - self._process.start() - - def stop(self): - if self._process: - self._process.terminate() - self._process.join() - - def _accept_clients(self): - while True: - client_socket, _ = self.server_socket.accept() - threading.Thread( - target=self._handle_client, daemon=True, args=(client_socket,) - ).start() - - def _handle_client(self, client_socket): - try: - while True: - data = client_socket.recv(1024) - if not data: - break - finally: - client_socket.close() - - -class _ActivityGenerator: - def __init__(self, *, network: bool, cpu: bool, disk: bool) -> None: - self._process: Process | None = None - - _keep_running = True - self.shared_array = 
Array(ctypes.c_bool, 4) - self.shared_array[0] = network - self.shared_array[1] = cpu - self.shared_array[2] = disk - self.shared_array[3] = _keep_running - - def __load_cpu(self) -> None: - for _ in range(1000000): - pass - - def __load_network(self) -> None: - client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - client_socket.connect(("localhost", _LOCAL_LISTEN_PORT)) - client_socket.sendall("mock_message_to_send".encode()) - client_socket.close() - - def __load_disk(self) -> None: - with NamedTemporaryFile() as temp_file: - temp_file.write(b"0" * 1024 * 1024) # 1MB - temp_file.read() - - def _run(self) -> None: - with ThreadPoolExecutor(max_workers=3) as executor: - while self.shared_array[3]: - futures = [] - if self.shared_array[0]: - futures.append(executor.submit(self.__load_network)) - if self.shared_array[1]: - futures.append(executor.submit(self.__load_cpu)) - if self.shared_array[2]: - futures.append(executor.submit(self.__load_disk)) - - wait(futures) - time.sleep(0.1) - - def __enter__(self): - self.start() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.stop() - - def start(self) -> None: - self._process = Process(target=self._run, daemon=True) - self._process.start() - - def stop(self) -> None: - _keep_running = False - self.shared_array[3] = _keep_running - if self._process: - self._process.join() - - def get_pid(self) -> int: - assert self._process - return self._process.pid - - -@pytest.fixture -def socket_server() -> None: - socket_server = _ListenSocketServer() - socket_server.start() - yield None - socket_server.stop() - - @pytest.fixture -def mock__get_brother_processes(mocker: MockFixture) -> Callable[[list[int]], list[psutil.Process]]: +def mock__get_brother_processes( + mocker: MockFixture, +) -> Callable[[list[int]], list[psutil.Process]]: def _get_processes(pids: list[int]) -> list[psutil.Process]: results = [] for pid in pids: @@ -151,24 +41,6 @@ def _(pids: list[int]) -> None: return _ 
-@pytest.fixture -def create_activity_generator() -> ( - Iterable[Callable[[bool, bool, bool], _ActivityGenerator]] -): - created: list[_ActivityGenerator] = [] - - def _(*, network: bool, cpu: bool, disk: bool) -> _ActivityGenerator: - instance = _ActivityGenerator(network=network, cpu=cpu, disk=disk) - instance.start() - created.append(instance) - return instance - - yield _ - - for instance in created: - instance.stop() - - async def test_cpu_usage_monitor_not_busy( socket_server: None, mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], From bdef8b341b54105f8c7864f734ae06fe1744920f Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 10:35:23 +0100 Subject: [PATCH 18/52] refactor and add disk usage --- docker/activity_monitor.py | 87 +++++++++++++++++++++++++++++----- tests/test_activity_monitor.py | 4 +- 2 files changed, 76 insertions(+), 15 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 95b8765..9a0375b 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -17,8 +17,8 @@ CHECK_INTERVAL_S: Final[float] = 5 -CPU_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 -THRESHOLD_CPU_USAGE: Final[float] = 5 # percent in range [0, 100] +BUSY_THRESHOLD_CPU_USAGE: Final[float] = 5 # percent in range [0, 100] +THREAD_EXECUTOR_WORKERS: Final[int] = 10 # Utilities @@ -29,6 +29,7 @@ def __init__(self, poll_interval: float) -> None: self._thread: Thread | None = None self.is_busy: bool = True + self.thread_executor = ThreadPoolExecutor(max_workers=THREAD_EXECUTOR_WORKERS) @abstractmethod def _check_if_busy(self) -> bool: @@ -52,6 +53,7 @@ def stop(self) -> None: self._keep_running = False if self._thread: self._thread.join() + self.thread_executor.shutdown(wait=True) def __enter__(self): self.start() @@ -119,20 +121,73 @@ def _check_if_busy(self) -> bool: class CPUUsageMonitor(AbstractIsBusyMonitor): - def __init__(self, poll_interval: float, *, threshold: float): + 
CPU_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 + + def __init__(self, poll_interval: float, *, busy_threshold: float): super().__init__(poll_interval=poll_interval) - self.threshold = threshold + self.busy_threshold = busy_threshold def _get_total_cpu_usage(self) -> float: - with ThreadPoolExecutor(max_workers=10) as executor: - futures = [ - executor.submit(x.cpu_percent, CPU_USAGE_MONITORING_INTERVAL_S) - for x in _get_brother_processes() - ] - return sum([future.result() for future in as_completed(futures)]) + futures = [ + self.thread_executor.submit( + x.cpu_percent, self.CPU_USAGE_MONITORING_INTERVAL_S + ) + for x in _get_brother_processes() + ] + return sum([future.result() for future in as_completed(futures)]) def _check_if_busy(self) -> bool: - return self._get_total_cpu_usage() >= self.threshold + return self._get_total_cpu_usage() >= self.busy_threshold + + +class DiskUsageMonitor(AbstractIsBusyMonitor): + DISK_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 + + def __init__( + self, + poll_interval: float, + *, + read_usage_threshold: float, + write_usage_threshold: float, + ): + super().__init__(poll_interval=poll_interval) + self.read_usage_threshold = read_usage_threshold + self.write_usage_threshold = write_usage_threshold + self.executor = ThreadPoolExecutor(max_workers=THREAD_EXECUTOR_WORKERS) + + def _get_process_disk_usage(self, proc: psutil.Process) -> tuple[int, int]: + io_start = proc.disk_io_counters() + time.sleep(self.DISK_USAGE_MONITORING_INTERVAL_S) + io_end = proc.disk_io_counters() + + # Calculate the differences + read_bytes = io_end.read_bytes - io_start.read_bytes + write_bytes = io_end.write_bytes - io_start.write_bytes + return read_bytes, write_bytes + + def _get_total_disk_usage(self) -> tuple[int, int]: + futures = [ + self.thread_executor.submit(self._get_process_disk_usage, x) + for x in _get_brother_processes() + ] + + disk_usage: list[tuple[int, int]] = [ + future.result() for future in as_completed(futures) + ] + 
read_bytes: int = 0 + write_bytes: int = 0 + for read, write in disk_usage: + read_bytes += read + write_bytes += write + + return read_bytes, write_bytes + + def _check_if_busy(self) -> bool: + read_bytes, write_bytes = self._get_total_disk_usage() + return ( + read_bytes >= self.read_usage_threshold + or write_bytes >= self.write_usage_threshold + ) class ActivityManager: @@ -142,11 +197,17 @@ def __init__(self, interval: float) -> None: self.jupyter_kernel_monitor = JupyterKernelMonitor(CHECK_INTERVAL_S) self.cpu_usage_monitor = CPUUsageMonitor( - CHECK_INTERVAL_S, threshold=THRESHOLD_CPU_USAGE + CHECK_INTERVAL_S, busy_threshold=BUSY_THRESHOLD_CPU_USAGE ) + # TODO: change threshold + self.disk_usage_monitor = DiskUsageMonitor(CHECK_INTERVAL_S, usage_threshold=40) def check(self): - is_busy = self.jupyter_kernel_monitor.is_busy or self.cpu_usage_monitor.is_busy + is_busy = ( + self.jupyter_kernel_monitor.is_busy + or self.cpu_usage_monitor.is_busy + or self.disk_usage_monitor.is_busy + ) if is_busy: self.last_idle = None diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 6e20c6d..d1d7f26 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -49,7 +49,7 @@ async def test_cpu_usage_monitor_not_busy( activity_generator = create_activity_generator(network=False, cpu=False, disk=False) mock__get_brother_processes([activity_generator.get_pid()]) - with activity_monitor.CPUUsageMonitor(1, threshold=5) as cpu_usage_monitor: + with activity_monitor.CPUUsageMonitor(1, busy_threshold=5) as cpu_usage_monitor: async for attempt in AsyncRetrying( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): @@ -65,7 +65,7 @@ async def test_cpu_usage_monitor_still_busy( activity_generator = create_activity_generator(network=False, cpu=True, disk=False) mock__get_brother_processes([activity_generator.get_pid()]) - with activity_monitor.CPUUsageMonitor(0.5, threshold=5) as cpu_usage_monitor: + with 
activity_monitor.CPUUsageMonitor(0.5, busy_threshold=5) as cpu_usage_monitor: # wait for monitor to trigger time.sleep(1) From 7869229442d281742fd5f9d4474274d6c83bb76a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 10:52:46 +0100 Subject: [PATCH 19/52] adding tests for disk usage --- docker/activity_monitor.py | 24 +++++++++++-------- tests/test_activity_monitor.py | 42 ++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 9 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 9a0375b..5bfaada 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -17,9 +17,12 @@ CHECK_INTERVAL_S: Final[float] = 5 -BUSY_THRESHOLD_CPU_USAGE: Final[float] = 5 # percent in range [0, 100] THREAD_EXECUTOR_WORKERS: Final[int] = 10 +BUSY_USAGE_THRESHOLD_CPU: Final[float] = 5 # percent in range [0, 100] +BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 0 # in bytes +BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 0 # in bytes + # Utilities class AbstractIsBusyMonitor: @@ -137,7 +140,7 @@ def _get_total_cpu_usage(self) -> float: return sum([future.result() for future in as_completed(futures)]) def _check_if_busy(self) -> bool: - return self._get_total_cpu_usage() >= self.busy_threshold + return self._get_total_cpu_usage() > self.busy_threshold class DiskUsageMonitor(AbstractIsBusyMonitor): @@ -156,9 +159,9 @@ def __init__( self.executor = ThreadPoolExecutor(max_workers=THREAD_EXECUTOR_WORKERS) def _get_process_disk_usage(self, proc: psutil.Process) -> tuple[int, int]: - io_start = proc.disk_io_counters() + io_start = proc.io_counters() time.sleep(self.DISK_USAGE_MONITORING_INTERVAL_S) - io_end = proc.disk_io_counters() + io_end = proc.io_counters() # Calculate the differences read_bytes = io_end.read_bytes - io_start.read_bytes @@ -185,8 +188,8 @@ def _get_total_disk_usage(self) -> tuple[int, int]: def _check_if_busy(self) -> bool: read_bytes, write_bytes = self._get_total_disk_usage() return ( - 
read_bytes >= self.read_usage_threshold - or write_bytes >= self.write_usage_threshold + read_bytes > self.read_usage_threshold + or write_bytes > self.write_usage_threshold ) @@ -197,10 +200,13 @@ def __init__(self, interval: float) -> None: self.jupyter_kernel_monitor = JupyterKernelMonitor(CHECK_INTERVAL_S) self.cpu_usage_monitor = CPUUsageMonitor( - CHECK_INTERVAL_S, busy_threshold=BUSY_THRESHOLD_CPU_USAGE + CHECK_INTERVAL_S, busy_threshold=BUSY_USAGE_THRESHOLD_CPU + ) + self.disk_usage_monitor = DiskUsageMonitor( + CHECK_INTERVAL_S, + read_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_READ, + write_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_WRITE, ) - # TODO: change threshold - self.disk_usage_monitor = DiskUsageMonitor(CHECK_INTERVAL_S, usage_threshold=40) def check(self): is_busy = ( diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index d1d7f26..3a160f0 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -71,3 +71,45 @@ async def test_cpu_usage_monitor_still_busy( # must still result busy assert cpu_usage_monitor.is_busy is True + + +async def test_disk_usage_monitor_not_busy( + socket_server: None, + mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], +): + activity_generator = create_activity_generator(network=False, cpu=False, disk=False) + mock__get_brother_processes([activity_generator.get_pid()]) + + with activity_monitor.DiskUsageMonitor( + 0.5, read_usage_threshold=0, write_usage_threshold=0 + ) as disk_usage_monitor: + async for attempt in AsyncRetrying( + stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True + ): + with attempt: + read_bytes, write_bytes = disk_usage_monitor._get_total_disk_usage() + assert read_bytes == 0 + assert write_bytes == 0 + assert disk_usage_monitor.is_busy is False + + +async def test_disk_usage_monitor_still_busy( + socket_server: None, + 
mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], +): + activity_generator = create_activity_generator(network=False, cpu=False, disk=True) + mock__get_brother_processes([activity_generator.get_pid()]) + + with activity_monitor.DiskUsageMonitor( + 0.5, read_usage_threshold=0, write_usage_threshold=0 + ) as disk_usage_monitor: + # wait for monitor to trigger + time.sleep(1) + _, write_bytes = disk_usage_monitor._get_total_disk_usage() + # NOTE: due to os disk cache reading is not reliable not testing it + assert write_bytes > 0 + + # must still result busy + assert disk_usage_monitor.is_busy is True From 0ebfc4420b20f363ddabef5c44e51b591265b8d1 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 11:06:38 +0100 Subject: [PATCH 20/52] refactor tests --- docker/activity_monitor.py | 3 +++ tests/test_activity_monitor.py | 2 ++ 2 files changed, 5 insertions(+) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 5bfaada..3b36ffc 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -248,6 +248,9 @@ async def get(self): "cpu_usage": { "is_busy": self.activity_manager.cpu_usage_monitor.is_busy, }, + "disk_usage": { + "is_busy": self.activity_manager.disk_usage_monitor.is_busy + }, "kernel_monitor": { "is_busy": self.activity_manager.jupyter_kernel_monitor.is_busy }, diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 3a160f0..b3b7ebf 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -54,6 +54,7 @@ async def test_cpu_usage_monitor_not_busy( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: + assert cpu_usage_monitor._get_total_cpu_usage() == 0 assert cpu_usage_monitor.is_busy is False @@ -70,6 +71,7 @@ async def test_cpu_usage_monitor_still_busy( time.sleep(1) # must still result busy + assert 
cpu_usage_monitor._get_total_cpu_usage() > 0 assert cpu_usage_monitor.is_busy is True From 7ae3faa623cb3d48c55eec63f45e4bf9d5eef1c0 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 14:35:20 +0100 Subject: [PATCH 21/52] added new tests --- docker/activity_monitor.py | 15 +++--- tests/test_activity_monitor.py | 87 ++++++++++++++++++++++++++++++++-- 2 files changed, 92 insertions(+), 10 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 3b36ffc..817618a 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -270,20 +270,21 @@ async def get(self): ) -def make_app(activity_manager) -> tornado.web.Application: - return tornado.web.Application( +async def make_app() -> tornado.web.Application: + activity_manager = ActivityManager(CHECK_INTERVAL_S) + app = tornado.web.Application( [ - (r"/", MainHandler, dict(activity_manager=activity_manager)), - (r"/debug", DebugHandler, dict(activity_manager=activity_manager)), + (r"/", MainHandler, {"activity_manager": activity_manager}), + (r"/debug", DebugHandler, {"activity_manager": activity_manager}), ] ) + asyncio.create_task(activity_manager.run()) + return app async def main(): - activity_manager = ActivityManager(CHECK_INTERVAL_S) - app = make_app(activity_manager) + app = await make_app() app.listen(19597) - asyncio.create_task(activity_manager.run()) await asyncio.Event().wait() diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index b3b7ebf..7c08359 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -1,6 +1,12 @@ +import asyncio import pytest import psutil -import time +import requests +import tornado.web +import tornado.httpserver +import tornado.ioloop +import threading +import pytest_asyncio from typing import Callable, TYPE_CHECKING @@ -68,7 +74,7 @@ async def test_cpu_usage_monitor_still_busy( with activity_monitor.CPUUsageMonitor(0.5, busy_threshold=5) as cpu_usage_monitor: # wait for 
monitor to trigger - time.sleep(1) + await asyncio.sleep(1) # must still result busy assert cpu_usage_monitor._get_total_cpu_usage() > 0 @@ -108,10 +114,85 @@ async def test_disk_usage_monitor_still_busy( 0.5, read_usage_threshold=0, write_usage_threshold=0 ) as disk_usage_monitor: # wait for monitor to trigger - time.sleep(1) + await asyncio.sleep(1) _, write_bytes = disk_usage_monitor._get_total_disk_usage() # NOTE: due to os disk cache reading is not reliable not testing it assert write_bytes > 0 # must still result busy assert disk_usage_monitor.is_busy is True + + +@pytest_asyncio.fixture +async def server_url() -> str: + return "http://localhost:8899" + + +@pytest_asyncio.fixture +async def tornado_server(server_url: str) -> None: + app = await activity_monitor.make_app() + + def _start_tornado(): + http_server = tornado.httpserver.HTTPServer(app) + http_server.listen(8899) + tornado.ioloop.IOLoop.current().start() + + def _stop_tornado(): + tornado.ioloop.IOLoop.current().stop() + + thread = threading.Thread(target=lambda: _start_tornado(), daemon=True) + thread.start() + + # ensure server is running + async for attempt in AsyncRetrying( + stop=stop_after_delay(3), wait=wait_fixed(0.1), reraise=True + ): + with attempt: + result = requests.get(f"{server_url}/", timeout=1) + assert result.status_code == 200, result.text + + yield None + + _stop_tornado() + + +@pytest.fixture +def mock_check_interval(mocker: MockFixture) -> None: + mocker.patch("activity_monitor.CHECK_INTERVAL_S", new=0.5) + + +@pytest.mark.asyncio +async def test_tornado_server_ok(mock_check_interval: None, tornado_server: None, server_url:str): + result = requests.get(f"{server_url}/", timeout=5) + assert result.status_code == 200 + + +async def test_activity_monitor_not_busy( + mock_check_interval: None, + socket_server: None, + mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], + 
tornado_server: None, + server_url: str, +): + activity_generator = create_activity_generator(network=False, cpu=False, disk=False) + mock__get_brother_processes([activity_generator.get_pid()]) + + async for attempt in AsyncRetrying( + stop=stop_after_delay(10), wait=wait_fixed(0.1), reraise=True + ): + with attempt: + result = requests.get(f"{server_url}/", timeout=5) + assert result.status_code == 200 + response = result.json() + assert response["seconds_inactive"] > 0 + + result = requests.get(f"{server_url}/debug", timeout=5) + assert result.status_code == 200 + response = result.json() + assert response == { + "seconds_inactive": 0, + "cpu_usage": {"is_busy": True}, + "disk_usage": {"is_busy": True}, + "kernel_monitor": {"is_busy": True}, + } From b85c80cd162e7033eb3383e00998b4b75278574a Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 15:20:55 +0100 Subject: [PATCH 22/52] refactor tests --- tests/test_activity_monitor.py | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 7c08359..9b3153d 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -8,7 +8,7 @@ import threading import pytest_asyncio - +from queue import Queue from typing import Callable, TYPE_CHECKING from pytest_mock import MockFixture from tenacity import AsyncRetrying @@ -132,15 +132,24 @@ async def server_url() -> str: async def tornado_server(server_url: str) -> None: app = await activity_monitor.make_app() - def _start_tornado(): + stop_queue = Queue() + + def _run_server_worker(): http_server = tornado.httpserver.HTTPServer(app) http_server.listen(8899) - tornado.ioloop.IOLoop.current().start() + current_io_loop = tornado.ioloop.IOLoop.current() + + def _queue_stopper() -> None: + stop_queue.get() + current_io_loop.stop() - def _stop_tornado(): - tornado.ioloop.IOLoop.current().stop() + stopping_thread = 
threading.Thread(target=_queue_stopper, daemon=True) + stopping_thread.start() - thread = threading.Thread(target=lambda: _start_tornado(), daemon=True) + current_io_loop.start() + stopping_thread.join() + + thread = threading.Thread(target=_run_server_worker, daemon=True) thread.start() # ensure server is running @@ -153,7 +162,11 @@ def _stop_tornado(): yield None - _stop_tornado() + stop_queue.put(None) + thread.join(timeout=1) + + with pytest.raises(requests.exceptions.ReadTimeout): + requests.get(f"{server_url}/", timeout=1) @pytest.fixture @@ -162,7 +175,9 @@ def mock_check_interval(mocker: MockFixture) -> None: @pytest.mark.asyncio -async def test_tornado_server_ok(mock_check_interval: None, tornado_server: None, server_url:str): +async def test_tornado_server_ok( + mock_check_interval: None, tornado_server: None, server_url: str +): result = requests.get(f"{server_url}/", timeout=5) assert result.status_code == 200 From 6cd4ccfb525153603a8bdd3cad7a70d4a9d7c246 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 15:50:27 +0100 Subject: [PATCH 23/52] final version of tests --- docker/activity_monitor.py | 18 ++++++++++------ tests/test_activity_monitor.py | 38 +++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 23 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 817618a..a423c12 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -45,7 +45,8 @@ def _check_if_busy(self) -> bool: def _worker(self) -> None: while self._keep_running: - self.is_busy = self._check_if_busy() + with suppress(Exception): + self.is_busy = self._check_if_busy() time.sleep(self._poll_interval) def start(self) -> None: @@ -130,7 +131,7 @@ def __init__(self, poll_interval: float, *, busy_threshold: float): super().__init__(poll_interval=poll_interval) self.busy_threshold = busy_threshold - def _get_total_cpu_usage(self) -> float: + def get_total_cpu_usage(self) -> float: futures = [ 
self.thread_executor.submit( x.cpu_percent, self.CPU_USAGE_MONITORING_INTERVAL_S @@ -140,7 +141,7 @@ def _get_total_cpu_usage(self) -> float: return sum([future.result() for future in as_completed(futures)]) def _check_if_busy(self) -> bool: - return self._get_total_cpu_usage() > self.busy_threshold + return self.get_total_cpu_usage() > self.busy_threshold class DiskUsageMonitor(AbstractIsBusyMonitor): @@ -168,7 +169,7 @@ def _get_process_disk_usage(self, proc: psutil.Process) -> tuple[int, int]: write_bytes = io_end.write_bytes - io_start.write_bytes return read_bytes, write_bytes - def _get_total_disk_usage(self) -> tuple[int, int]: + def get_total_disk_usage(self) -> tuple[int, int]: futures = [ self.thread_executor.submit(self._get_process_disk_usage, x) for x in _get_brother_processes() @@ -186,7 +187,7 @@ def _get_total_disk_usage(self) -> tuple[int, int]: return read_bytes, write_bytes def _check_if_busy(self) -> bool: - read_bytes, write_bytes = self._get_total_disk_usage() + read_bytes, write_bytes = self.get_total_disk_usage() return ( read_bytes > self.read_usage_threshold or write_bytes > self.write_usage_threshold @@ -229,6 +230,9 @@ def get_idle_seconds(self) -> float: return idle_seconds if idle_seconds > 0 else 0 async def run(self): + self.jupyter_kernel_monitor.start() + self.cpu_usage_monitor.start() + self.disk_usage_monitor.start() while True: with suppress(Exception): self.check() @@ -247,9 +251,11 @@ async def get(self): "seconds_inactive": self.activity_manager.get_idle_seconds(), "cpu_usage": { "is_busy": self.activity_manager.cpu_usage_monitor.is_busy, + "total": self.activity_manager.cpu_usage_monitor.get_total_cpu_usage(), }, "disk_usage": { - "is_busy": self.activity_manager.disk_usage_monitor.is_busy + "is_busy": self.activity_manager.disk_usage_monitor.is_busy, + "total": self.activity_manager.disk_usage_monitor.get_total_disk_usage(), }, "kernel_monitor": { "is_busy": self.activity_manager.jupyter_kernel_monitor.is_busy diff --git 
a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 9b3153d..0398204 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -60,7 +60,7 @@ async def test_cpu_usage_monitor_not_busy( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: - assert cpu_usage_monitor._get_total_cpu_usage() == 0 + assert cpu_usage_monitor.get_total_cpu_usage() == 0 assert cpu_usage_monitor.is_busy is False @@ -77,7 +77,7 @@ async def test_cpu_usage_monitor_still_busy( await asyncio.sleep(1) # must still result busy - assert cpu_usage_monitor._get_total_cpu_usage() > 0 + assert cpu_usage_monitor.get_total_cpu_usage() > 0 assert cpu_usage_monitor.is_busy is True @@ -96,7 +96,7 @@ async def test_disk_usage_monitor_not_busy( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: - read_bytes, write_bytes = disk_usage_monitor._get_total_disk_usage() + read_bytes, write_bytes = disk_usage_monitor.get_total_disk_usage() assert read_bytes == 0 assert write_bytes == 0 assert disk_usage_monitor.is_busy is False @@ -115,7 +115,7 @@ async def test_disk_usage_monitor_still_busy( ) as disk_usage_monitor: # wait for monitor to trigger await asyncio.sleep(1) - _, write_bytes = disk_usage_monitor._get_total_disk_usage() + _, write_bytes = disk_usage_monitor.get_total_disk_usage() # NOTE: due to os disk cache reading is not reliable not testing it assert write_bytes > 0 @@ -128,8 +128,13 @@ async def server_url() -> str: return "http://localhost:8899" +@pytest.fixture +def mock_jupyter_kernel_monitor(mocker: MockFixture) -> None: + activity_monitor.JupyterKernelMonitor._are_kernels_busy = lambda _: False + + @pytest_asyncio.fixture -async def tornado_server(server_url: str) -> None: +async def tornado_server(mock_jupyter_kernel_monitor: None, server_url: str) -> None: app = await activity_monitor.make_app() stop_queue = Queue() @@ -171,7 +176,8 @@ def _queue_stopper() -> None: @pytest.fixture def 
mock_check_interval(mocker: MockFixture) -> None: - mocker.patch("activity_monitor.CHECK_INTERVAL_S", new=0.5) + mocker.patch("activity_monitor.CHECK_INTERVAL_S", new=1) + assert activity_monitor.CHECK_INTERVAL_S == 1 @pytest.mark.asyncio @@ -182,7 +188,7 @@ async def test_tornado_server_ok( assert result.status_code == 200 -async def test_activity_monitor_not_busy( +async def test_activity_monitor_becomes_not_busy( mock_check_interval: None, socket_server: None, mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], @@ -197,17 +203,15 @@ async def test_activity_monitor_not_busy( stop=stop_after_delay(10), wait=wait_fixed(0.1), reraise=True ): with attempt: - result = requests.get(f"{server_url}/", timeout=5) + # check that all become not busy + result = requests.get(f"{server_url}/debug", timeout=5) assert result.status_code == 200 - response = result.json() - assert response["seconds_inactive"] > 0 + debug_response = result.json() + assert debug_response["cpu_usage"]["is_busy"] is False + assert debug_response["disk_usage"]["is_busy"] is False + assert debug_response["kernel_monitor"]["is_busy"] is False - result = requests.get(f"{server_url}/debug", timeout=5) + result = requests.get(f"{server_url}/", timeout=2) assert result.status_code == 200 response = result.json() - assert response == { - "seconds_inactive": 0, - "cpu_usage": {"is_busy": True}, - "disk_usage": {"is_busy": True}, - "kernel_monitor": {"is_busy": True}, - } + assert response["seconds_inactive"] > 0 From 6a5ea59de03db5e3883fa11f2ede478ef4e323ff Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 16:00:42 +0100 Subject: [PATCH 24/52] fixed tornado shutdown --- tests/test_activity_monitor.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 0398204..56373e3 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -154,6 +154,10 @@ def _queue_stopper() -> None: 
current_io_loop.start() stopping_thread.join() + # cleanly shut down tornado server and loop + current_io_loop.close() + http_server.stop() + thread = threading.Thread(target=_run_server_worker, daemon=True) thread.start() From 7235d5cf98c5341f97ed058bd30eaddd4398cf5e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 16:44:09 +0100 Subject: [PATCH 25/52] fixed tests --- docker/activity_monitor.py | 2 ++ requirements/test.in | 1 + requirements/test.txt | 6 +++++ tests/test_activity_monitor.py | 40 +++++++++++++++++++++++++--------- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index a423c12..1b20449 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -16,6 +16,8 @@ from abc import abstractmethod +LISTEN_PORT: Final[int] = 19597 + CHECK_INTERVAL_S: Final[float] = 5 THREAD_EXECUTOR_WORKERS: Final[int] = 10 diff --git a/requirements/test.in b/requirements/test.in index e41c02b..4749a2a 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -9,4 +9,5 @@ pytest pytest-asyncio pytest-mock requests +requests-mock tenacity \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt index 6f731a3..ca58c6c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -30,7 +30,13 @@ pytest-asyncio==0.23.6 pytest-mock==3.12.0 # via -r requirements/test.in requests==2.31.0 + # via + # -r requirements/test.in + # requests-mock +requests-mock==1.11.0 # via -r requirements/test.in +six==1.16.0 + # via requests-mock tenacity==8.2.3 # via -r requirements/test.in tomli==2.0.1 diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 56373e3..101423a 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -1,21 +1,24 @@ import asyncio import pytest import psutil -import requests import tornado.web +import json import tornado.httpserver import tornado.ioloop import threading import 
pytest_asyncio +import requests +import requests_mock from queue import Queue -from typing import Callable, TYPE_CHECKING +from typing import Callable, Iterable, TYPE_CHECKING from pytest_mock import MockFixture from tenacity import AsyncRetrying from tenacity.stop import stop_after_delay from tenacity.wait import wait_fixed from conftest import _ActivityGenerator + if TYPE_CHECKING: from ..docker import activity_monitor else: @@ -123,14 +126,30 @@ async def test_disk_usage_monitor_still_busy( assert disk_usage_monitor.is_busy is True -@pytest_asyncio.fixture -async def server_url() -> str: - return "http://localhost:8899" +@pytest.fixture +def mock_jupyter_kernel_monitor(are_kernels_busy: bool) -> Iterable[None]: + with requests_mock.Mocker(real_http=True) as m: + m.get("http://localhost:8888/api/kernels", text=json.dumps([{"id": "atest1"}])) + m.get( + "http://localhost:8888/api/kernels/atest1", + text=json.dumps( + {"execution_state": "running" if are_kernels_busy else "idle"} + ), + ) + yield -@pytest.fixture -def mock_jupyter_kernel_monitor(mocker: MockFixture) -> None: - activity_monitor.JupyterKernelMonitor._are_kernels_busy = lambda _: False +@pytest.mark.parametrize("are_kernels_busy", [True, False]) +def test_jupyter_kernel_monitor( + mock_jupyter_kernel_monitor: None, are_kernels_busy: bool +): + kernel_monitor = activity_monitor.JupyterKernelMonitor(1) + assert kernel_monitor._are_kernels_busy() is are_kernels_busy + + +@pytest_asyncio.fixture +async def server_url() -> str: + return f"http://localhost:{activity_monitor.LISTEN_PORT}" @pytest_asyncio.fixture @@ -141,7 +160,7 @@ async def tornado_server(mock_jupyter_kernel_monitor: None, server_url: str) -> def _run_server_worker(): http_server = tornado.httpserver.HTTPServer(app) - http_server.listen(8899) + http_server.listen(activity_monitor.LISTEN_PORT) current_io_loop = tornado.ioloop.IOLoop.current() def _queue_stopper() -> None: @@ -184,7 +203,7 @@ def mock_check_interval(mocker: MockFixture) -> 
None: assert activity_monitor.CHECK_INTERVAL_S == 1 -@pytest.mark.asyncio +@pytest.mark.parametrize("are_kernels_busy", [False]) async def test_tornado_server_ok( mock_check_interval: None, tornado_server: None, server_url: str ): @@ -192,6 +211,7 @@ async def test_tornado_server_ok( assert result.status_code == 200 +@pytest.mark.parametrize("are_kernels_busy", [False]) async def test_activity_monitor_becomes_not_busy( mock_check_interval: None, socket_server: None, From 162c972fd0f405c9c00fbaadabe46a89419738cf Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 16:48:51 +0100 Subject: [PATCH 26/52] added ci scripts --- .github/workflows/check-image.yml | 2 ++ scripts/ci/run_tests.sh | 5 +++++ 2 files changed, 7 insertions(+) create mode 100755 scripts/ci/run_tests.sh diff --git a/.github/workflows/check-image.yml b/.github/workflows/check-image.yml index 168acd3..44763c7 100644 --- a/.github/workflows/check-image.yml +++ b/.github/workflows/check-image.yml @@ -16,6 +16,8 @@ jobs: uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: args: ooil compose + - name: Run local tests + run: ./scripts/ci/run_tests.sh - name: Build all images if multiple uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: diff --git a/scripts/ci/run_tests.sh b/scripts/ci/run_tests.sh new file mode 100755 index 0000000..215ee98 --- /dev/null +++ b/scripts/ci/run_tests.sh @@ -0,0 +1,5 @@ +#/bin/sh +make .venv +source .venv/bin/activate +make install-dev +make tests-local \ No newline at end of file From 5e578de4e87dca93a4a991f8d0336852833bcf13 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 16:50:34 +0100 Subject: [PATCH 27/52] fix tests --- Makefile | 4 ++++ scripts/ci/run_tests.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 655e9a2..416eab1 100644 --- a/Makefile +++ b/Makefile @@ -75,6 +75,10 @@ install-dev: ## run tests in development mode tests-dev: 
## run tests in development mode .venv/bin/pytest --pdb -vvv tests +.PHONY: tests-ci +tests-dev: ## run tests in development mode + .venv/bin/pytest -vvv tests + .PHONY: help help: ## this colorful help @echo "Recipes for '$(notdir $(CURDIR))':" diff --git a/scripts/ci/run_tests.sh b/scripts/ci/run_tests.sh index 215ee98..253f528 100755 --- a/scripts/ci/run_tests.sh +++ b/scripts/ci/run_tests.sh @@ -2,4 +2,4 @@ make .venv source .venv/bin/activate make install-dev -make tests-local \ No newline at end of file +make tests-ci \ No newline at end of file From cbafd57d1507b616ed8e687a1548a3e167436cc9 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 16:51:20 +0100 Subject: [PATCH 28/52] refactor --- .github/workflows/check-image.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-image.yml b/.github/workflows/check-image.yml index 44763c7..71a8e44 100644 --- a/.github/workflows/check-image.yml +++ b/.github/workflows/check-image.yml @@ -8,6 +8,8 @@ jobs: steps: - name: Checkout repo content uses: actions/checkout@v2 + - name: run tests + run: ./scripts/ci/run_tests.sh - name: ooil version uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: @@ -16,8 +18,6 @@ jobs: uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: args: ooil compose - - name: Run local tests - run: ./scripts/ci/run_tests.sh - name: Build all images if multiple uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: From a648f43fc111e3fb763cbf775bf40b3a5dfdb704 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 16:52:09 +0100 Subject: [PATCH 29/52] fix test --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 416eab1..92e6e6b 100644 --- a/Makefile +++ b/Makefile @@ -76,7 +76,7 @@ tests-dev: ## run tests in development mode .venv/bin/pytest --pdb -vvv tests .PHONY: tests-ci -tests-dev: ## run 
tests in development mode +tests-ci: ## run testds in the CI .venv/bin/pytest -vvv tests .PHONY: help From c6169153f3307b94b01e7be3d00eb7b0aae88665 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Thu, 21 Mar 2024 16:54:22 +0100 Subject: [PATCH 30/52] refactor --- scripts/ci/run_tests.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/ci/run_tests.sh b/scripts/ci/run_tests.sh index 253f528..3dd1ad5 100755 --- a/scripts/ci/run_tests.sh +++ b/scripts/ci/run_tests.sh @@ -1,4 +1,11 @@ #/bin/sh + +# http://redsymbol.net/articles/unofficial-bash-strict-mode/ +set -o errexit # abort on nonzero exitstatus +set -o nounset # abort on unbound variable +set -o pipefail # don't hide errors within pipes +IFS=$'\n\t' + make .venv source .venv/bin/activate make install-dev From 9c3410775de990c39fccd3d255d51615cb127405 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 22 Mar 2024 11:14:45 +0100 Subject: [PATCH 31/52] add coverage and fail rate --- Makefile | 6 +++--- requirements/test.in | 1 + requirements/test.txt | 9 ++++++++- tests/test_activity_monitor.py | 4 ++++ 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 92e6e6b..0b4abf3 100644 --- a/Makefile +++ b/Makefile @@ -67,8 +67,8 @@ publish-local: ## push to local throw away registry to test integration docker push registry:5000/simcore/services/dynamic/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) @curl registry:5000/v2/_catalog | jq -.PHONY: install-dev -install-dev: ## run tests in development mode +.PHONY: install-test +install-test: ## install dependencies for testing pip install -r requirements/test.txt .PHONY: tests-dev @@ -77,7 +77,7 @@ tests-dev: ## run tests in development mode .PHONY: tests-ci tests-ci: ## run testds in the CI - .venv/bin/pytest -vvv tests + .venv/bin/pytest -vvv --color=yes --cov-report term --cov=activity_monitor tests .PHONY: help help: ## this colorful help diff --git a/requirements/test.in b/requirements/test.in index 4749a2a..202a1b1 
100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -7,6 +7,7 @@ tornado pytest pytest-asyncio +pytest-cov pytest-mock requests requests-mock diff --git a/requirements/test.txt b/requirements/test.txt index ca58c6c..43fae43 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -8,6 +8,8 @@ certifi==2024.2.2 # via requests charset-normalizer==3.3.2 # via requests +coverage[toml]==7.4.4 + # via pytest-cov exceptiongroup==1.2.0 # via pytest idna==3.6 @@ -24,9 +26,12 @@ pytest==8.1.1 # via # -r requirements/test.in # pytest-asyncio + # pytest-cov # pytest-mock pytest-asyncio==0.23.6 # via -r requirements/test.in +pytest-cov==4.1.0 + # via -r requirements/test.in pytest-mock==3.12.0 # via -r requirements/test.in requests==2.31.0 @@ -40,7 +45,9 @@ six==1.16.0 tenacity==8.2.3 # via -r requirements/test.in tomli==2.0.1 - # via pytest + # via + # coverage + # pytest tornado==6.4 # via -r requirements/test.in urllib3==2.2.1 diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 101423a..8296f4a 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -239,3 +239,7 @@ async def test_activity_monitor_becomes_not_busy( assert result.status_code == 200 response = result.json() assert response["seconds_inactive"] > 0 + + +def test_always_fails(): + assert False \ No newline at end of file From b4dfd7405391302089ebe794d9f8c03b0dcd4544 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 22 Mar 2024 11:16:53 +0100 Subject: [PATCH 32/52] fix test script --- scripts/ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/ci/run_tests.sh b/scripts/ci/run_tests.sh index 3dd1ad5..e1d4c8f 100755 --- a/scripts/ci/run_tests.sh +++ b/scripts/ci/run_tests.sh @@ -8,5 +8,5 @@ IFS=$'\n\t' make .venv source .venv/bin/activate -make install-dev +make install-test make tests-ci \ No newline at end of file From b8ee98b61df564ea934ae5e5ae837edd0295082b Mon Sep 17 00:00:00 2001 From: 
Andrei Neagu Date: Fri, 22 Mar 2024 11:19:55 +0100 Subject: [PATCH 33/52] removed failing test --- tests/test_activity_monitor.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 8296f4a..101423a 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -239,7 +239,3 @@ async def test_activity_monitor_becomes_not_busy( assert result.status_code == 200 response = result.json() assert response["seconds_inactive"] > 0 - - -def test_always_fails(): - assert False \ No newline at end of file From 4fea5b12a34912c2845ceb4520c4b6901038b7ba Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 22 Mar 2024 11:20:11 +0100 Subject: [PATCH 34/52] update gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index be71fd5..7c0d726 100644 --- a/.gitignore +++ b/.gitignore @@ -53,4 +53,5 @@ docker-compose.yml # ignore all files in validation validation/ -*.ignore.* \ No newline at end of file +*.ignore.* +.coverage \ No newline at end of file From 0158da63af9bbb9df61904835c836dcb88f8be91 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 22 Mar 2024 15:00:18 +0100 Subject: [PATCH 35/52] refactor --- Makefile | 2 +- README.md | 9 +++++++++ docker/activity_monitor.py | 6 +++--- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 0b4abf3..15bf5c5 100644 --- a/Makefile +++ b/Makefile @@ -77,7 +77,7 @@ tests-dev: ## run tests in development mode .PHONY: tests-ci tests-ci: ## run testds in the CI - .venv/bin/pytest -vvv --color=yes --cov-report term --cov=activity_monitor tests + .venv/bin/pytest -vvv --color=yes --cov-report term --cov=activity_monitor tests .PHONY: help help: ## this colorful help diff --git a/README.md b/README.md index 096624f..516169d 100644 --- a/README.md +++ b/README.md @@ -29,6 +29,15 @@ If you already have a local copy of **o2S2PARC** running a make 
publish-local ``` +Setup for test development locally: + +```shell +make devenv +source .venv/bin/activate +make tests-dev +``` + + ### Testing manually After a new service version has been published on the master deployment, it can be manually tested. For example a Template, called "Test Jupyter-math 2.0.9 ipywidgets" can be used for internal testing on the master deployment. diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 1b20449..a89bbe8 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -76,7 +76,7 @@ def __get_children_processes(pid) -> list[psutil.Process]: return [] -def _get_brother_processes() -> list[psutil.Process]: +def _get_sibling_processes() -> list[psutil.Process]: # Returns the CPU usage of all processes except this one. # ASSUMPTIONS: # - `CURRENT_PROC` is a child of root process @@ -138,7 +138,7 @@ def get_total_cpu_usage(self) -> float: self.thread_executor.submit( x.cpu_percent, self.CPU_USAGE_MONITORING_INTERVAL_S ) - for x in _get_brother_processes() + for x in _get_sibling_processes() ] return sum([future.result() for future in as_completed(futures)]) @@ -174,7 +174,7 @@ def _get_process_disk_usage(self, proc: psutil.Process) -> tuple[int, int]: def get_total_disk_usage(self) -> tuple[int, int]: futures = [ self.thread_executor.submit(self._get_process_disk_usage, x) - for x in _get_brother_processes() + for x in _get_sibling_processes() ] disk_usage: list[tuple[int, int]] = [ From ee9e253db768183b79c530474b40feb8b3273e82 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 22 Mar 2024 16:23:49 +0100 Subject: [PATCH 36/52] refactor CPUUsage to be none blocking --- docker/activity_monitor.py | 76 ++++++++++++++++++++++++++++------ tests/test_activity_monitor.py | 28 ++++++------- 2 files changed, 78 insertions(+), 26 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index a89bbe8..cd91823 100755 --- a/docker/activity_monitor.py +++ 
b/docker/activity_monitor.py @@ -12,7 +12,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import suppress from datetime import datetime -from typing import Final +from typing import Final, TypeAlias from abc import abstractmethod @@ -69,7 +69,7 @@ def __exit__(self, exc_type, exc_value, traceback): self.stop() -def __get_children_processes(pid) -> list[psutil.Process]: +def __get_children_processes_recursive(pid) -> list[psutil.Process]: try: return psutil.Process(pid).children(recursive=True) except psutil.NoSuchProcess: @@ -90,8 +90,8 @@ def _get_sibling_processes() -> list[psutil.Process]: # - pN current_process = psutil.Process() parent_pid = current_process.ppid() - children = __get_children_processes(parent_pid) - return [c for c in children if c.pid != current_process.pid] + all_children = __get_children_processes_recursive(parent_pid) + return [c for c in all_children if c.pid != current_process.pid] # Monitors @@ -126,6 +126,11 @@ def _check_if_busy(self) -> bool: return self._are_kernels_busy() +ProcessID: TypeAlias = int +TimeSeconds: TypeAlias = float +PercentCPU: TypeAlias = float + + class CPUUsageMonitor(AbstractIsBusyMonitor): CPU_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 @@ -133,17 +138,53 @@ def __init__(self, poll_interval: float, *, busy_threshold: float): super().__init__(poll_interval=poll_interval) self.busy_threshold = busy_threshold - def get_total_cpu_usage(self) -> float: + # snapshot + self._last_sample: dict[ProcessID, tuple[TimeSeconds, PercentCPU]] = ( + self._sample_total_cpu_usage() + ) + + def _sample_cpu_usage( + self, process: psutil.Process + ) -> tuple[ProcessID, tuple[TimeSeconds, PercentCPU]]: + # returns a tuple[pid, tuple[time, percent_cpu_usage]] + return (process.pid, (time.time(), process.cpu_percent())) + + def _sample_total_cpu_usage( + self, + ) -> dict[ProcessID, tuple[TimeSeconds, PercentCPU]]: futures = [ - self.thread_executor.submit( - x.cpu_percent, 
self.CPU_USAGE_MONITORING_INTERVAL_S - ) - for x in _get_sibling_processes() + self.thread_executor.submit(self._sample_cpu_usage, p) + for p in _get_sibling_processes() ] - return sum([future.result() for future in as_completed(futures)]) + return dict([f.result() for f in as_completed(futures)]) + + @staticmethod + def get_cpu_over_1_second( + last: tuple[TimeSeconds, PercentCPU], current: tuple[TimeSeconds, PercentCPU] + ) -> float: + interval = current[0] - last[0] + measured_cpu_in_interval = current[1] + # cpu_over_1_second[%] = 1[s] * measured_cpu_in_interval[%] / interval[s] + return measured_cpu_in_interval / interval + + def get_total_cpu_usage_over_1_second(self) -> float: + current_sample = self._sample_total_cpu_usage() + + total_cpu: float = 0 + for pid, time_and_cpu_usage in current_sample.items(): + if pid not in self._last_sample: + continue # skip if not found + + last_time_and_cpu_usage = self._last_sample[pid] + total_cpu += self.get_cpu_over_1_second( + last_time_and_cpu_usage, time_and_cpu_usage + ) + + self._last_sample = current_sample # replace + return total_cpu def _check_if_busy(self) -> bool: - return self.get_total_cpu_usage() > self.busy_threshold + return self.get_total_cpu_usage_over_1_second() > self.busy_threshold class DiskUsageMonitor(AbstractIsBusyMonitor): @@ -161,7 +202,12 @@ def __init__( self.write_usage_threshold = write_usage_threshold self.executor = ThreadPoolExecutor(max_workers=THREAD_EXECUTOR_WORKERS) + # TODO: can we refactor these to take advantage of the internal sleep? + # and then report partial counters instead of doing it like it is now def _get_process_disk_usage(self, proc: psutil.Process) -> tuple[int, int]: + # between the two calls it can fail! + # rewrite to make it better! 
+ # store previous counters -> measure and update again io_start = proc.io_counters() time.sleep(self.DISK_USAGE_MONITORING_INTERVAL_S) io_end = proc.io_counters() @@ -196,6 +242,12 @@ def _check_if_busy(self) -> bool: ) +class NetworkUsageMonitor(AbstractIsBusyMonitor): + NETWORK_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 + + # Measure at regular intervals (is busy calls) + + class ActivityManager: def __init__(self, interval: float) -> None: self.interval = interval @@ -253,7 +305,7 @@ async def get(self): "seconds_inactive": self.activity_manager.get_idle_seconds(), "cpu_usage": { "is_busy": self.activity_manager.cpu_usage_monitor.is_busy, - "total": self.activity_manager.cpu_usage_monitor.get_total_cpu_usage(), + "total": self.activity_manager.cpu_usage_monitor.get_total_cpu_usage_over_1_second(), }, "disk_usage": { "is_busy": self.activity_manager.disk_usage_monitor.is_busy, diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 101423a..dce5523 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -31,7 +31,7 @@ @pytest.fixture -def mock__get_brother_processes( +def mock__get_sibling_processes( mocker: MockFixture, ) -> Callable[[list[int]], list[psutil.Process]]: def _get_processes(pids: list[int]) -> list[psutil.Process]: @@ -44,7 +44,7 @@ def _get_processes(pids: list[int]) -> list[psutil.Process]: def _(pids: list[int]) -> None: mocker.patch( - "activity_monitor._get_brother_processes", return_value=_get_processes(pids) + "activity_monitor._get_sibling_processes", return_value=_get_processes(pids) ) return _ @@ -52,45 +52,45 @@ def _(pids: list[int]) -> None: async def test_cpu_usage_monitor_not_busy( socket_server: None, - mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = 
create_activity_generator(network=False, cpu=False, disk=False) - mock__get_brother_processes([activity_generator.get_pid()]) + mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.CPUUsageMonitor(1, busy_threshold=5) as cpu_usage_monitor: async for attempt in AsyncRetrying( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: - assert cpu_usage_monitor.get_total_cpu_usage() == 0 + assert cpu_usage_monitor.get_total_cpu_usage_over_1_second() == 0 assert cpu_usage_monitor.is_busy is False async def test_cpu_usage_monitor_still_busy( socket_server: None, - mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=False, cpu=True, disk=False) - mock__get_brother_processes([activity_generator.get_pid()]) + mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.CPUUsageMonitor(0.5, busy_threshold=5) as cpu_usage_monitor: # wait for monitor to trigger await asyncio.sleep(1) # must still result busy - assert cpu_usage_monitor.get_total_cpu_usage() > 0 + assert cpu_usage_monitor.get_total_cpu_usage_over_1_second() > 0 assert cpu_usage_monitor.is_busy is True async def test_disk_usage_monitor_not_busy( socket_server: None, - mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) - mock__get_brother_processes([activity_generator.get_pid()]) + mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.DiskUsageMonitor( 0.5, read_usage_threshold=0, write_usage_threshold=0 @@ -107,11 +107,11 @@ 
async def test_disk_usage_monitor_not_busy( async def test_disk_usage_monitor_still_busy( socket_server: None, - mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], ): activity_generator = create_activity_generator(network=False, cpu=False, disk=True) - mock__get_brother_processes([activity_generator.get_pid()]) + mock__get_sibling_processes([activity_generator.get_pid()]) with activity_monitor.DiskUsageMonitor( 0.5, read_usage_threshold=0, write_usage_threshold=0 @@ -215,13 +215,13 @@ async def test_tornado_server_ok( async def test_activity_monitor_becomes_not_busy( mock_check_interval: None, socket_server: None, - mock__get_brother_processes: Callable[[list[int]], list[psutil.Process]], + mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], tornado_server: None, server_url: str, ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) - mock__get_brother_processes([activity_generator.get_pid()]) + mock__get_sibling_processes([activity_generator.get_pid()]) async for attempt in AsyncRetrying( stop=stop_after_delay(10), wait=wait_fixed(0.1), reraise=True From 9045dfaed46dfa9e2fcdda4b6824f7c4d6e50d61 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Mar 2024 08:34:51 +0100 Subject: [PATCH 37/52] refactor tests --- tests/test_activity_monitor.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index dce5523..64d570b 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -63,7 +63,7 @@ async def test_cpu_usage_monitor_not_busy( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: - assert 
cpu_usage_monitor.get_total_cpu_usage_over_1_second() == 0 + assert cpu_usage_monitor.total_cpu_usage == 0 assert cpu_usage_monitor.is_busy is False @@ -80,7 +80,7 @@ async def test_cpu_usage_monitor_still_busy( await asyncio.sleep(1) # must still result busy - assert cpu_usage_monitor.get_total_cpu_usage_over_1_second() > 0 + assert cpu_usage_monitor.total_cpu_usage > 0 assert cpu_usage_monitor.is_busy is True @@ -140,11 +140,12 @@ def mock_jupyter_kernel_monitor(are_kernels_busy: bool) -> Iterable[None]: @pytest.mark.parametrize("are_kernels_busy", [True, False]) -def test_jupyter_kernel_monitor( +async def test_jupyter_kernel_monitor( mock_jupyter_kernel_monitor: None, are_kernels_busy: bool ): kernel_monitor = activity_monitor.JupyterKernelMonitor(1) - assert kernel_monitor._are_kernels_busy() is are_kernels_busy + kernel_monitor._update_kernels_activity() + assert kernel_monitor.are_kernels_busy is are_kernels_busy @pytest_asyncio.fixture From 53544b34a2a0ad04b357ba32af740fce9822286e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Mar 2024 09:12:29 +0100 Subject: [PATCH 38/52] refactor with non blocking pattern --- docker/activity_monitor.py | 149 +++++++++++++++++++++------------ tests/test_activity_monitor.py | 5 +- 2 files changed, 99 insertions(+), 55 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index cd91823..89164bf 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -2,6 +2,7 @@ import asyncio +import logging import json import psutil import requests @@ -16,6 +17,9 @@ from abc import abstractmethod +_logger = logging.getLogger(__name__) + + LISTEN_PORT: Final[int] = 19597 CHECK_INTERVAL_S: Final[float] = 5 @@ -47,8 +51,10 @@ def _check_if_busy(self) -> bool: def _worker(self) -> None: while self._keep_running: - with suppress(Exception): + try: self.is_busy = self._check_if_busy() + except Exception as e: + _logger.exception("Failed to check if busy") time.sleep(self._poll_interval) 
def start(self) -> None: @@ -103,12 +109,13 @@ class JupyterKernelMonitor(AbstractIsBusyMonitor): def __init__(self, poll_interval: float) -> None: super().__init__(poll_interval=poll_interval) + self.are_kernels_busy: bool = False def _get(self, path: str) -> dict: r = requests.get(f"{self.BASE_URL}{path}", headers=self.HEADERS) return r.json() - def _are_kernels_busy(self) -> bool: + def _update_kernels_activity(self) -> None: json_response = self._get("/api/kernels") are_kernels_busy = False @@ -120,10 +127,11 @@ def _are_kernels_busy(self) -> bool: if kernel_info["execution_state"] != "idle": are_kernels_busy = True - return are_kernels_busy + self.are_kernels_busy = are_kernels_busy def _check_if_busy(self) -> bool: - return self._are_kernels_busy() + self._update_kernels_activity() + return self.are_kernels_busy ProcessID: TypeAlias = int @@ -132,7 +140,9 @@ def _check_if_busy(self) -> bool: class CPUUsageMonitor(AbstractIsBusyMonitor): - CPU_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 + """At regular intervals computes the total CPU usage + and averages over 1 second. 
+ """ def __init__(self, poll_interval: float, *, busy_threshold: float): super().__init__(poll_interval=poll_interval) @@ -142,11 +152,13 @@ def __init__(self, poll_interval: float, *, busy_threshold: float): self._last_sample: dict[ProcessID, tuple[TimeSeconds, PercentCPU]] = ( self._sample_total_cpu_usage() ) + self.total_cpu_usage: float = 0 + @staticmethod def _sample_cpu_usage( - self, process: psutil.Process + process: psutil.Process, ) -> tuple[ProcessID, tuple[TimeSeconds, PercentCPU]]: - # returns a tuple[pid, tuple[time, percent_cpu_usage]] + """returns: tuple[pid, tuple[time, percent_cpu_usage]]""" return (process.pid, (time.time(), process.cpu_percent())) def _sample_total_cpu_usage( @@ -159,7 +171,7 @@ def _sample_total_cpu_usage( return dict([f.result() for f in as_completed(futures)]) @staticmethod - def get_cpu_over_1_second( + def _get_cpu_over_1_second( last: tuple[TimeSeconds, PercentCPU], current: tuple[TimeSeconds, PercentCPU] ) -> float: interval = current[0] - last[0] @@ -167,7 +179,7 @@ def get_cpu_over_1_second( # cpu_over_1_second[%] = 1[s] * measured_cpu_in_interval[%] / interval[s] return measured_cpu_in_interval / interval - def get_total_cpu_usage_over_1_second(self) -> float: + def _update_total_cpu_usage(self) -> None: current_sample = self._sample_total_cpu_usage() total_cpu: float = 0 @@ -176,20 +188,24 @@ def get_total_cpu_usage_over_1_second(self) -> float: continue # skip if not found last_time_and_cpu_usage = self._last_sample[pid] - total_cpu += self.get_cpu_over_1_second( + total_cpu += self._get_cpu_over_1_second( last_time_and_cpu_usage, time_and_cpu_usage ) self._last_sample = current_sample # replace - return total_cpu + + self.total_cpu_usage = total_cpu def _check_if_busy(self) -> bool: - return self.get_total_cpu_usage_over_1_second() > self.busy_threshold + self._update_total_cpu_usage() + return self.total_cpu_usage > self.busy_threshold -class DiskUsageMonitor(AbstractIsBusyMonitor): - 
DISK_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 +BytesRead: TypeAlias = int +BytesWrite: TypeAlias = int + +class DiskUsageMonitor(AbstractIsBusyMonitor): def __init__( self, poll_interval: float, @@ -200,52 +216,74 @@ def __init__( super().__init__(poll_interval=poll_interval) self.read_usage_threshold = read_usage_threshold self.write_usage_threshold = write_usage_threshold - self.executor = ThreadPoolExecutor(max_workers=THREAD_EXECUTOR_WORKERS) - - # TODO: can we refactor these to take advantage of the internal sleep? - # and then report partial counters instead of doing it like it is now - def _get_process_disk_usage(self, proc: psutil.Process) -> tuple[int, int]: - # between the two calls it can fail! - # rewrite to make it better! - # store previous counters -> measure and update again - io_start = proc.io_counters() - time.sleep(self.DISK_USAGE_MONITORING_INTERVAL_S) - io_end = proc.io_counters() - - # Calculate the differences - read_bytes = io_end.read_bytes - io_start.read_bytes - write_bytes = io_end.write_bytes - io_start.write_bytes - return read_bytes, write_bytes - - def get_total_disk_usage(self) -> tuple[int, int]: + + self._last_sample: dict[ + ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite] + ] = self._sample_total_disk_usage() + + self.total_bytes_read: int = 0 + self.total_bytes_write: int = 0 + + @staticmethod + def _sample_disk_usage( + process: psutil.Process, + ) -> tuple[ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite]]: + counters = process.io_counters() + return (process.pid, (time.time(), counters.read_bytes, counters.write_bytes)) + + def _sample_total_disk_usage( + self, + ) -> dict[ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite]]: futures = [ - self.thread_executor.submit(self._get_process_disk_usage, x) - for x in _get_sibling_processes() + self.thread_executor.submit(self._sample_disk_usage, p) + for p in _get_sibling_processes() ] + return dict([f.result() for f in as_completed(futures)]) - disk_usage: 
list[tuple[int, int]] = [ - future.result() for future in as_completed(futures) - ] - read_bytes: int = 0 - write_bytes: int = 0 - for read, write in disk_usage: - read_bytes += read - write_bytes += write + @staticmethod + def _get_bytes_over_one_second( + last: tuple[TimeSeconds, BytesRead, BytesWrite], + current: tuple[TimeSeconds, BytesRead, BytesWrite], + ) -> tuple[BytesRead, BytesWrite]: + interval = current[0] - last[0] + measured_bytes_read_in_interval = current[1] + measured_bytes_write_in_interval = current[2] + + # bytes_*_1_second[%] = 1[s] * measured_bytes_*_in_interval[%] / interval[s] + bytes_read_over_1_second = int(measured_bytes_read_in_interval / interval) + bytes_write_over_1_second = int(measured_bytes_write_in_interval / interval) + return bytes_read_over_1_second, bytes_write_over_1_second + + def _update_total_disk_usage(self) -> None: + current_sample = self._sample_total_disk_usage() + + total_bytes_read: int = 0 + total_bytes_write: int = 0 + for pid, time_and_disk_usage in current_sample.items(): + if pid not in self._last_sample: + continue # skip if not found - return read_bytes, write_bytes + last_time_and_disk_usage = self._last_sample[pid] + + bytes_read, bytes_write = self._get_bytes_over_one_second( + last_time_and_disk_usage, time_and_disk_usage + ) + total_bytes_read += bytes_read + total_bytes_write += bytes_write + + self._last_sample = current_sample # replace + + self.total_bytes_read = total_bytes_read + self.total_bytes_write = total_bytes_write def _check_if_busy(self) -> bool: - read_bytes, write_bytes = self.get_total_disk_usage() + self._update_total_disk_usage() return ( - read_bytes > self.read_usage_threshold - or write_bytes > self.write_usage_threshold + self.total_bytes_read > self.read_usage_threshold + or self.total_bytes_write > self.write_usage_threshold ) -class NetworkUsageMonitor(AbstractIsBusyMonitor): - NETWORK_USAGE_MONITORING_INTERVAL_S: Final[float] = 1 - - # Measure at regular intervals (is busy 
calls) class ActivityManager: @@ -255,7 +293,9 @@ def __init__(self, interval: float) -> None: self.jupyter_kernel_monitor = JupyterKernelMonitor(CHECK_INTERVAL_S) self.cpu_usage_monitor = CPUUsageMonitor( - CHECK_INTERVAL_S, busy_threshold=BUSY_USAGE_THRESHOLD_CPU + # TODO: interval could be 1 second now + CHECK_INTERVAL_S, + busy_threshold=BUSY_USAGE_THRESHOLD_CPU, ) self.disk_usage_monitor = DiskUsageMonitor( CHECK_INTERVAL_S, @@ -305,11 +345,14 @@ async def get(self): "seconds_inactive": self.activity_manager.get_idle_seconds(), "cpu_usage": { "is_busy": self.activity_manager.cpu_usage_monitor.is_busy, - "total": self.activity_manager.cpu_usage_monitor.get_total_cpu_usage_over_1_second(), + "total": self.activity_manager.cpu_usage_monitor.total_cpu_usage, }, "disk_usage": { "is_busy": self.activity_manager.disk_usage_monitor.is_busy, - "total": self.activity_manager.disk_usage_monitor.get_total_disk_usage(), + "total": { + "bytes_read": self.activity_manager.disk_usage_monitor.total_bytes_read, + "bytes_write": self.activity_manager.disk_usage_monitor.total_bytes_write, + }, }, "kernel_monitor": { "is_busy": self.activity_manager.jupyter_kernel_monitor.is_busy diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 64d570b..b146231 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -99,7 +99,8 @@ async def test_disk_usage_monitor_not_busy( stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True ): with attempt: - read_bytes, write_bytes = disk_usage_monitor.get_total_disk_usage() + read_bytes = disk_usage_monitor.total_bytes_read + write_bytes = disk_usage_monitor.total_bytes_write assert read_bytes == 0 assert write_bytes == 0 assert disk_usage_monitor.is_busy is False @@ -118,7 +119,7 @@ async def test_disk_usage_monitor_still_busy( ) as disk_usage_monitor: # wait for monitor to trigger await asyncio.sleep(1) - _, write_bytes = disk_usage_monitor.get_total_disk_usage() + write_bytes = 
disk_usage_monitor.total_bytes_write # NOTE: due to os disk cache reading is not reliable not testing it assert write_bytes > 0 From e7b82847807a1096efd3b86aa46c52cdc9bfa020 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Mar 2024 10:50:27 +0100 Subject: [PATCH 39/52] added network_manager and tests --- docker/activity_monitor.py | 113 ++++++++++++++++++++++++++++++--- tests/test_activity_monitor.py | 75 ++++++++++++++++++---- 2 files changed, 167 insertions(+), 21 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 89164bf..be6b74e 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -22,12 +22,15 @@ LISTEN_PORT: Final[int] = 19597 -CHECK_INTERVAL_S: Final[float] = 5 +KERNEL_CHECK_INTERVAL_S: Final[float] = 5 +CHECK_INTERVAL_S: Final[float] = 1 THREAD_EXECUTOR_WORKERS: Final[int] = 10 BUSY_USAGE_THRESHOLD_CPU: Final[float] = 5 # percent in range [0, 100] BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 0 # in bytes BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 0 # in bytes +BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = 0 # in bytes +BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = 0 # in bytes # Utilities @@ -152,7 +155,7 @@ def __init__(self, poll_interval: float, *, busy_threshold: float): self._last_sample: dict[ProcessID, tuple[TimeSeconds, PercentCPU]] = ( self._sample_total_cpu_usage() ) - self.total_cpu_usage: float = 0 + self.total_cpu_usage: PercentCPU = 0 @staticmethod def _sample_cpu_usage( @@ -210,8 +213,8 @@ def __init__( self, poll_interval: float, *, - read_usage_threshold: float, - write_usage_threshold: float, + read_usage_threshold: int, + write_usage_threshold: int, ): super().__init__(poll_interval=poll_interval) self.read_usage_threshold = read_usage_threshold @@ -221,8 +224,8 @@ def __init__( ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite] ] = self._sample_total_disk_usage() - self.total_bytes_read: int = 0 - self.total_bytes_write: int = 0 + self.total_bytes_read: 
BytesRead = 0 + self.total_bytes_write: BytesWrite = 0 @staticmethod def _sample_disk_usage( @@ -284,6 +287,83 @@ def _check_if_busy(self) -> bool: ) +InterfaceName: TypeAlias = str +BytesReceived: TypeAlias = int +BytesSent: TypeAlias = int + + +class NetworkUsageMonitor(AbstractIsBusyMonitor): + _EXCLUDE_INTERFACES: set[InterfaceName] = { + "lo", + } + + def __init__( + self, + poll_interval: float, + *, + received_usage_threshold: int, + sent_usage_threshold: int, + ): + super().__init__(poll_interval=poll_interval) + self.received_usage_threshold = received_usage_threshold + self.sent_usage_threshold = sent_usage_threshold + + self._last_sample: tuple[TimeSeconds, BytesReceived, BytesSent] = ( + self._sample_total_network_usage() + ) + self.bytes_received: BytesReceived = 0 + self.bytes_sent: BytesSent = 0 + + def _sample_total_network_usage( + self, + ) -> tuple[TimeSeconds, BytesReceived, BytesSent]: + net_io_counters = psutil.net_io_counters(pernic=True) + + total_bytes_received: int = 0 + total_bytes_sent: int = 0 + for nic, stats in net_io_counters.items(): + if nic in self._EXCLUDE_INTERFACES: + continue + + total_bytes_received += stats.bytes_recv + total_bytes_sent += stats.bytes_sent + + return time.time(), total_bytes_received, total_bytes_sent + + @staticmethod + def _get_bytes_over_one_second( + last: tuple[TimeSeconds, BytesReceived, BytesSent], + current: tuple[TimeSeconds, BytesReceived, BytesSent], + ) -> tuple[BytesReceived, BytesSent]: + interval = current[0] - last[0] + measured_bytes_received_in_interval = current[1] - last[1] + measured_bytes_sent_in_interval = current[2] - last[2] + + # bytes_*_1_second[%] = 1[s] * measured_bytes_*_in_interval[%] / interval[s] + bytes_received_over_1_second = int( + measured_bytes_received_in_interval / interval + ) + bytes_sent_over_1_second = int(measured_bytes_sent_in_interval / interval) + return bytes_received_over_1_second, bytes_sent_over_1_second + + def _update_total_network_usage(self) -> None: + 
current_sample = self._sample_total_network_usage() + + bytes_received, bytes_sent = self._get_bytes_over_one_second( + self._last_sample, current_sample + ) + + self._last_sample = current_sample # replace + + self.bytes_received = bytes_received + self.bytes_sent = bytes_sent + + def _check_if_busy(self) -> bool: + self._update_total_network_usage() + return ( + self.bytes_received > self.received_usage_threshold + or self.bytes_sent > self.sent_usage_threshold + ) class ActivityManager: @@ -291,9 +371,8 @@ def __init__(self, interval: float) -> None: self.interval = interval self.last_idle: datetime | None = None - self.jupyter_kernel_monitor = JupyterKernelMonitor(CHECK_INTERVAL_S) + self.jupyter_kernel_monitor = JupyterKernelMonitor(KERNEL_CHECK_INTERVAL_S) self.cpu_usage_monitor = CPUUsageMonitor( - # TODO: interval could be 1 second now CHECK_INTERVAL_S, busy_threshold=BUSY_USAGE_THRESHOLD_CPU, ) @@ -302,12 +381,18 @@ def __init__(self, interval: float) -> None: read_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_READ, write_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_WRITE, ) + self.network_monitor = NetworkUsageMonitor( + CHECK_INTERVAL_S, + received_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED, + sent_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_SENT, + ) def check(self): is_busy = ( self.jupyter_kernel_monitor.is_busy or self.cpu_usage_monitor.is_busy or self.disk_usage_monitor.is_busy + or self.network_monitor.is_busy ) if is_busy: @@ -327,6 +412,7 @@ async def run(self): self.jupyter_kernel_monitor.start() self.cpu_usage_monitor.start() self.disk_usage_monitor.start() + self.network_monitor.start() while True: with suppress(Exception): self.check() @@ -350,8 +436,15 @@ async def get(self): "disk_usage": { "is_busy": self.activity_manager.disk_usage_monitor.is_busy, "total": { - "bytes_read": self.activity_manager.disk_usage_monitor.total_bytes_read, - "bytes_write": self.activity_manager.disk_usage_monitor.total_bytes_write, + 
"bytes_read_per_second": self.activity_manager.disk_usage_monitor.total_bytes_read, + "bytes_write_per_second": self.activity_manager.disk_usage_monitor.total_bytes_write, + }, + }, + "network_usage": { + "is_busy": self.activity_manager.network_monitor.is_busy, + "total": { + "bytes_received_per_second": self.activity_manager.network_monitor.bytes_received, + "bytes_sent_per_second": self.activity_manager.network_monitor.bytes_sent, }, }, "kernel_monitor": { diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index b146231..79828a7 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -11,7 +11,7 @@ import requests_mock from queue import Queue -from typing import Callable, Iterable, TYPE_CHECKING +from typing import Callable, Final, Iterable, TYPE_CHECKING from pytest_mock import MockFixture from tenacity import AsyncRetrying from tenacity.stop import stop_after_delay @@ -127,6 +127,52 @@ async def test_disk_usage_monitor_still_busy( assert disk_usage_monitor.is_busy is True +async def test_network_usage_monitor_not_busy( + socket_server: None, + mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], +): + activity_generator = create_activity_generator(network=False, cpu=False, disk=False) + mock__get_sibling_processes([activity_generator.get_pid()]) + + with activity_monitor.NetworkUsageMonitor( + 0.5, received_usage_threshold=0, sent_usage_threshold=0 + ) as network_usage_monitor: + async for attempt in AsyncRetrying( + stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True + ): + with attempt: + assert network_usage_monitor.bytes_received == 0 + assert network_usage_monitor.bytes_sent == 0 + assert network_usage_monitor.is_busy is False + + +@pytest.fixture +def mock_network_monitor_exclude_interfaces(mocker: MockFixture) -> None: + mocker.patch("activity_monitor.NetworkUsageMonitor._EXCLUDE_INTERFACES", 
new=set()) + assert activity_monitor.NetworkUsageMonitor._EXCLUDE_INTERFACES == set() + + +async def test_network_usage_monitor_still_busy( + mock_network_monitor_exclude_interfaces: None, + socket_server: None, + mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], + create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], +): + activity_generator = create_activity_generator(network=True, cpu=False, disk=False) + mock__get_sibling_processes([activity_generator.get_pid()]) + + with activity_monitor.NetworkUsageMonitor( + 0.5, received_usage_threshold=0, sent_usage_threshold=0 + ) as network_usage_monitor: + # wait for monitor to trigger + await asyncio.sleep(1) + + assert network_usage_monitor.bytes_received > 0 + assert network_usage_monitor.bytes_sent > 0 + assert network_usage_monitor.is_busy is True + + @pytest.fixture def mock_jupyter_kernel_monitor(are_kernels_busy: bool) -> Iterable[None]: with requests_mock.Mocker(real_http=True) as m: @@ -199,23 +245,29 @@ def _queue_stopper() -> None: requests.get(f"{server_url}/", timeout=1) -@pytest.fixture -def mock_check_interval(mocker: MockFixture) -> None: - mocker.patch("activity_monitor.CHECK_INTERVAL_S", new=1) - assert activity_monitor.CHECK_INTERVAL_S == 1 - - @pytest.mark.parametrize("are_kernels_busy", [False]) -async def test_tornado_server_ok( - mock_check_interval: None, tornado_server: None, server_url: str -): +async def test_tornado_server_ok(tornado_server: None, server_url: str): result = requests.get(f"{server_url}/", timeout=5) assert result.status_code == 200 +_BIG_THRESHOLD: Final[int] = int(1e10) + + +@pytest.fixture +def mock_activity_manager_config(mocker: MockFixture) -> None: + mocker.patch("activity_monitor.CHECK_INTERVAL_S", 1) + mocker.patch("activity_monitor.KERNEL_CHECK_INTERVAL_S", 1) + + mocker.patch( + "activity_monitor.BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED", _BIG_THRESHOLD + ) + 
mocker.patch("activity_monitor.BUSY_USAGE_THRESHOLD_NETWORK_SENT", _BIG_THRESHOLD) + + @pytest.mark.parametrize("are_kernels_busy", [False]) async def test_activity_monitor_becomes_not_busy( - mock_check_interval: None, + mock_activity_manager_config: None, socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], @@ -236,6 +288,7 @@ async def test_activity_monitor_becomes_not_busy( assert debug_response["cpu_usage"]["is_busy"] is False assert debug_response["disk_usage"]["is_busy"] is False assert debug_response["kernel_monitor"]["is_busy"] is False + assert debug_response["network_usage"]["is_busy"] is False result = requests.get(f"{server_url}/", timeout=2) assert result.status_code == 200 From 8994bf8ecc8ea6e0a1a06727f50c9fcb1c99fc44 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Mar 2024 11:12:05 +0100 Subject: [PATCH 40/52] fix issues --- docker/activity_monitor.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index be6b74e..3cfdae6 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -13,7 +13,7 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import suppress from datetime import datetime -from typing import Final, TypeAlias +from typing import Final from abc import abstractmethod @@ -27,8 +27,8 @@ THREAD_EXECUTOR_WORKERS: Final[int] = 10 BUSY_USAGE_THRESHOLD_CPU: Final[float] = 5 # percent in range [0, 100] -BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 0 # in bytes -BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 0 # in bytes +BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 512 * 1000 # in bytes +BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 512 * 1000 # in bytes BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = 0 # in bytes BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = 0 # in 
bytes @@ -137,9 +137,9 @@ def _check_if_busy(self) -> bool: return self.are_kernels_busy -ProcessID: TypeAlias = int -TimeSeconds: TypeAlias = float -PercentCPU: TypeAlias = float +ProcessID = int +TimeSeconds = float +PercentCPU = float class CPUUsageMonitor(AbstractIsBusyMonitor): @@ -204,8 +204,8 @@ def _check_if_busy(self) -> bool: return self.total_cpu_usage > self.busy_threshold -BytesRead: TypeAlias = int -BytesWrite: TypeAlias = int +BytesRead = int +BytesWrite = int class DiskUsageMonitor(AbstractIsBusyMonitor): @@ -287,9 +287,9 @@ def _check_if_busy(self) -> bool: ) -InterfaceName: TypeAlias = str -BytesReceived: TypeAlias = int -BytesSent: TypeAlias = int +InterfaceName = str +BytesReceived = int +BytesSent = int class NetworkUsageMonitor(AbstractIsBusyMonitor): From 699f427bc9af890632513631186c5db3d3c5e08e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Mar 2024 11:14:29 +0100 Subject: [PATCH 41/52] updated thresholds --- docker/activity_monitor.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 3cfdae6..fc16f12 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -26,11 +26,13 @@ CHECK_INTERVAL_S: Final[float] = 1 THREAD_EXECUTOR_WORKERS: Final[int] = 10 +_KB: Final[int] = 1024 + BUSY_USAGE_THRESHOLD_CPU: Final[float] = 5 # percent in range [0, 100] -BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 512 * 1000 # in bytes -BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 512 * 1000 # in bytes -BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = 0 # in bytes -BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = 0 # in bytes +BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 512 * _KB +BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 512 * _KB +BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = 1 * _KB +BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = 1 * _KB # Utilities From 69d19989007b92ce15b6f50e7f0830f3f7ff9145 Mon Sep 17 00:00:00 2001 From: Andrei 
Neagu Date: Mon, 25 Mar 2024 11:28:13 +0100 Subject: [PATCH 42/52] refactor limits --- docker/activity_monitor.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index fc16f12..1d2736e 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -28,9 +28,9 @@ _KB: Final[int] = 1024 -BUSY_USAGE_THRESHOLD_CPU: Final[float] = 5 # percent in range [0, 100] -BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 512 * _KB -BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 512 * _KB +BUSY_USAGE_THRESHOLD_CPU: Final[float] = 0.5 # percent in range [0, 100] +BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 0 +BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 0 BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = 1 * _KB BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = 1 * _KB @@ -251,8 +251,8 @@ def _get_bytes_over_one_second( current: tuple[TimeSeconds, BytesRead, BytesWrite], ) -> tuple[BytesRead, BytesWrite]: interval = current[0] - last[0] - measured_bytes_read_in_interval = current[1] - measured_bytes_write_in_interval = current[2] + measured_bytes_read_in_interval = current[1] - last[1] + measured_bytes_write_in_interval = current[2] - last[2] # bytes_*_1_second[%] = 1[s] * measured_bytes_*_in_interval[%] / interval[s] bytes_read_over_1_second = int(measured_bytes_read_in_interval / interval) From f113c7951db0ae359e5659c32a611f1655721aee Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Mar 2024 11:51:16 +0100 Subject: [PATCH 43/52] refactor to use threading --- docker/activity_monitor.py | 39 +++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 1d2736e..ddeb251 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -63,7 +63,11 @@ def _worker(self) -> None: time.sleep(self._poll_interval) def start(self) -> None: - self._thread = 
Thread(target=self._worker, daemon=True) + self._thread = Thread( + target=self._worker, + daemon=True, + name=f"{self.__class__.__name__}_check_busy", + ) self._thread.start() def stop(self) -> None: @@ -370,6 +374,9 @@ def _check_if_busy(self) -> bool: class ActivityManager: def __init__(self, interval: float) -> None: + self._keep_running: bool = True + self._thread: Thread | None = None + self.interval = interval self.last_idle: datetime | None = None @@ -410,15 +417,33 @@ def get_idle_seconds(self) -> float: idle_seconds = (datetime.utcnow() - self.last_idle).total_seconds() return idle_seconds if idle_seconds > 0 else 0 - async def run(self): + def _worker(self) -> None: + while self._keep_running: + with suppress(Exception): + self.check() + time.sleep(self.interval) + + def start(self) -> None: self.jupyter_kernel_monitor.start() self.cpu_usage_monitor.start() self.disk_usage_monitor.start() self.network_monitor.start() - while True: - with suppress(Exception): - self.check() - await asyncio.sleep(self.interval) + + self._thread = Thread( + target=self._worker, + daemon=True, + name=f"{self.__class__.__name__}_check_busy", + ) + self._thread.start() + + def stop(self) -> None: + self.jupyter_kernel_monitor.stop() + self.cpu_usage_monitor.stop() + self.disk_usage_monitor.stop() + self.network_monitor.stop() + + self._keep_running = False + self._thread.join() class DebugHandler(tornado.web.RequestHandler): @@ -470,13 +495,13 @@ async def get(self): async def make_app() -> tornado.web.Application: activity_manager = ActivityManager(CHECK_INTERVAL_S) + activity_manager.start() app = tornado.web.Application( [ (r"/", MainHandler, {"activity_manager": activity_manager}), (r"/debug", DebugHandler, {"activity_manager": activity_manager}), ] ) - asyncio.create_task(activity_manager.run()) return app From 04c8de701270c04e3a1171307aecfdd02e082089 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Mar 2024 13:28:14 +0100 Subject: [PATCH 44/52] replaced tornado 
with builtin server --- docker/activity_monitor.py | 131 +++++++++++++++++---------------- tests/test_activity_monitor.py | 56 ++++++-------- 2 files changed, 91 insertions(+), 96 deletions(-) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index ddeb251..702c1a8 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -1,20 +1,19 @@ #!/home/jovyan/.venv/bin/python -import asyncio -import logging import json +import logging import psutil import requests -import tornado import time -from threading import Thread +from abc import abstractmethod from concurrent.futures import ThreadPoolExecutor, as_completed from contextlib import suppress from datetime import datetime +from http.server import HTTPServer, BaseHTTPRequestHandler +from threading import Thread from typing import Final -from abc import abstractmethod _logger = logging.getLogger(__name__) @@ -446,70 +445,78 @@ def stop(self) -> None: self._thread.join() -class DebugHandler(tornado.web.RequestHandler): - def initialize(self, activity_manager: ActivityManager): - self.activity_manager: ActivityManager = activity_manager - - async def get(self): - assert self.activity_manager - self.write( - json.dumps( - { - "seconds_inactive": self.activity_manager.get_idle_seconds(), - "cpu_usage": { - "is_busy": self.activity_manager.cpu_usage_monitor.is_busy, - "total": self.activity_manager.cpu_usage_monitor.total_cpu_usage, - }, - "disk_usage": { - "is_busy": self.activity_manager.disk_usage_monitor.is_busy, - "total": { - "bytes_read_per_second": self.activity_manager.disk_usage_monitor.total_bytes_read, - "bytes_write_per_second": self.activity_manager.disk_usage_monitor.total_bytes_write, - }, - }, - "network_usage": { - "is_busy": self.activity_manager.network_monitor.is_busy, - "total": { - "bytes_received_per_second": self.activity_manager.network_monitor.bytes_received, - "bytes_sent_per_second": self.activity_manager.network_monitor.bytes_sent, - }, - }, - 
"kernel_monitor": { - "is_busy": self.activity_manager.jupyter_kernel_monitor.is_busy - }, - } - ) - ) +def _get_response_debug(activity_manager: ActivityManager) -> dict: + return { + "seconds_inactive": activity_manager.get_idle_seconds(), + "cpu_usage": { + "is_busy": activity_manager.cpu_usage_monitor.is_busy, + "total": activity_manager.cpu_usage_monitor.total_cpu_usage, + }, + "disk_usage": { + "is_busy": activity_manager.disk_usage_monitor.is_busy, + "total": { + "bytes_read_per_second": activity_manager.disk_usage_monitor.total_bytes_read, + "bytes_write_per_second": activity_manager.disk_usage_monitor.total_bytes_write, + }, + }, + "network_usage": { + "is_busy": activity_manager.network_monitor.is_busy, + "total": { + "bytes_received_per_second": activity_manager.network_monitor.bytes_received, + "bytes_sent_per_second": activity_manager.network_monitor.bytes_sent, + }, + }, + "kernel_monitor": {"is_busy": activity_manager.jupyter_kernel_monitor.is_busy}, + } -class MainHandler(tornado.web.RequestHandler): - def initialize(self, activity_manager: ActivityManager): - self.activity_manager: ActivityManager = activity_manager +def _get_response_root(activity_manager: ActivityManager) -> dict: + return {"seconds_inactive": activity_manager.get_idle_seconds()} - async def get(self): - assert self.activity_manager - self.write( - json.dumps({"seconds_inactive": self.activity_manager.get_idle_seconds()}) - ) +class ServerState: + pass -async def make_app() -> tornado.web.Application: - activity_manager = ActivityManager(CHECK_INTERVAL_S) - activity_manager.start() - app = tornado.web.Application( - [ - (r"/", MainHandler, {"activity_manager": activity_manager}), - (r"/debug", DebugHandler, {"activity_manager": activity_manager}), - ] - ) - return app + +class HTTPServerWithState(HTTPServer): + def __init__(self, server_address, RequestHandlerClass, state): + self.state = state # application's state + super().__init__(server_address, RequestHandlerClass) + + 
+class JSONRequestHandler(BaseHTTPRequestHandler): + def _send_response(self, code: int, data: dict) -> None: + self.send_response(code) + self.send_header("Content-type", "application/json") + self.end_headers() + self.wfile.write(json.dumps(data).encode("utf-8")) + + def do_GET(self): + state = self.server.state + + if self.path == "/": # The root endpoint + self._send_response(200, _get_response_root(state.activity_manager)) + elif self.path == "/debug": # The debug endpoint + self._send_response(200, _get_response_debug(state.activity_manager)) + else: # Handle case where the endpoint is not found + self._send_response( + 404, _get_response_debug({"error": "Resource not found"}) + ) + + +def make_server(port: int) -> HTTPServerWithState: + state = ServerState() + state.activity_manager = ActivityManager(CHECK_INTERVAL_S) + state.activity_manager.start() + + server_address = ("", port) # Listen on all interfaces, port 8000 + return HTTPServerWithState(server_address, JSONRequestHandler, state) -async def main(): - app = await make_app() - app.listen(19597) - await asyncio.Event().wait() +def main(): + http_server = make_server(LISTEN_PORT) + http_server.serve_forever() if __name__ == "__main__": - asyncio.run(main()) + main() diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py index 79828a7..61962e2 100644 --- a/tests/test_activity_monitor.py +++ b/tests/test_activity_monitor.py @@ -1,16 +1,13 @@ import asyncio -import pytest -import psutil -import tornado.web import json -import tornado.httpserver -import tornado.ioloop -import threading +import psutil +import pytest import pytest_asyncio import requests import requests_mock +import threading +import time -from queue import Queue from typing import Callable, Final, Iterable, TYPE_CHECKING from pytest_mock import MockFixture from tenacity import AsyncRetrying @@ -127,7 +124,16 @@ async def test_disk_usage_monitor_still_busy( assert disk_usage_monitor.is_busy is True +@pytest.fixture +def 
mock_no_network_activity(mocker: MockFixture) -> None: + mocker.patch( + "activity_monitor.NetworkUsageMonitor._sample_total_network_usage", + side_effect=lambda: (time.time(), 0, 0), + ) + + async def test_network_usage_monitor_not_busy( + mock_no_network_activity: None, socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], @@ -201,29 +207,11 @@ async def server_url() -> str: @pytest_asyncio.fixture -async def tornado_server(mock_jupyter_kernel_monitor: None, server_url: str) -> None: - app = await activity_monitor.make_app() - - stop_queue = Queue() - - def _run_server_worker(): - http_server = tornado.httpserver.HTTPServer(app) - http_server.listen(activity_monitor.LISTEN_PORT) - current_io_loop = tornado.ioloop.IOLoop.current() - - def _queue_stopper() -> None: - stop_queue.get() - current_io_loop.stop() - - stopping_thread = threading.Thread(target=_queue_stopper, daemon=True) - stopping_thread.start() - - current_io_loop.start() - stopping_thread.join() +async def http_server(mock_jupyter_kernel_monitor: None, server_url: str) -> None: + server = activity_monitor.make_server(activity_monitor.LISTEN_PORT) - # cleanly shut down tornado server and loop - current_io_loop.close() - http_server.stop() + def _run_server_worker() -> None: + server.serve_forever() thread = threading.Thread(target=_run_server_worker, daemon=True) thread.start() @@ -238,15 +226,15 @@ def _queue_stopper() -> None: yield None - stop_queue.put(None) - thread.join(timeout=1) + server.shutdown() + server.server_close() - with pytest.raises(requests.exceptions.ReadTimeout): + with pytest.raises(requests.exceptions.RequestException): requests.get(f"{server_url}/", timeout=1) @pytest.mark.parametrize("are_kernels_busy", [False]) -async def test_tornado_server_ok(tornado_server: None, server_url: str): +async def test_http_server_ok(http_server: None, server_url: str): result = 
requests.get(f"{server_url}/", timeout=5) assert result.status_code == 200 @@ -271,7 +259,7 @@ async def test_activity_monitor_becomes_not_busy( socket_server: None, mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], - tornado_server: None, + http_server: None, server_url: str, ): activity_generator = create_activity_generator(network=False, cpu=False, disk=False) From bec3433ca5ae5f1d946d9658e67f8dcf198166e3 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 25 Mar 2024 13:34:41 +0100 Subject: [PATCH 45/52] removing the need to specify the interpreter --- .osparc/jupyter-math/runtime.yml | 2 +- docker/activity.py | 6 +----- docker/activity_monitor.py | 3 --- docker/entrypoint.bash | 2 +- 4 files changed, 3 insertions(+), 10 deletions(-) diff --git a/.osparc/jupyter-math/runtime.yml b/.osparc/jupyter-math/runtime.yml index 9498aaf..75d2568 100644 --- a/.osparc/jupyter-math/runtime.yml +++ b/.osparc/jupyter-math/runtime.yml @@ -21,5 +21,5 @@ paths-mapping: callbacks-mapping: inactivity: service: container - command: "/docker/activity.py" + command: ["python", "/docker/activity.py"] timeout: 1 \ No newline at end of file diff --git a/docker/activity.py b/docker/activity.py index 51173a6..c8c7989 100644 --- a/docker/activity.py +++ b/docker/activity.py @@ -1,8 +1,4 @@ -#!/home/jovyan/.venv/bin/python - -# prints the result of the activity command - import requests r = requests.get("http://localhost:19597") -print(r.text) \ No newline at end of file +print(r.text) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py index 702c1a8..7b763ac 100755 --- a/docker/activity_monitor.py +++ b/docker/activity_monitor.py @@ -1,6 +1,3 @@ -#!/home/jovyan/.venv/bin/python - - import json import logging import psutil diff --git a/docker/entrypoint.bash b/docker/entrypoint.bash index ac73777..56b83c7 100755 --- a/docker/entrypoint.bash +++ b/docker/entrypoint.bash @@ 
-74,5 +74,5 @@ chmod gu-w "/home/${NB_USER}/work" echo echo "$INFO" "Starting notebook ..." -exec gosu "$NB_USER" /docker/activity_monitor.py & +exec gosu "$NB_USER" python /docker/activity_monitor.py & exec gosu "$NB_USER" /docker/boot_notebook.bash From 9ba6c700f811005d12220eb42c5185d00ecfda1e Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 26 Mar 2024 16:19:59 +0100 Subject: [PATCH 46/52] update to use service-activity-monitor --- .coverage | Bin 0 -> 53248 bytes .gitignore | 1 - .osparc/jupyter-math/runtime.yml | 2 +- Dockerfile | 22 +- Makefile | 12 - README.md | 9 - docker/activity.py | 4 - docker/activity_monitor.py | 519 ------------------------------- docker/entrypoint.bash | 2 +- requirements/test.in | 14 - requirements/test.txt | 54 ---- scripts/ci/run_tests.sh | 12 - tests/_import_utils.py | 13 - tests/conftest.py | 135 -------- tests/test_activity_monitor.py | 284 ----------------- 15 files changed, 22 insertions(+), 1061 deletions(-) create mode 100644 .coverage delete mode 100644 docker/activity.py delete mode 100755 docker/activity_monitor.py delete mode 100644 requirements/test.in delete mode 100644 requirements/test.txt delete mode 100755 scripts/ci/run_tests.sh delete mode 100644 tests/_import_utils.py delete mode 100644 tests/conftest.py delete mode 100644 tests/test_activity_monitor.py diff --git a/.coverage b/.coverage new file mode 100644 index 0000000000000000000000000000000000000000..dd919e7937a3272aad8a3e2c2f90661cde9cec99 GIT binary patch literal 53248 zcmeI)&uiOe7zc34PVLySQAa5zgp&8NrdgACVTH1dk!4#BWreO=8H2&x*YfLFjU~B~ zoF9j7xE%xrWB9us>?IU@R>C zRJJ}eO1Uo$>yz>&Yrgb#`Siq(CAaX!#5Z|au;~Og2tWV=5cq!!^e&h3mB~r-=8Z_! 
z+bRmAtpfdAe*68EV2*3{NqoFKVcq&6dcfC^F0s>dnB znI!wH2vk!A%Cl9dqd4ODet6&I!3&L))h~HzQr!z;~;WFWQb5SJMlf$nia)HkI9tRAbKy&M{iL5#C^JsI048 zp{qJ)a;*WKe&#)LCdre`O$yHQQ`UY)zBFZyWA?gkoyf0s-3~Vn<ORM|GXuE>K(C-7WSebNR~DlzFod zccY}Wl3)3uE|j+99@A)g%2hJ#2I)ky-z1sl=+Bbr7)j1hb>{QA%JS5x>LlrEi^*Guu^7dq`@6x2tkL*y$e54=pyJ~BGUAAd9=}0G_Axm?xUI7FrY)DEh>wY(i`!BzY zyC<(b{djc&{nx2IrJ9YC)9`-J4?~BRRbIBj*V3b6rX@q3g~Az8_x-k#o?aU$$Ayrt z>4YMV;ptUrsb5C8#bvCn&%`mMiwjTlS7^|sanTDOb%nWss zCrD|s?KViE84t}Yxz>}M9!MavRQzh?pinIIy-?VlN`oRVP2tWV=5P$## zAOHafKmY;|fWUuGAZupKJb(VrSic$8@AQNX0uX=z1Rwwb2tWV=5P$##AOL}*DNxE} zPg&`|c)Vkxu00Izz00bZa0SG_<0uX=z1P-ad(pWEB_!po5asR(V+OP-<0uX=z z1Rwwb2tWV=5P$##An+k;`8P=m`Qz%M-00bZa0SG_<0uX=z1Rwwb2teR) z31qY7g8u&hfnhy3T(Kb<2tWV=5P$##AOHafKmY;|fB*#c7kJ4WD-=HZW5@aaJM-5Y z=eLc`&fPm}>$jVC{`&db+h1+`uzL4#`1ddO-}vfs$FO$xFB4}W009U<00Izz z00bZa0SG_<0uVTo0%P=7g82J?pR{|8|caeV*) literal 0 HcmV?d00001 diff --git a/.gitignore b/.gitignore index 7c0d726..6f3e694 100644 --- a/.gitignore +++ b/.gitignore @@ -54,4 +54,3 @@ docker-compose.yml validation/ *.ignore.* -.coverage \ No newline at end of file diff --git a/.osparc/jupyter-math/runtime.yml b/.osparc/jupyter-math/runtime.yml index 75d2568..3eaad03 100644 --- a/.osparc/jupyter-math/runtime.yml +++ b/.osparc/jupyter-math/runtime.yml @@ -21,5 +21,5 @@ paths-mapping: callbacks-mapping: inactivity: service: container - command: ["python", "/docker/activity.py"] + command: ["python", "/usr/local/bin/service-monitor/activity.py"] timeout: 1 \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index 4831188..861839e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -92,8 +92,26 @@ ENV JP_LSP_VIRTUAL_DIR="/home/${NB_USER}/.virtual_documents" # Copying boot scripts COPY --chown=$NB_UID:$NB_GID docker /docker -RUN chmod +x /docker/activity.py \ - && chmod +x /docker/activity_monitor.py +# install service activity monitor +ARG ACTIVITY_MONITOR_VERSION=v0.0.1 + +# Detection thresholds for application +ENV 
ACTIVITY_MONITOR_BUSY_THRESHOLD_CPU_PERCENT=0.5 +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_DISK_READ_BPS=0 +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_DISK_WRITE_BPS=0 +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_RECEIVE_BPS=1024 +ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_SENT_BPS=1024 + +# install service activity monitor +RUN apt-get update && \ + apt-get install -y curl && \ + # install using curl + curl -sSL https://raw.githubusercontent.com/ITISFoundation/service-activity-monitor/main/scripts/install.sh | \ + bash -s -- ${ACTIVITY_MONITOR_VERSION} && \ + # cleanup and remove curl + apt-get purge -y --auto-remove curl && \ + rm -rf /var/lib/apt/lists/* + RUN echo 'export PATH="/home/${NB_USER}/.venv/bin:$PATH"' >> "/home/${NB_USER}/.bashrc" diff --git a/Makefile b/Makefile index 15bf5c5..5402f42 100644 --- a/Makefile +++ b/Makefile @@ -67,18 +67,6 @@ publish-local: ## push to local throw away registry to test integration docker push registry:5000/simcore/services/dynamic/$(DOCKER_IMAGE_NAME):$(DOCKER_IMAGE_TAG) @curl registry:5000/v2/_catalog | jq -.PHONY: install-test -install-test: ## install dependencies for testing - pip install -r requirements/test.txt - -.PHONY: tests-dev -tests-dev: ## run tests in development mode - .venv/bin/pytest --pdb -vvv tests - -.PHONY: tests-ci -tests-ci: ## run testds in the CI - .venv/bin/pytest -vvv --color=yes --cov-report term --cov=activity_monitor tests - .PHONY: help help: ## this colorful help @echo "Recipes for '$(notdir $(CURDIR))':" diff --git a/README.md b/README.md index 516169d..096624f 100644 --- a/README.md +++ b/README.md @@ -29,15 +29,6 @@ If you already have a local copy of **o2S2PARC** running a make publish-local ``` -Setup for test development locally: - -```shell -make devenv -source .venv/bin/activate -make tests-dev -``` - - ### Testing manually After a new service version has been published on the master deployment, it can be manually tested. 
For example a Template, called "Test Jupyter-math 2.0.9 ipywidgets" can be used for internal testing on the master deployment. diff --git a/docker/activity.py b/docker/activity.py deleted file mode 100644 index c8c7989..0000000 --- a/docker/activity.py +++ /dev/null @@ -1,4 +0,0 @@ -import requests - -r = requests.get("http://localhost:19597") -print(r.text) diff --git a/docker/activity_monitor.py b/docker/activity_monitor.py deleted file mode 100755 index 7b763ac..0000000 --- a/docker/activity_monitor.py +++ /dev/null @@ -1,519 +0,0 @@ -import json -import logging -import psutil -import requests -import time - -from abc import abstractmethod -from concurrent.futures import ThreadPoolExecutor, as_completed -from contextlib import suppress -from datetime import datetime -from http.server import HTTPServer, BaseHTTPRequestHandler -from threading import Thread -from typing import Final - - -_logger = logging.getLogger(__name__) - - -LISTEN_PORT: Final[int] = 19597 - -KERNEL_CHECK_INTERVAL_S: Final[float] = 5 -CHECK_INTERVAL_S: Final[float] = 1 -THREAD_EXECUTOR_WORKERS: Final[int] = 10 - -_KB: Final[int] = 1024 - -BUSY_USAGE_THRESHOLD_CPU: Final[float] = 0.5 # percent in range [0, 100] -BUSY_USAGE_THRESHOLD_DISK_READ: Final[int] = 0 -BUSY_USAGE_THRESHOLD_DISK_WRITE: Final[int] = 0 -BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED: Final[int] = 1 * _KB -BUSY_USAGE_THRESHOLD_NETWORK_SENT: Final[int] = 1 * _KB - - -# Utilities -class AbstractIsBusyMonitor: - def __init__(self, poll_interval: float) -> None: - self._poll_interval: float = poll_interval - self._keep_running: bool = True - self._thread: Thread | None = None - - self.is_busy: bool = True - self.thread_executor = ThreadPoolExecutor(max_workers=THREAD_EXECUTOR_WORKERS) - - @abstractmethod - def _check_if_busy(self) -> bool: - """Must be user defined and returns if current - metric is to be considered busy - - Returns: - bool: True if considered busy - """ - - def _worker(self) -> None: - while self._keep_running: - try: - 
self.is_busy = self._check_if_busy() - except Exception as e: - _logger.exception("Failed to check if busy") - time.sleep(self._poll_interval) - - def start(self) -> None: - self._thread = Thread( - target=self._worker, - daemon=True, - name=f"{self.__class__.__name__}_check_busy", - ) - self._thread.start() - - def stop(self) -> None: - self._keep_running = False - if self._thread: - self._thread.join() - self.thread_executor.shutdown(wait=True) - - def __enter__(self): - self.start() - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.stop() - - -def __get_children_processes_recursive(pid) -> list[psutil.Process]: - try: - return psutil.Process(pid).children(recursive=True) - except psutil.NoSuchProcess: - return [] - - -def _get_sibling_processes() -> list[psutil.Process]: - # Returns the CPU usage of all processes except this one. - # ASSUMPTIONS: - # - `CURRENT_PROC` is a child of root process - # - `CURRENT_PROC` does not create any child processes - # - # It looks for its brothers (and their children) p1 to pN in order - # to compute real CPU usage. - # - CURRENT_PROC - # - p1 - # ... 
- # - pN - current_process = psutil.Process() - parent_pid = current_process.ppid() - all_children = __get_children_processes_recursive(parent_pid) - return [c for c in all_children if c.pid != current_process.pid] - - -# Monitors - - -class JupyterKernelMonitor(AbstractIsBusyMonitor): - BASE_URL = "http://localhost:8888" - HEADERS = {"accept": "application/json"} - - def __init__(self, poll_interval: float) -> None: - super().__init__(poll_interval=poll_interval) - self.are_kernels_busy: bool = False - - def _get(self, path: str) -> dict: - r = requests.get(f"{self.BASE_URL}{path}", headers=self.HEADERS) - return r.json() - - def _update_kernels_activity(self) -> None: - json_response = self._get("/api/kernels") - - are_kernels_busy = False - - for kernel_data in json_response: - kernel_id = kernel_data["id"] - - kernel_info = self._get(f"/api/kernels/{kernel_id}") - if kernel_info["execution_state"] != "idle": - are_kernels_busy = True - - self.are_kernels_busy = are_kernels_busy - - def _check_if_busy(self) -> bool: - self._update_kernels_activity() - return self.are_kernels_busy - - -ProcessID = int -TimeSeconds = float -PercentCPU = float - - -class CPUUsageMonitor(AbstractIsBusyMonitor): - """At regular intervals computes the total CPU usage - and averages over 1 second. 
- """ - - def __init__(self, poll_interval: float, *, busy_threshold: float): - super().__init__(poll_interval=poll_interval) - self.busy_threshold = busy_threshold - - # snapshot - self._last_sample: dict[ProcessID, tuple[TimeSeconds, PercentCPU]] = ( - self._sample_total_cpu_usage() - ) - self.total_cpu_usage: PercentCPU = 0 - - @staticmethod - def _sample_cpu_usage( - process: psutil.Process, - ) -> tuple[ProcessID, tuple[TimeSeconds, PercentCPU]]: - """returns: tuple[pid, tuple[time, percent_cpu_usage]]""" - return (process.pid, (time.time(), process.cpu_percent())) - - def _sample_total_cpu_usage( - self, - ) -> dict[ProcessID, tuple[TimeSeconds, PercentCPU]]: - futures = [ - self.thread_executor.submit(self._sample_cpu_usage, p) - for p in _get_sibling_processes() - ] - return dict([f.result() for f in as_completed(futures)]) - - @staticmethod - def _get_cpu_over_1_second( - last: tuple[TimeSeconds, PercentCPU], current: tuple[TimeSeconds, PercentCPU] - ) -> float: - interval = current[0] - last[0] - measured_cpu_in_interval = current[1] - # cpu_over_1_second[%] = 1[s] * measured_cpu_in_interval[%] / interval[s] - return measured_cpu_in_interval / interval - - def _update_total_cpu_usage(self) -> None: - current_sample = self._sample_total_cpu_usage() - - total_cpu: float = 0 - for pid, time_and_cpu_usage in current_sample.items(): - if pid not in self._last_sample: - continue # skip if not found - - last_time_and_cpu_usage = self._last_sample[pid] - total_cpu += self._get_cpu_over_1_second( - last_time_and_cpu_usage, time_and_cpu_usage - ) - - self._last_sample = current_sample # replace - - self.total_cpu_usage = total_cpu - - def _check_if_busy(self) -> bool: - self._update_total_cpu_usage() - return self.total_cpu_usage > self.busy_threshold - - -BytesRead = int -BytesWrite = int - - -class DiskUsageMonitor(AbstractIsBusyMonitor): - def __init__( - self, - poll_interval: float, - *, - read_usage_threshold: int, - write_usage_threshold: int, - ): - 
super().__init__(poll_interval=poll_interval) - self.read_usage_threshold = read_usage_threshold - self.write_usage_threshold = write_usage_threshold - - self._last_sample: dict[ - ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite] - ] = self._sample_total_disk_usage() - - self.total_bytes_read: BytesRead = 0 - self.total_bytes_write: BytesWrite = 0 - - @staticmethod - def _sample_disk_usage( - process: psutil.Process, - ) -> tuple[ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite]]: - counters = process.io_counters() - return (process.pid, (time.time(), counters.read_bytes, counters.write_bytes)) - - def _sample_total_disk_usage( - self, - ) -> dict[ProcessID, tuple[TimeSeconds, BytesRead, BytesWrite]]: - futures = [ - self.thread_executor.submit(self._sample_disk_usage, p) - for p in _get_sibling_processes() - ] - return dict([f.result() for f in as_completed(futures)]) - - @staticmethod - def _get_bytes_over_one_second( - last: tuple[TimeSeconds, BytesRead, BytesWrite], - current: tuple[TimeSeconds, BytesRead, BytesWrite], - ) -> tuple[BytesRead, BytesWrite]: - interval = current[0] - last[0] - measured_bytes_read_in_interval = current[1] - last[1] - measured_bytes_write_in_interval = current[2] - last[2] - - # bytes_*_1_second[%] = 1[s] * measured_bytes_*_in_interval[%] / interval[s] - bytes_read_over_1_second = int(measured_bytes_read_in_interval / interval) - bytes_write_over_1_second = int(measured_bytes_write_in_interval / interval) - return bytes_read_over_1_second, bytes_write_over_1_second - - def _update_total_disk_usage(self) -> None: - current_sample = self._sample_total_disk_usage() - - total_bytes_read: int = 0 - total_bytes_write: int = 0 - for pid, time_and_disk_usage in current_sample.items(): - if pid not in self._last_sample: - continue # skip if not found - - last_time_and_disk_usage = self._last_sample[pid] - - bytes_read, bytes_write = self._get_bytes_over_one_second( - last_time_and_disk_usage, time_and_disk_usage - ) - total_bytes_read 
+= bytes_read - total_bytes_write += bytes_write - - self._last_sample = current_sample # replace - - self.total_bytes_read = total_bytes_read - self.total_bytes_write = total_bytes_write - - def _check_if_busy(self) -> bool: - self._update_total_disk_usage() - return ( - self.total_bytes_read > self.read_usage_threshold - or self.total_bytes_write > self.write_usage_threshold - ) - - -InterfaceName = str -BytesReceived = int -BytesSent = int - - -class NetworkUsageMonitor(AbstractIsBusyMonitor): - _EXCLUDE_INTERFACES: set[InterfaceName] = { - "lo", - } - - def __init__( - self, - poll_interval: float, - *, - received_usage_threshold: int, - sent_usage_threshold: int, - ): - super().__init__(poll_interval=poll_interval) - self.received_usage_threshold = received_usage_threshold - self.sent_usage_threshold = sent_usage_threshold - - self._last_sample: tuple[TimeSeconds, BytesReceived, BytesSent] = ( - self._sample_total_network_usage() - ) - self.bytes_received: BytesReceived = 0 - self.bytes_sent: BytesSent = 0 - - def _sample_total_network_usage( - self, - ) -> tuple[TimeSeconds, BytesReceived, BytesSent]: - net_io_counters = psutil.net_io_counters(pernic=True) - - total_bytes_received: int = 0 - total_bytes_sent: int = 0 - for nic, stats in net_io_counters.items(): - if nic in self._EXCLUDE_INTERFACES: - continue - - total_bytes_received += stats.bytes_recv - total_bytes_sent += stats.bytes_sent - - return time.time(), total_bytes_received, total_bytes_sent - - @staticmethod - def _get_bytes_over_one_second( - last: tuple[TimeSeconds, BytesReceived, BytesSent], - current: tuple[TimeSeconds, BytesReceived, BytesSent], - ) -> tuple[BytesReceived, BytesSent]: - interval = current[0] - last[0] - measured_bytes_received_in_interval = current[1] - last[1] - measured_bytes_sent_in_interval = current[2] - last[2] - - # bytes_*_1_second[%] = 1[s] * measured_bytes_*_in_interval[%] / interval[s] - bytes_received_over_1_second = int( - measured_bytes_received_in_interval / 
interval - ) - bytes_sent_over_1_second = int(measured_bytes_sent_in_interval / interval) - return bytes_received_over_1_second, bytes_sent_over_1_second - - def _update_total_network_usage(self) -> None: - current_sample = self._sample_total_network_usage() - - bytes_received, bytes_sent = self._get_bytes_over_one_second( - self._last_sample, current_sample - ) - - self._last_sample = current_sample # replace - - self.bytes_received = bytes_received - self.bytes_sent = bytes_sent - - def _check_if_busy(self) -> bool: - self._update_total_network_usage() - return ( - self.bytes_received > self.received_usage_threshold - or self.bytes_sent > self.sent_usage_threshold - ) - - -class ActivityManager: - def __init__(self, interval: float) -> None: - self._keep_running: bool = True - self._thread: Thread | None = None - - self.interval = interval - self.last_idle: datetime | None = None - - self.jupyter_kernel_monitor = JupyterKernelMonitor(KERNEL_CHECK_INTERVAL_S) - self.cpu_usage_monitor = CPUUsageMonitor( - CHECK_INTERVAL_S, - busy_threshold=BUSY_USAGE_THRESHOLD_CPU, - ) - self.disk_usage_monitor = DiskUsageMonitor( - CHECK_INTERVAL_S, - read_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_READ, - write_usage_threshold=BUSY_USAGE_THRESHOLD_DISK_WRITE, - ) - self.network_monitor = NetworkUsageMonitor( - CHECK_INTERVAL_S, - received_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED, - sent_usage_threshold=BUSY_USAGE_THRESHOLD_NETWORK_SENT, - ) - - def check(self): - is_busy = ( - self.jupyter_kernel_monitor.is_busy - or self.cpu_usage_monitor.is_busy - or self.disk_usage_monitor.is_busy - or self.network_monitor.is_busy - ) - - if is_busy: - self.last_idle = None - - if not is_busy and self.last_idle is None: - self.last_idle = datetime.utcnow() - - def get_idle_seconds(self) -> float: - if self.last_idle is None: - return 0 - - idle_seconds = (datetime.utcnow() - self.last_idle).total_seconds() - return idle_seconds if idle_seconds > 0 else 0 - - def _worker(self) -> 
None: - while self._keep_running: - with suppress(Exception): - self.check() - time.sleep(self.interval) - - def start(self) -> None: - self.jupyter_kernel_monitor.start() - self.cpu_usage_monitor.start() - self.disk_usage_monitor.start() - self.network_monitor.start() - - self._thread = Thread( - target=self._worker, - daemon=True, - name=f"{self.__class__.__name__}_check_busy", - ) - self._thread.start() - - def stop(self) -> None: - self.jupyter_kernel_monitor.stop() - self.cpu_usage_monitor.stop() - self.disk_usage_monitor.stop() - self.network_monitor.stop() - - self._keep_running = False - self._thread.join() - - -def _get_response_debug(activity_manager: ActivityManager) -> dict: - return { - "seconds_inactive": activity_manager.get_idle_seconds(), - "cpu_usage": { - "is_busy": activity_manager.cpu_usage_monitor.is_busy, - "total": activity_manager.cpu_usage_monitor.total_cpu_usage, - }, - "disk_usage": { - "is_busy": activity_manager.disk_usage_monitor.is_busy, - "total": { - "bytes_read_per_second": activity_manager.disk_usage_monitor.total_bytes_read, - "bytes_write_per_second": activity_manager.disk_usage_monitor.total_bytes_write, - }, - }, - "network_usage": { - "is_busy": activity_manager.network_monitor.is_busy, - "total": { - "bytes_received_per_second": activity_manager.network_monitor.bytes_received, - "bytes_sent_per_second": activity_manager.network_monitor.bytes_sent, - }, - }, - "kernel_monitor": {"is_busy": activity_manager.jupyter_kernel_monitor.is_busy}, - } - - -def _get_response_root(activity_manager: ActivityManager) -> dict: - return {"seconds_inactive": activity_manager.get_idle_seconds()} - - -class ServerState: - pass - - -class HTTPServerWithState(HTTPServer): - def __init__(self, server_address, RequestHandlerClass, state): - self.state = state # application's state - super().__init__(server_address, RequestHandlerClass) - - -class JSONRequestHandler(BaseHTTPRequestHandler): - def _send_response(self, code: int, data: dict) -> 
None: - self.send_response(code) - self.send_header("Content-type", "application/json") - self.end_headers() - self.wfile.write(json.dumps(data).encode("utf-8")) - - def do_GET(self): - state = self.server.state - - if self.path == "/": # The root endpoint - self._send_response(200, _get_response_root(state.activity_manager)) - elif self.path == "/debug": # The debug endpoint - self._send_response(200, _get_response_debug(state.activity_manager)) - else: # Handle case where the endpoint is not found - self._send_response( - 404, _get_response_debug({"error": "Resource not found"}) - ) - - -def make_server(port: int) -> HTTPServerWithState: - state = ServerState() - state.activity_manager = ActivityManager(CHECK_INTERVAL_S) - state.activity_manager.start() - - server_address = ("", port) # Listen on all interfaces, port 8000 - return HTTPServerWithState(server_address, JSONRequestHandler, state) - - -def main(): - http_server = make_server(LISTEN_PORT) - http_server.serve_forever() - - -if __name__ == "__main__": - main() diff --git a/docker/entrypoint.bash b/docker/entrypoint.bash index 56b83c7..2029a36 100755 --- a/docker/entrypoint.bash +++ b/docker/entrypoint.bash @@ -74,5 +74,5 @@ chmod gu-w "/home/${NB_USER}/work" echo echo "$INFO" "Starting notebook ..." 
-exec gosu "$NB_USER" python /docker/activity_monitor.py & +exec gosu "$NB_USER" python /usr/local/bin/service-monitor/activity_monitor.py & exec gosu "$NB_USER" /docker/boot_notebook.bash diff --git a/requirements/test.in b/requirements/test.in deleted file mode 100644 index 202a1b1..0000000 --- a/requirements/test.in +++ /dev/null @@ -1,14 +0,0 @@ -# from jupyter - -psutil -tornado - -# testing - -pytest -pytest-asyncio -pytest-cov -pytest-mock -requests -requests-mock -tenacity \ No newline at end of file diff --git a/requirements/test.txt b/requirements/test.txt deleted file mode 100644 index 43fae43..0000000 --- a/requirements/test.txt +++ /dev/null @@ -1,54 +0,0 @@ -# -# This file is autogenerated by pip-compile with Python 3.10 -# by the following command: -# -# pip-compile --output-file=requirements/test.txt requirements/test.in -# -certifi==2024.2.2 - # via requests -charset-normalizer==3.3.2 - # via requests -coverage[toml]==7.4.4 - # via pytest-cov -exceptiongroup==1.2.0 - # via pytest -idna==3.6 - # via requests -iniconfig==2.0.0 - # via pytest -packaging==24.0 - # via pytest -pluggy==1.4.0 - # via pytest -psutil==5.9.8 - # via -r requirements/test.in -pytest==8.1.1 - # via - # -r requirements/test.in - # pytest-asyncio - # pytest-cov - # pytest-mock -pytest-asyncio==0.23.6 - # via -r requirements/test.in -pytest-cov==4.1.0 - # via -r requirements/test.in -pytest-mock==3.12.0 - # via -r requirements/test.in -requests==2.31.0 - # via - # -r requirements/test.in - # requests-mock -requests-mock==1.11.0 - # via -r requirements/test.in -six==1.16.0 - # via requests-mock -tenacity==8.2.3 - # via -r requirements/test.in -tomli==2.0.1 - # via - # coverage - # pytest -tornado==6.4 - # via -r requirements/test.in -urllib3==2.2.1 - # via requests diff --git a/scripts/ci/run_tests.sh b/scripts/ci/run_tests.sh deleted file mode 100755 index e1d4c8f..0000000 --- a/scripts/ci/run_tests.sh +++ /dev/null @@ -1,12 +0,0 @@ -#/bin/sh - -# 
http://redsymbol.net/articles/unofficial-bash-strict-mode/ -set -o errexit # abort on nonzero exitstatus -set -o nounset # abort on unbound variable -set -o pipefail # don't hide errors within pipes -IFS=$'\n\t' - -make .venv -source .venv/bin/activate -make install-test -make tests-ci \ No newline at end of file diff --git a/tests/_import_utils.py b/tests/_import_utils.py deleted file mode 100644 index 33681c4..0000000 --- a/tests/_import_utils.py +++ /dev/null @@ -1,13 +0,0 @@ -import sys -from pathlib import Path - -_CURRENT_DIR = ( - Path(sys.argv[0] if __name__ == "__main__" else __file__).resolve().parent -) - - -def allow_imports() -> None: - path = (_CURRENT_DIR / "..." / ".." / ".." / "docker").absolute().resolve() - sys.path.append(f"{path}") - - import activity_monitor diff --git a/tests/conftest.py b/tests/conftest.py deleted file mode 100644 index 78984a4..0000000 --- a/tests/conftest.py +++ /dev/null @@ -1,135 +0,0 @@ -import ctypes -import pytest -import socket -import threading -import time - -from concurrent.futures import ThreadPoolExecutor, wait -from multiprocessing import Array, Process -from tempfile import NamedTemporaryFile - -from typing import Callable, Final, Iterable - - -_LOCAL_LISTEN_PORT: Final[int] = 12345 - - -class _ListenSocketServer: - def __init__(self): - self.server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.server_socket.bind(("localhost", _LOCAL_LISTEN_PORT)) - self.server_socket.listen(100) # max number of connections - self._process: Process | None = None - - def start(self): - self._process = Process(target=self._accept_clients, daemon=True) - self._process.start() - - def stop(self): - if self._process: - self._process.terminate() - self._process.join() - - def _accept_clients(self): - while True: - client_socket, _ = self.server_socket.accept() - threading.Thread( - target=self._handle_client, daemon=True, args=(client_socket,) - ).start() - - def _handle_client(self, client_socket): - try: - 
while True: - data = client_socket.recv(1024) - if not data: - break - finally: - client_socket.close() - - -@pytest.fixture -def socket_server() -> None: - socket_server = _ListenSocketServer() - socket_server.start() - yield None - socket_server.stop() - - -class _ActivityGenerator: - def __init__(self, *, network: bool, cpu: bool, disk: bool) -> None: - self._process: Process | None = None - - _keep_running = True - self.shared_array = Array(ctypes.c_bool, 4) - self.shared_array[0] = network - self.shared_array[1] = cpu - self.shared_array[2] = disk - self.shared_array[3] = _keep_running - - def __load_cpu(self) -> None: - for _ in range(1000000): - pass - - def __load_network(self) -> None: - client_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - client_socket.connect(("localhost", _LOCAL_LISTEN_PORT)) - client_socket.sendall("mock_message_to_send".encode()) - client_socket.close() - - def __load_disk(self) -> None: - with NamedTemporaryFile() as temp_file: - temp_file.write(b"0" * 1024 * 1024) # 1MB - temp_file.read() - - def _run(self) -> None: - with ThreadPoolExecutor(max_workers=3) as executor: - while self.shared_array[3]: - futures = [] - if self.shared_array[0]: - futures.append(executor.submit(self.__load_network)) - if self.shared_array[1]: - futures.append(executor.submit(self.__load_cpu)) - if self.shared_array[2]: - futures.append(executor.submit(self.__load_disk)) - - wait(futures) - time.sleep(0.1) - - def __enter__(self): - self.start() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.stop() - - def start(self) -> None: - self._process = Process(target=self._run, daemon=True) - self._process.start() - - def stop(self) -> None: - _keep_running = False - self.shared_array[3] = _keep_running - if self._process: - self._process.join() - - def get_pid(self) -> int: - assert self._process - return self._process.pid - - -@pytest.fixture -def create_activity_generator() -> ( - Iterable[Callable[[bool, bool, bool], 
_ActivityGenerator]] -): - created: list[_ActivityGenerator] = [] - - def _(*, network: bool, cpu: bool, disk: bool) -> _ActivityGenerator: - instance = _ActivityGenerator(network=network, cpu=cpu, disk=disk) - instance.start() - created.append(instance) - return instance - - yield _ - - for instance in created: - instance.stop() diff --git a/tests/test_activity_monitor.py b/tests/test_activity_monitor.py deleted file mode 100644 index 61962e2..0000000 --- a/tests/test_activity_monitor.py +++ /dev/null @@ -1,284 +0,0 @@ -import asyncio -import json -import psutil -import pytest -import pytest_asyncio -import requests -import requests_mock -import threading -import time - -from typing import Callable, Final, Iterable, TYPE_CHECKING -from pytest_mock import MockFixture -from tenacity import AsyncRetrying -from tenacity.stop import stop_after_delay -from tenacity.wait import wait_fixed -from conftest import _ActivityGenerator - - -if TYPE_CHECKING: - from ..docker import activity_monitor -else: - from _import_utils import allow_imports - - allow_imports() - import activity_monitor - -pytestmark = pytest.mark.asyncio - - -@pytest.fixture -def mock__get_sibling_processes( - mocker: MockFixture, -) -> Callable[[list[int]], list[psutil.Process]]: - def _get_processes(pids: list[int]) -> list[psutil.Process]: - results = [] - for pid in pids: - proc = psutil.Process(pid) - assert proc.status() - results.append(proc) - return results - - def _(pids: list[int]) -> None: - mocker.patch( - "activity_monitor._get_sibling_processes", return_value=_get_processes(pids) - ) - - return _ - - -async def test_cpu_usage_monitor_not_busy( - socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], - create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], -): - activity_generator = create_activity_generator(network=False, cpu=False, disk=False) - mock__get_sibling_processes([activity_generator.get_pid()]) - - with 
activity_monitor.CPUUsageMonitor(1, busy_threshold=5) as cpu_usage_monitor: - async for attempt in AsyncRetrying( - stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True - ): - with attempt: - assert cpu_usage_monitor.total_cpu_usage == 0 - assert cpu_usage_monitor.is_busy is False - - -async def test_cpu_usage_monitor_still_busy( - socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], - create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], -): - activity_generator = create_activity_generator(network=False, cpu=True, disk=False) - mock__get_sibling_processes([activity_generator.get_pid()]) - - with activity_monitor.CPUUsageMonitor(0.5, busy_threshold=5) as cpu_usage_monitor: - # wait for monitor to trigger - await asyncio.sleep(1) - - # must still result busy - assert cpu_usage_monitor.total_cpu_usage > 0 - assert cpu_usage_monitor.is_busy is True - - -async def test_disk_usage_monitor_not_busy( - socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], - create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], -): - activity_generator = create_activity_generator(network=False, cpu=False, disk=False) - mock__get_sibling_processes([activity_generator.get_pid()]) - - with activity_monitor.DiskUsageMonitor( - 0.5, read_usage_threshold=0, write_usage_threshold=0 - ) as disk_usage_monitor: - async for attempt in AsyncRetrying( - stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True - ): - with attempt: - read_bytes = disk_usage_monitor.total_bytes_read - write_bytes = disk_usage_monitor.total_bytes_write - assert read_bytes == 0 - assert write_bytes == 0 - assert disk_usage_monitor.is_busy is False - - -async def test_disk_usage_monitor_still_busy( - socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], - create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], -): - 
activity_generator = create_activity_generator(network=False, cpu=False, disk=True) - mock__get_sibling_processes([activity_generator.get_pid()]) - - with activity_monitor.DiskUsageMonitor( - 0.5, read_usage_threshold=0, write_usage_threshold=0 - ) as disk_usage_monitor: - # wait for monitor to trigger - await asyncio.sleep(1) - write_bytes = disk_usage_monitor.total_bytes_write - # NOTE: due to os disk cache reading is not reliable not testing it - assert write_bytes > 0 - - # must still result busy - assert disk_usage_monitor.is_busy is True - - -@pytest.fixture -def mock_no_network_activity(mocker: MockFixture) -> None: - mocker.patch( - "activity_monitor.NetworkUsageMonitor._sample_total_network_usage", - side_effect=lambda: (time.time(), 0, 0), - ) - - -async def test_network_usage_monitor_not_busy( - mock_no_network_activity: None, - socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], - create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], -): - activity_generator = create_activity_generator(network=False, cpu=False, disk=False) - mock__get_sibling_processes([activity_generator.get_pid()]) - - with activity_monitor.NetworkUsageMonitor( - 0.5, received_usage_threshold=0, sent_usage_threshold=0 - ) as network_usage_monitor: - async for attempt in AsyncRetrying( - stop=stop_after_delay(5), wait=wait_fixed(0.1), reraise=True - ): - with attempt: - assert network_usage_monitor.bytes_received == 0 - assert network_usage_monitor.bytes_sent == 0 - assert network_usage_monitor.is_busy is False - - -@pytest.fixture -def mock_network_monitor_exclude_interfaces(mocker: MockFixture) -> None: - mocker.patch("activity_monitor.NetworkUsageMonitor._EXCLUDE_INTERFACES", new=set()) - assert activity_monitor.NetworkUsageMonitor._EXCLUDE_INTERFACES == set() - - -async def test_network_usage_monitor_still_busy( - mock_network_monitor_exclude_interfaces: None, - socket_server: None, - mock__get_sibling_processes: 
Callable[[list[int]], list[psutil.Process]], - create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], -): - activity_generator = create_activity_generator(network=True, cpu=False, disk=False) - mock__get_sibling_processes([activity_generator.get_pid()]) - - with activity_monitor.NetworkUsageMonitor( - 0.5, received_usage_threshold=0, sent_usage_threshold=0 - ) as network_usage_monitor: - # wait for monitor to trigger - await asyncio.sleep(1) - - assert network_usage_monitor.bytes_received > 0 - assert network_usage_monitor.bytes_sent > 0 - assert network_usage_monitor.is_busy is True - - -@pytest.fixture -def mock_jupyter_kernel_monitor(are_kernels_busy: bool) -> Iterable[None]: - with requests_mock.Mocker(real_http=True) as m: - m.get("http://localhost:8888/api/kernels", text=json.dumps([{"id": "atest1"}])) - m.get( - "http://localhost:8888/api/kernels/atest1", - text=json.dumps( - {"execution_state": "running" if are_kernels_busy else "idle"} - ), - ) - yield - - -@pytest.mark.parametrize("are_kernels_busy", [True, False]) -async def test_jupyter_kernel_monitor( - mock_jupyter_kernel_monitor: None, are_kernels_busy: bool -): - kernel_monitor = activity_monitor.JupyterKernelMonitor(1) - kernel_monitor._update_kernels_activity() - assert kernel_monitor.are_kernels_busy is are_kernels_busy - - -@pytest_asyncio.fixture -async def server_url() -> str: - return f"http://localhost:{activity_monitor.LISTEN_PORT}" - - -@pytest_asyncio.fixture -async def http_server(mock_jupyter_kernel_monitor: None, server_url: str) -> None: - server = activity_monitor.make_server(activity_monitor.LISTEN_PORT) - - def _run_server_worker() -> None: - server.serve_forever() - - thread = threading.Thread(target=_run_server_worker, daemon=True) - thread.start() - - # ensure server is running - async for attempt in AsyncRetrying( - stop=stop_after_delay(3), wait=wait_fixed(0.1), reraise=True - ): - with attempt: - result = requests.get(f"{server_url}/", timeout=1) - 
assert result.status_code == 200, result.text - - yield None - - server.shutdown() - server.server_close() - - with pytest.raises(requests.exceptions.RequestException): - requests.get(f"{server_url}/", timeout=1) - - -@pytest.mark.parametrize("are_kernels_busy", [False]) -async def test_http_server_ok(http_server: None, server_url: str): - result = requests.get(f"{server_url}/", timeout=5) - assert result.status_code == 200 - - -_BIG_THRESHOLD: Final[int] = int(1e10) - - -@pytest.fixture -def mock_activity_manager_config(mocker: MockFixture) -> None: - mocker.patch("activity_monitor.CHECK_INTERVAL_S", 1) - mocker.patch("activity_monitor.KERNEL_CHECK_INTERVAL_S", 1) - - mocker.patch( - "activity_monitor.BUSY_USAGE_THRESHOLD_NETWORK_RECEIVED", _BIG_THRESHOLD - ) - mocker.patch("activity_monitor.BUSY_USAGE_THRESHOLD_NETWORK_SENT", _BIG_THRESHOLD) - - -@pytest.mark.parametrize("are_kernels_busy", [False]) -async def test_activity_monitor_becomes_not_busy( - mock_activity_manager_config: None, - socket_server: None, - mock__get_sibling_processes: Callable[[list[int]], list[psutil.Process]], - create_activity_generator: Callable[[bool, bool, bool], _ActivityGenerator], - http_server: None, - server_url: str, -): - activity_generator = create_activity_generator(network=False, cpu=False, disk=False) - mock__get_sibling_processes([activity_generator.get_pid()]) - - async for attempt in AsyncRetrying( - stop=stop_after_delay(10), wait=wait_fixed(0.1), reraise=True - ): - with attempt: - # check that all become not busy - result = requests.get(f"{server_url}/debug", timeout=5) - assert result.status_code == 200 - debug_response = result.json() - assert debug_response["cpu_usage"]["is_busy"] is False - assert debug_response["disk_usage"]["is_busy"] is False - assert debug_response["kernel_monitor"]["is_busy"] is False - assert debug_response["network_usage"]["is_busy"] is False - - result = requests.get(f"{server_url}/", timeout=2) - assert result.status_code == 200 - response 
= result.json() - assert response["seconds_inactive"] > 0 From 9e551388da31f53b91c84876c8b3a1ac990606a6 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 2 Apr 2024 10:37:04 +0200 Subject: [PATCH 47/52] remove unused --- .github/workflows/check-image.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/check-image.yml b/.github/workflows/check-image.yml index 71a8e44..168acd3 100644 --- a/.github/workflows/check-image.yml +++ b/.github/workflows/check-image.yml @@ -8,8 +8,6 @@ jobs: steps: - name: Checkout repo content uses: actions/checkout@v2 - - name: run tests - run: ./scripts/ci/run_tests.sh - name: ooil version uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 with: From c188c2a5d9281b0e4c43b540b6ad1cca716e0de0 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Tue, 2 Apr 2024 10:37:40 +0200 Subject: [PATCH 48/52] added todo --- Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile b/Dockerfile index 861839e..d592037 100644 --- a/Dockerfile +++ b/Dockerfile @@ -102,6 +102,8 @@ ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_DISK_WRITE_BPS=0 ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_RECEIVE_BPS=1024 ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_SENT_BPS=1024 +# TODO: above values require some tewaking otherwise this will always be considered in use + # install service activity monitor RUN apt-get update && \ apt-get install -y curl && \ From f67ba5455410e6538253ec388ff13add363cd97d Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Fri, 5 Apr 2024 16:16:43 +0200 Subject: [PATCH 49/52] upgrade build tooling --- .github/workflows/check-image.yml | 6 +++--- Makefile | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/check-image.yml b/.github/workflows/check-image.yml index 168acd3..314c38c 100644 --- a/.github/workflows/check-image.yml +++ b/.github/workflows/check-image.yml @@ -9,14 +9,14 @@ jobs: - name: Checkout repo content uses: actions/checkout@v2 - name: ooil 
version - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.4 with: args: ooil --version - name: Assemble docker compose spec - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.4 with: args: ooil compose - name: Build all images if multiple - uses: docker://itisfoundation/ci-service-integration-library:v1.0.3-dev-10 + uses: docker://itisfoundation/ci-service-integration-library:v1.0.4 with: args: docker compose build diff --git a/Makefile b/Makefile index 5402f42..2e8ff12 100644 --- a/Makefile +++ b/Makefile @@ -36,7 +36,7 @@ define _bumpversion # upgrades as $(subst $(1),,$@) version, commits and tags @docker run -it --rm -v $(PWD):/${DOCKER_IMAGE_NAME} \ -u $(shell id -u):$(shell id -g) \ - itisfoundation/ci-service-integration-library:v1.0.3-dev-10 \ + itisfoundation/ci-service-integration-library:v1.0.4 \ sh -c "cd /${DOCKER_IMAGE_NAME} && bump2version --verbose --list --config-file $(1) $(subst $(2),,$@)" endef @@ -50,7 +50,7 @@ version-patch version-minor version-major: .bumpversion.cfg ## increases service compose-spec: ## runs ooil to assemble the docker-compose.yml file @docker run -it --rm -v $(PWD):/${DOCKER_IMAGE_NAME} \ -u $(shell id -u):$(shell id -g) \ - itisfoundation/ci-service-integration-library:v1.0.3-dev-10 \ + itisfoundation/ci-service-integration-library:v1.0.4 \ sh -c "cd /${DOCKER_IMAGE_NAME} && ooil compose" build: | compose-spec ## build docker image From 72cb231852cc771f5f3b8f7814863f02ef8df699 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 8 Apr 2024 11:32:11 +0200 Subject: [PATCH 50/52] math reacts to inactivity --- Dockerfile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index d592037..1c1c156 100644 --- a/Dockerfile +++ b/Dockerfile @@ -93,7 +93,7 @@ ENV 
JP_LSP_VIRTUAL_DIR="/home/${NB_USER}/.virtual_documents" COPY --chown=$NB_UID:$NB_GID docker /docker # install service activity monitor -ARG ACTIVITY_MONITOR_VERSION=v0.0.1 +ARG ACTIVITY_MONITOR_VERSION=v0.0.3 # Detection thresholds for application ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_CPU_PERCENT=0.5 @@ -102,8 +102,6 @@ ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_DISK_WRITE_BPS=0 ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_RECEIVE_BPS=1024 ENV ACTIVITY_MONITOR_BUSY_THRESHOLD_NETWORK_SENT_BPS=1024 -# TODO: above values require some tewaking otherwise this will always be considered in use - # install service activity monitor RUN apt-get update && \ apt-get install -y curl && \ From 350fa7dbe6f7e96b2411e73a180289e7822b17f1 Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 8 Apr 2024 11:44:35 +0200 Subject: [PATCH 51/52] remove unused --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 2e8ff12..46c439e 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,6 @@ devenv: .venv ## create a python virtual environment with tools to dev, run and requirements: devenv ## runs pip-tools to build requirements.txt that will be installed in the JupyterLab # freezes requirements pip-compile kernels/python-maths/requirements.in --resolver=backtracking --output-file kernels/python-maths/requirements.txt - pip-compile requirements/test.in --resolver=backtracking --output-file requirements/test.txt # Builds new service version ---------------------------------------------------------------------------- define _bumpversion From c021506509894ee87511049a11a29d2b77d4f6aa Mon Sep 17 00:00:00 2001 From: Andrei Neagu Date: Mon, 8 Apr 2024 13:21:44 +0200 Subject: [PATCH 52/52] bump service version as major --- .bumpversion.cfg | 2 +- .osparc/jupyter-math/metadata.yml | 2 +- Makefile | 2 +- docker-compose-local.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.bumpversion.cfg b/.bumpversion.cfg index 040bfc9..7d423a5 100644 --- 
a/.bumpversion.cfg +++ b/.bumpversion.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 2.0.10 +current_version = 3.0.0 commit = False message = service version: {current_version} → {new_version} tag = False diff --git a/.osparc/jupyter-math/metadata.yml b/.osparc/jupyter-math/metadata.yml index b1d70ce..d89bb6a 100644 --- a/.osparc/jupyter-math/metadata.yml +++ b/.osparc/jupyter-math/metadata.yml @@ -9,7 +9,7 @@ description: " key: simcore/services/dynamic/jupyter-math -version: 2.0.10 +version: 3.0.0 integration-version: 2.0.0 type: dynamic authors: diff --git a/Makefile b/Makefile index 46c439e..aff600c 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ SHELL = /bin/sh .DEFAULT_GOAL := help export DOCKER_IMAGE_NAME ?= jupyter-math -export DOCKER_IMAGE_TAG ?= 2.0.10 +export DOCKER_IMAGE_TAG ?= 3.0.0 # PYTHON ENVIRON --------------------------------------------------------------------------------------- diff --git a/docker-compose-local.yml b/docker-compose-local.yml index 5e6a377..6bc8810 100644 --- a/docker-compose-local.yml +++ b/docker-compose-local.yml @@ -1,7 +1,7 @@ version: '3.7' services: jupyter-math: - image: simcore/services/dynamic/jupyter-math:2.0.10 + image: simcore/services/dynamic/jupyter-math:3.0.0 ports: - "8888:8888" environment: