diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml new file mode 100644 index 0000000..be847d5 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -0,0 +1,57 @@ +name: Bug Report +description: File a bug report +labels: ["Type: Bug", "Status: Triage"] +body: + - type: markdown + attributes: + value: > + Thanks for taking the time to fill out this bug report! Before submitting your issue, please make + sure you are using the latest version of the charm. If not, please switch to the latest version of this charm + before posting your report to make sure it's not already solved. + - type: textarea + id: bug-description + attributes: + label: Bug Description + description: > + Provide a description of the issue you are facing. If applicable, add screenshots to help explain the problem. + validations: + required: true + - type: textarea + id: reproduction + attributes: + label: To Reproduce + description: > + Please provide step-by-step instructions on how to reproduce the behavior. + placeholder: | + 1. `juju deploy ...` + 2. `juju relate ...` + 3. `juju status --relations` + validations: + required: true + - type: textarea + id: environment + attributes: + label: Environment + description: > + We need to know a bit more about the context in which you run the charm. + - Are you running Juju locally, on lxd, in multipass or on some other platform? + - What track and channel you deployed the charm from (e.g. `latest/edge` or similar). + - Version of any applicable components, like the juju snap, the model controller, lxd, microk8s, and/or multipass. + validations: + required: true + - type: textarea + id: logs + attributes: + label: Relevant log output + description: > + Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. + Fetch the logs using `juju debug-log --replay`. 
Additional details on how to retrieve logs are available in the juju + documentation at https://juju.is/docs/olm/juju-logs. + render: shell + validations: + required: true + - type: textarea + id: additional-context + attributes: + label: Additional context + diff --git a/.github/ISSUE_TEMPLATE/enhancement_proposal.yaml b/.github/ISSUE_TEMPLATE/enhancement_proposal.yaml new file mode 100644 index 0000000..9587b38 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/enhancement_proposal.yaml @@ -0,0 +1,17 @@ +name: Enhancement Proposal +description: File an enhancement proposal +labels: ["Type: Enhancement", "Status: Triage"] +body: + - type: markdown + attributes: + value: > + Thank you for taking the time to fill out this enhancement proposal! Before submitting your proposal, please + make sure there isn't a pre-existing similar proposal. If there is, please join that discussion instead. + - type: textarea + id: enhancement-proposal + attributes: + label: Enhancement Proposal + description: > + Describe the enhancement you would like to see in as much detail as needed. + validations: + required: true diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..fbc730e --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,23 @@ +## Description + +> Provide a description of the purpose of this pull request, as well as its +motivation and context. Is it a new feature? A bug fix? Does it address an existing issue? + +## How was the code tested? + +> Describe the conditions under which the code has been tested. +> * Did you run the defined integration and unit tests under `tests/`? +> * Did you write new tests? Where are they located in the repository? +> * Which undercloud did you use to perform the tests? LXD, vSphere, AWS, etc. +> * What operating system did you test the charms on? Ubuntu 22.04, Ubuntu 20.04, CentOS 7, etc. 
+ +## Related issues and/or tasks + +> Link any related issues or project board tasks to this pull request. + +## Checklist + +- [ ] I am the author of these changes, or I have the rights to submit them. +- [ ] I have added the relevant changes to the README and/or documentation. +- [ ] I have self-reviewed my code. +- [ ] All requested changes and/or review comments have been resolved. diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 0000000..9823807 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,41 @@ +# Copyright 2023 Canonical Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: nvidia charm tests +on: + workflow_call: + pull_request: + +jobs: + inclusive-naming-check: + name: Inclusive naming check + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Run tests + uses: get-woke/woke-action@v0 + with: + fail-on-error: true + + lint: + name: Lint + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Install dependencies + run: python3 -m pip install tox + - name: Run linters + run: tox -e lint diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..dc0a8a2 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,42 @@ +# Copyright 2023 Canonical Ltd. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: Release to latest/edge + +on: + push: + branches: + - main + +jobs: + ci-tests: + uses: ./.github/workflows/ci.yaml + + release-to-charmhub: + name: Release to CharmHub + needs: + - ci-tests + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Select charmhub channel + uses: canonical/charming-actions/channel@2.2.0 + id: channel + - name: Upload charm to charmhub + uses: canonical/charming-actions/upload-charm@2.2.0 + with: + credentials: "${{ secrets.CHARMCRAFT_AUTH }}" + github-token: "${{ secrets.GITHUB_TOKEN }}" + channel: "${{ steps.channel.outputs.name }}" diff --git a/src/nvidia_ops_manager.py b/src/nvidia_ops_manager.py index 9603a06..4290af8 100644 --- a/src/nvidia_ops_manager.py +++ b/src/nvidia_ops_manager.py @@ -12,7 +12,7 @@ def os_release(): """Return /etc/os-release as a dict.""" os_release_data = Path("/etc/os-release").read_text() os_release_list = [item.split("=") for item in os_release_data.strip().split("\n")] - return {k: v for k, v in os_release_list} + return {k: v.strip('"') for k, v in os_release_list} class NvidiaDriverOpsError(Exception): @@ -67,8 +67,8 @@ class NvidiaOpsManagerUbuntu(NvidiaOpsManagerBase): def __init__(self): self._id = self.OS_RELEASE["ID"] - self._os = self.OS_RELEASE["VERSION_ID"].strip(".") - self._distribution = f"{self._id}{self._os}" + self._version_id = self.OS_RELEASE["VERSION_ID"].replace(".", "") + self._distribution = 
f"{self._id}{self._version_id}" self._cuda_keyring_url = ( "https://developer.download.nvidia.com/compute/cuda/" f"repos/{self._distribution}/{self._arch}/cuda-keyring_1.0-1_all.deb" @@ -94,16 +94,16 @@ def _install_cuda_keyring(self) -> None: ) with tempfile.TemporaryDirectory() as tmpdir: - cuda_keyring_deb = f"{tmpdir}/cuda_keyring.deb" + cuda_keyring_deb = f"{tmpdir}/cuda-keyring.deb" Path(cuda_keyring_deb).write_bytes(r.content) try: run(["dpkg", "-i", cuda_keyring_deb]) except CalledProcessError: raise NvidiaDriverOpsError("Error installing cuda keyring .deb.") - try: - run(["apt-get", "update"]) - except CalledProcessError: - raise NvidiaDriverOpsError("Error running `apt-get update`.") + try: + run(["apt-get", "update"]) + except CalledProcessError: + raise NvidiaDriverOpsError("Error running `apt-get update`.") def _install_cuda_drivers(self) -> None: """Install the cuda drivers.""" @@ -139,11 +139,12 @@ def version(self) -> str: except CalledProcessError: raise NvidiaDriverOpsError("Error running `apt-cache policy cuda-drivers.") + version = "" for line in p.decode().strip().split("\n"): if "Installed" in line: version = line.split("Installed: ")[1] - else: - raise NvidiaDriverOpsError("Error locating cuda-drivers package version.") + if not version: + raise NvidiaDriverOpsError("Error locating cuda-drivers package version.") return version