Skip to content

Commit

Permalink
adding plain TPM plugin as well, some README updates
Browse files Browse the repository at this point in the history
  • Loading branch information
mheese committed Jun 15, 2023
1 parent 9e5d9ea commit bd7dfc8
Show file tree
Hide file tree
Showing 5 changed files with 310 additions and 19 deletions.
19 changes: 17 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# k8s-tpm-device-plugin

[![License: Apache 2.0](https://img.shields.io/badge/license-Apache%202-blue)](https://www.apache.org/licenses/LICENSE-2.0)

This is a Kubernetes device plugin to make TPM devices accessible from Kubernetes pods without the need to run pods in privileged mode.
The initial goal for this plugin was to enable the [rust keylime agent](https://github.com/keylime/rust-keylime/) to run on Kubernetes.
The initial goal for this plugin was to enable the [rust keylime agent](https://github.com/keylime/rust-keylime/) to run on Kubernetes (without the need for privileged mode).

## Overview

Expand All @@ -17,6 +19,19 @@ This value is configurable and can be overwritten at installation time with for
By default up to 64 pods can gain access to the `/dev/tpmrm0` device on a host.
Note that this number is entirely arbitrary; unfortunately, it cannot be handled differently because of the way devices are allocated by the Kubernetes device manager.

## Requirements

The following requirements must be met to run the TPM device plugin:

- obviously, it only makes sense to run this device plugin on nodes which offer a TPM device (duh) - use a custom `nodeSelector` for the `DaemonSet` if you need to be selective
- Kubernetes 1.26 or later is currently required, because that is the release in which the device manager went GA. If you run an older cluster where device plugins are enabled as a pre-GA feature and you want to use this plugin, you need to modify the Helm chart in this repo to remove this requirement.

## Known Compatibility

So far the plugin has only been tested on vanilla Kubernetes clusters running on Fedora nodes.
It would be great to get confirmation from folks who have successfully used this plugin on some of the common cloud platforms.
Please open an issue and/or PR for it!

## Installation

The TPM device plugin must be deployed as a Kubernetes DaemonSet.
Expand All @@ -39,7 +54,7 @@ This is the preferred methodYou can request the `/dev/tpmrm0` device like the fo
githedgehog.com/tpmrm: 1
```
In edge cases, and when you truly need it, you can similarly request the `/dev/tpm0` device like this (_NOTE: not implemented yet!_):
In edge cases, and when you truly need it, you can similarly request the `/dev/tpm0` device like this:

```yaml
resources:
Expand Down
43 changes: 30 additions & 13 deletions cmd/k8s-tpm-device-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"syscall"

"go.githedgehog.com/k8s-tpm-device-plugin/internal/plugin"
"go.githedgehog.com/k8s-tpm-device-plugin/internal/plugin/tpm"
"go.githedgehog.com/k8s-tpm-device-plugin/internal/plugin/tpmrm"
"go.githedgehog.com/k8s-tpm-device-plugin/pkg/version"

Expand Down Expand Up @@ -163,13 +164,20 @@ func run(cliCtx *cli.Context, l *zap.Logger) error {
sigCh := make(chan os.Signal, 1)
signal.Notify(sigCh, syscall.SIGHUP, syscall.SIGINT, syscall.SIGTERM, syscall.SIGQUIT)

p, err := tpmrm.New(l, cliCtx.Uint("num-tpmrm-devices"), cliCtx.Bool("pass-tpm2tools-tcti-env-var"))
p1, err := tpmrm.New(l, cliCtx.Uint("num-tpmrm-devices"), cliCtx.Bool("pass-tpm2tools-tcti-env-var"))
if err != nil {
return fmt.Errorf("TPM Device plugin create: %w", err)
return fmt.Errorf("tpmrm: device plugin create: %w", err)
}
p2, err := tpm.New(l, cliCtx.Bool("pass-tpm2tools-tcti-env-var"))
if err != nil {
return fmt.Errorf("tpm: device plugin create: %w", err)
}
// start plugin
if err := p.Start(ctx); err != nil {
return fmt.Errorf("TPM Device plugin failed to start on startup: %w", err)
if err := p1.Start(ctx); err != nil {
return fmt.Errorf("%s: device plugin failed to start on startup: %w", p1.Name(), err)
}
if err := p2.Start(ctx); err != nil {
return fmt.Errorf("%s: device plugin failed to start on startup: %w", p2.Name(), err)
}

runLoop:
Expand All @@ -180,7 +188,7 @@ runLoop:
l.Debug("fsnotify event", zap.Reflect("event", event))
if event.Name == pluginapi.KubeletSocket && event.Op&fsnotify.Create == fsnotify.Create {
l.Info("fsnotifiy: kubelet socket created, restarting...", zap.String("kubeletSocket", pluginapi.KubeletSocket))
if err := restart(ctx, p); err != nil {
if err := restart(ctx, p1, p2); err != nil {
return err
}
}
Expand All @@ -192,7 +200,7 @@ runLoop:
switch s {
case syscall.SIGHUP:
l.Info("SIGHUP signal received, restarting...")
if err := restart(ctx, p); err != nil {
if err := restart(ctx, p1, p2); err != nil {
return err
}
default:
Expand All @@ -203,19 +211,28 @@ runLoop:
}

// stop plugin on regular shutdown
if err := p.Stop(ctx); err != nil {
return fmt.Errorf("failed to stop TPM device plugin on shutdown: %w", err)
if err := p1.Stop(ctx); err != nil {
return fmt.Errorf("%s: failed to stop device plugin on shutdown: %w", p1.Name(), err)
}
if err := p2.Stop(ctx); err != nil {
return fmt.Errorf("%s: failed to stop device plugin on shutdown: %w", p2.Name(), err)
}

return nil
}

func restart(ctx context.Context, p plugin.Interface) error {
if err := p.Stop(ctx); err != nil {
return fmt.Errorf("failed to stop TPM device plugin on restart: %w", err)
func restart(ctx context.Context, p1, p2 plugin.Interface) error {
if err := p1.Stop(ctx); err != nil {
return fmt.Errorf("%s: failed to stop device plugin on restart: %w", p1.Name(), err)
}
if err := p2.Stop(ctx); err != nil {
return fmt.Errorf("%s: failed to stop device plugin on restart: %w", p2.Name(), err)
}
if err := p1.Start(ctx); err != nil {
return fmt.Errorf("%s: Device plugin failed to start on restart: %w", p1.Name(), err)
}
if err := p.Start(ctx); err != nil {
return fmt.Errorf("TPM Device plugin failed to start on restart: %w", err)
if err := p2.Start(ctx); err != nil {
return fmt.Errorf("%s: Device plugin failed to start on restart: %w", p2.Name(), err)
}
return nil
}
1 change: 1 addition & 0 deletions internal/plugin/interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package plugin
import "context"

// Interface is the lifecycle contract that every device plugin in this
// project implements.
type Interface interface {
	// Name returns a short identifier for the plugin; callers use it to
	// prefix error messages.
	Name() string

	// Start brings the plugin up.
	Start(ctx context.Context) error

	// Stop shuts the plugin down.
	Stop(ctx context.Context) error
}
255 changes: 255 additions & 0 deletions internal/plugin/tpm/plugin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,255 @@
/*
Copyright 2023 Hedgehog SONiC Foundation
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package tpm

import (
"context"
"errors"
"fmt"
"net"
"os"
"path/filepath"
"time"

"go.uber.org/zap"
"google.golang.org/grpc"
"google.golang.org/grpc/credentials/insecure"

"go.githedgehog.com/k8s-tpm-device-plugin/internal/plugin"

pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
)

// tpmID is the ID of the single device that ListAndWatch advertises.
const tpmID = "tpm0"

// tpmSocketName is the file name of the plugin's unix socket. It is joined
// with the device plugin directory to form the socket path and is also sent
// to the kubelet as the registration endpoint.
const tpmSocketName = "hh-tpm.sock"

var (
	// connectionTimeout bounds the blocking gRPC dials (to our own server
	// and to the kubelet socket).
	connectionTimeout = 5 * time.Second

	// registerTimeout bounds the Register call made to the kubelet.
	registerTimeout = 30 * time.Second

	// errUnimplmented is the sentinel wrapped by UnimplementedError.
	errUnimplmented = errors.New("plugin does not implement this method")
)

// UnimplementedError returns an error that wraps the "unimplemented" sentinel
// and appends str (typically the method name) to its message.
func UnimplementedError(str string) error {
	wrapped := fmt.Errorf("%w: %s", errUnimplmented, str)
	return wrapped
}

// tpmDevicePlugin is the device plugin that exposes the /dev/tpm0 device.
type tpmDevicePlugin struct {
	// Static configuration, set once by New().

	// l is the plugin-scoped logger.
	l *zap.Logger
	// tctiEnvVar controls whether Allocate sets the TPM2TOOLS_TCTI
	// environment variable in the container response.
	tctiEnvVar bool
	// socketPath is the unix socket the gRPC server listens on.
	socketPath string

	// Runtime state, created by init() and reset by cleanup().

	// server is the gRPC server serving the device plugin API.
	server *grpc.Server
	// stopCh is closed by cleanup(); ListAndWatch blocks on it.
	stopCh chan struct{}
}

// Compile-time checks that *tpmDevicePlugin satisfies both the project's
// plugin interface and the kubelet device plugin server API.
var (
	_ plugin.Interface             = (*tpmDevicePlugin)(nil)
	_ pluginapi.DevicePluginServer = (*tpmDevicePlugin)(nil)
)

// New creates the TPM device plugin. The logger is tagged with the plugin
// name; tctiEnvVar decides whether Allocate passes the TPM2TOOLS_TCTI
// environment variable to containers. The returned error is always nil.
func New(l *zap.Logger, tctiEnvVar bool) (plugin.Interface, error) {
	p := new(tpmDevicePlugin)
	p.l = l.With(zap.String("plugin", "tpm"))
	p.tctiEnvVar = tctiEnvVar
	p.socketPath = filepath.Join(pluginapi.DevicePluginPath, tpmSocketName)
	// server and stopCh intentionally stay nil here: Start() initializes them.
	return p, nil
}

// init creates a fresh gRPC server and stop channel for one Start/Stop cycle.
func (p *tpmDevicePlugin) init() {
	p.server, p.stopCh = grpc.NewServer(), make(chan struct{})
}

// cleanup closes the stop channel (which unblocks ListAndWatch) and then
// drops the per-cycle state so that a later Start() begins from scratch.
func (p *tpmDevicePlugin) cleanup() {
	close(p.stopCh)
	p.server, p.stopCh = nil, nil
}

// Name implements Interface and returns the fixed identifier of this plugin.
func (*tpmDevicePlugin) Name() string {
	const pluginName = "tpm"
	return pluginName
}

// Start implements Interface.
//
// It initializes the per-cycle state, starts the gRPC server and then
// registers the plugin with the kubelet. If either step fails, whatever was
// already brought up (server goroutine, listener, socket file) is torn down
// again via Stop, so a failed Start does not leak a running server and a
// later Start can begin from a clean state. The original step error is
// returned; an error from the rollback itself is only logged.
//
// Calling Start on a nil receiver is a no-op.
func (p *tpmDevicePlugin) Start(ctx context.Context) error {
	// caller safeguard
	if p == nil {
		return nil
	}
	p.init()

	// rollback undoes a partially completed start.
	rollback := func() {
		if stopErr := p.Stop(ctx); stopErr != nil {
			p.l.Warn("Cleaning up after failed start", zap.Error(stopErr))
		}
	}

	if err := p.Serve(ctx); err != nil {
		rollback()
		return err
	}
	p.l.Info("TPM Device Plugin server started")
	if err := p.Register(ctx); err != nil {
		// the server is already running at this point and must be stopped again
		rollback()
		return err
	}
	p.l.Info("TPM Device Plugin registered with kubelet")

	return nil
}

// Stop implements Interface.
//
// It stops the gRPC server, removes the plugin's socket file and resets the
// per-cycle state. The state is reset even when removing the socket fails:
// otherwise the stop channel would never be closed and ListAndWatch handlers
// blocked on it would leak. A socket file that could not be removed is
// reported to the caller; Serve also removes a stale socket before listening.
//
// Calling Stop on a nil receiver or on a plugin that is not started is a
// no-op.
func (p *tpmDevicePlugin) Stop(context.Context) error {
	// caller safeguard
	if p == nil || p.server == nil {
		return nil
	}
	// always release the per-cycle state, including on the error path below
	defer p.cleanup()

	p.l.Info("Stopping gRPC server", zap.String("socket", p.socketPath))
	p.server.Stop()
	if err := os.Remove(p.socketPath); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("removing socket path %s: %w", p.socketPath, err)
	}
	return nil
}

// Serve starts the plugin's gRPC server on its unix socket and returns once
// the server accepts connections.
//
// A stale socket file is removed first. The server runs in its own goroutine
// until Stop is called on it. Serve then dials the socket in blocking mode,
// bounded by connectionTimeout, to make sure the server is up before
// returning. If that dial fails, the server is stopped again so that the
// goroutine and the listener are not left behind.
func (p *tpmDevicePlugin) Serve(ctx context.Context) error {
	// listen on unix socket
	// NOTE: no need to close the listener as the gRPC methods close the listener automatically
	if err := os.Remove(p.socketPath); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("removing socket path %s: %w", p.socketPath, err)
	}
	var lc net.ListenConfig
	l, err := lc.Listen(ctx, "unix", p.socketPath)
	if err != nil {
		return fmt.Errorf("listening on unix socket %s: %w", p.socketPath, err)
	}
	p.l.Info("Listening on unix socket for gRPC server now", zap.String("socket", p.socketPath))

	// register the device plugin server API with the grpc server
	pluginapi.RegisterDevicePluginServer(p.server, p)

	// Keep a local reference: cleanup() resets p.server to nil, and the
	// goroutine below must not observe that.
	server := p.server

	// now run the gRPC server
	go func() {
		p.l.Info("Starting gRPC server now...")
		// Serve closes the listener when it returns, so calling it again on
		// the same listener can never succeed. Do not retry: log and exit.
		// err is nil when Stop() or GracefulStop() were called
		if err := server.Serve(l); err != nil {
			p.l.Error("gRPC server crashed", zap.Error(err))
			return
		}
		p.l.Info("Stopped gRPC server")
	}()

	// connect to the gRPC server in blocking mode to ensure it is up before we return here
	subCtx, cancel := context.WithTimeout(ctx, connectionTimeout)
	defer cancel()
	conn, err := grpc.DialContext(subCtx, "unix:"+p.socketPath, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
	if err != nil {
		// do not leave a half-started server and its goroutine running
		server.Stop()
		return fmt.Errorf("gRPC server did not start within timeout %v: %w", connectionTimeout, err)
	}
	conn.Close() // nolint: errcheck

	p.l.Info("Started gRPC server")
	return nil
}

// Register announces this plugin to the kubelet.
//
// It dials the kubelet registration socket in blocking mode (bounded by
// connectionTimeout) and issues a Register call (bounded by registerTimeout)
// advertising the "githedgehog.com/tpm" resource served at the plugin's
// socket. The client connection is only needed for this single call and is
// closed before returning.
func (p *tpmDevicePlugin) Register(ctx context.Context) error {
	// connect to kubelet socket
	connCtx, connCancel := context.WithTimeout(ctx, connectionTimeout)
	defer connCancel()
	conn, err := grpc.DialContext(connCtx, "unix:"+pluginapi.KubeletSocket, grpc.WithTransportCredentials(insecure.NewCredentials()), grpc.WithBlock())
	if err != nil {
		return fmt.Errorf("connecting to kubelet socket at %s: %w", pluginapi.KubeletSocket, err)
	}
	// release the connection once registration is done, on every return path
	defer conn.Close() // nolint: errcheck

	client := pluginapi.NewRegistrationClient(conn)

	regCtx, regCancel := context.WithTimeout(ctx, registerTimeout)
	defer regCancel()
	if _, err := client.Register(regCtx, &pluginapi.RegisterRequest{
		Version:      pluginapi.Version,
		Endpoint:     tpmSocketName,
		ResourceName: "githedgehog.com/tpm",
		Options: &pluginapi.DevicePluginOptions{
			PreStartRequired:                false,
			GetPreferredAllocationAvailable: false,
		},
	}); err != nil {
		return fmt.Errorf("gRPC register call: %w", err)
	}

	return nil
}

// Allocate implements v1beta1.DevicePluginServer.
//
// For every container request it answers with one device spec that maps the
// host's /dev/tpm0 to the same path inside the container. When the plugin was
// created with tctiEnvVar set, the TPM2TOOLS_TCTI environment variable is
// added to the container response as well.
func (p *tpmDevicePlugin) Allocate(_ context.Context, allocateRequest *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) {
	p.l.Debug("Allocate() call", zap.Reflect("allocateRequest", allocateRequest))

	result := new(pluginapi.AllocateResponse)
	for _, containerReq := range allocateRequest.ContainerRequests {
		p.l.Debug("allocate ContainerRequest", zap.Reflect("creq", containerReq))

		tpmDevice := &pluginapi.DeviceSpec{
			ContainerPath: "/dev/tpm0",
			HostPath:      "/dev/tpm0",
			Permissions:   "rwm",
		}
		containerResp := &pluginapi.ContainerAllocateResponse{
			Devices: []*pluginapi.DeviceSpec{tpmDevice},
		}
		// Envs stays nil unless the env var was requested
		if p.tctiEnvVar {
			containerResp.Envs = map[string]string{
				"TPM2TOOLS_TCTI": "device:/dev/tpm0",
			}
		}
		result.ContainerResponses = append(result.ContainerResponses, containerResp)
	}
	return result, nil
}

// GetDevicePluginOptions implements v1beta1.DevicePluginServer.
//
// Both optional features are reported as disabled, matching the options sent
// in the registration request.
func (*tpmDevicePlugin) GetDevicePluginOptions(context.Context, *pluginapi.Empty) (*pluginapi.DevicePluginOptions, error) {
	opts := new(pluginapi.DevicePluginOptions)
	opts.PreStartRequired = false
	opts.GetPreferredAllocationAvailable = false
	return opts, nil
}

// GetPreferredAllocation implements v1beta1.DevicePluginServer.
//
// The plugin does not offer preferred allocations; the call is logged at
// debug level and always fails with an UnimplementedError.
func (p *tpmDevicePlugin) GetPreferredAllocation(_ context.Context, _ *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
	const method = "GetPreferredAllocation"
	p.l.Debug("GetPreferredAllocation() is unimplemented for this plugin")
	var none *pluginapi.PreferredAllocationResponse
	return none, UnimplementedError(method)
}

// ListAndWatch implements v1beta1.DevicePluginServer.
//
// It sends a single device list containing the one TPM device, marked
// healthy, and then keeps the stream open until the plugin is stopped or the
// stream's context is done (for example because the kubelet went away). A
// failure to send the device list is returned to the caller instead of being
// silently dropped.
func (p *tpmDevicePlugin) ListAndWatch(_ *pluginapi.Empty, s pluginapi.DevicePlugin_ListAndWatchServer) error {
	if err := s.Send(&pluginapi.ListAndWatchResponse{Devices: []*pluginapi.Device{
		{
			ID:     tpmID,
			Health: pluginapi.Healthy,
		},
	}}); err != nil {
		return fmt.Errorf("sending device list: %w", err)
	}

	// TODO: there is nothing we are doing at the moment to check if the TPM is healthy or not
	select {
	case <-p.stopCh:
		// plugin is being stopped
	case <-s.Context().Done():
		// stream was cancelled or the peer disconnected; do not keep the
		// handler goroutine blocked until the next Stop()
	}

	return nil
}

// PreStartContainer implements v1beta1.DevicePluginServer.
//
// The plugin registers with PreStartRequired set to false; if the call is
// made anyway, it is logged at debug level and answered with an empty
// response.
func (p *tpmDevicePlugin) PreStartContainer(context.Context, *pluginapi.PreStartContainerRequest) (*pluginapi.PreStartContainerResponse, error) {
	p.l.Debug("PreStartContainer() is unimplemented for this plugin")
	empty := new(pluginapi.PreStartContainerResponse)
	return empty, nil
}
Loading

0 comments on commit bd7dfc8

Please sign in to comment.