diff --git a/.gitignore b/.gitignore
index db922810..dc077217 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,6 +7,7 @@
 *.d
 *.out
 got
+gob
 libstorage.paw
 .site/
 site/
diff --git a/drivers/storage/vmw/vmci/connection_types.h b/drivers/storage/vmw/vmci/connection_types.h
new file mode 100644
index 00000000..e280be35
--- /dev/null
+++ b/drivers/storage/vmw/vmci/connection_types.h
@@ -0,0 +1,63 @@
+// Copyright 2016 VMware, Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Shared info (magic, err. codes, etc) on vSocket command channel
+
+#ifndef _CONNECTION_TYPES_H_
+#define _CONNECTION_TYPES_H_
+
+#define MAGIC 0xbadbeef
+
+// -1 always indicates failure
+#define CONN_FAILURE (-1)
+
+// 0 is usually success. Note: sometimes we return socket FD on success
+#define CONN_SUCCESS (0)
+
+// First non privileged port
+#define START_NON_PRIVILEGED_PORT 1024
+
+/*
+ * Check and set errno helper (statement-safe: use as `CHECK_ERRNO(ret);`).
+ * When send/recv moved fewer bytes than requested (_ret >= 0) and errno is
+ * still 0, set errno = EBADMSG so the caller sees a protocol issue.
+ */
+#define CHECK_ERRNO(_ret) do { if ((_ret) >= 0 && errno == 0) { errno = EBADMSG; } } while (0)
+
+/*
+ * Acquires and returns the address family for vSockets.
+ * On failure returns -1 and sets errno (EAFNOSUPPORT if it was not already
+ * set by VMCISock_GetAFValue()).
+ * The address family for vSockets must be acquired, it is not static.
+ * The code opens and keeps FD to /dev/vsock to indicate to the kernel
+ * that VMCI driver is used by this process.
+ * A successfully acquired value is cached in a function-local static;
+ * the FD is expected to be closed by process completion.
+ */
+static inline int
+vsock_get_family(void)
+{
+    static int af = -1;
+
+    errno = 0;
+    if (af == -1) { // TODO: for multi-thread will need a lock. Issue #35
+        af = VMCISock_GetAFValue();
+    }
+    if (af == -1 && errno == 0) {
+        errno = EAFNOSUPPORT; // report "family not supported" upstairs
+    }
+    return af;
+}
+
+#endif // _CONNECTION_TYPES_H_
diff --git a/drivers/storage/vmw/vmci/vmci.go b/drivers/storage/vmw/vmci/vmci.go
new file mode 100644
index 00000000..a427f229
--- /dev/null
+++ b/drivers/storage/vmw/vmci/vmci.go
@@ -0,0 +1,174 @@
+// Copyright 2016 VMware, Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// The default (ESX) implementation of the VmdkCmdRunner interface.
+// This implementation sends synchronous commands to and receives responses
+// from ESX.
+
+// +build linux darwin
+// +build !libstorage_storage_driver libstorage_storage_driver_vmdk libstorage_storage_driver_photon
+
+package vmci
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"os"
+	"sync"
+	"syscall"
+	"time"
+	"unsafe"
+
+	log "github.com/Sirupsen/logrus"
+)
+
+/*
+#include "vmci_client.h"
+*/
+import "C"
+
+// EsxVmdkCmd carries no state of its own besides the mutex; it exists to
+// implement the VmdkCmdRunner interface.
+type EsxVmdkCmd struct {
+	Mtx *sync.Mutex // For serialization of Run command/response
+}
+
+const (
+	commBackendName string = "vsocket"
+	maxRetryCount          = 5
+	// Protocol version the server side is expected to understand. If you are
+	// changing the client/server protocol we use over VMCI, PLEASE DO NOT
+	// FORGET TO CHANGE IT FOR THE SERVER as well.
+	// NOTE(review): the server-side file name was missing from this comment;
+	// TODO restore it.
+	clientProtocolVersion = "2"
+)
+
+// A request to be passed to ESX service
+type requestToVmci struct {
+	Ops     string     `json:"cmd"`
+	Details VolumeInfo `json:"details"`
+	Version string     `json:"version,omitempty"`
+}
+
+// VolumeInfo we get about the volume from upstairs
+type VolumeInfo struct {
+	Name    string            `json:"Name"`
+	Options map[string]string `json:"Opts,omitempty"`
+}
+
+// vmciError is the shape of an error reply: a JSON object with an "Error"
+// string member.
+type vmciError struct {
+	Error string `json:",omitempty"`
+}
+
+// EsxPort used to connect to ESX, passed in as command line param
+var EsxPort = 1019
+
+// Run executes a command requested by the guest VM on ESX via
+// vmdkops_serv.py listening on vSocket.
+//
+// For each request it:
+//   - serializes cmd, name and opts (plus the protocol version) to JSON
+//   - calls C.Vmci_GetReply, retrying up to maxRetryCount times with a one
+//     second pause while the call fails with errno set
+//   - returns the reply JSON, or an error when the call ultimately fails or
+//     the reply itself carries a non-empty "Error" member
+//
+// The protocol version can be overridden through the
+// VDVS_TEST_PROTOCOL_VERSION environment variable. Calls are serialized on
+// vmdkCmd.Mtx, which must be non-nil.
+func (vmdkCmd EsxVmdkCmd) Run(
+	cmd string, name string, opts map[string]string) ([]byte, error) {
+
+	vmdkCmd.Mtx.Lock()
+	defer vmdkCmd.Mtx.Unlock()
+
+	protocolVersion := os.Getenv("VDVS_TEST_PROTOCOL_VERSION")
+	log.Debugf("Run get request: version=%s", protocolVersion)
+	if protocolVersion == "" {
+		protocolVersion = clientProtocolVersion
+	}
+	jsonStr, err := json.Marshal(&requestToVmci{
+		Ops:     cmd,
+		Details: VolumeInfo{Name: name, Options: opts},
+		Version: protocolVersion})
+	if err != nil {
+		return nil, fmt.Errorf("Failed to marshal json: %v", err)
+	}
+
+	cmdS := C.CString(string(jsonStr))
+	defer C.free(unsafe.Pointer(cmdS))
+
+	beS := C.CString(commBackendName)
+	defer C.free(unsafe.Pointer(beS))
+
+	// Get the response data in json
+	ans := (*C.be_answer)(C.calloc(1, C.sizeof_struct_be_answer))
+	if ans == nil {
+		return nil, errors.New("Failed to allocate memory for the reply")
+	}
+	defer C.free(unsafe.Pointer(ans))
+
+	var ret C.be_sock_status
+	for i := 0; i <= maxRetryCount; i++ {
+		// Drop the error text of a previous attempt so a failure that does
+		// not write errBuf is not reported with a stale message.
+		ans.errBuf[0] = 0
+
+		ret, err = C.Vmci_GetReply(C.int(EsxPort), cmdS, beS, ans)
+		if ret == 0 {
+			// C.Vmci_GetReply indicates success/failure by value. Cgo adds
+			// the error result based on errno, and our C code does not
+			// explicitly reset errno, so a stale errno may yield a non-nil
+			// err even on success. Declare success on the value only and
+			// ignore err here.
+			break
+		}
+
+		errDetail := C.GoString(&ans.errBuf[0])
+		if err == nil {
+			msg := fmt.Sprintf("Internal issue: ret != 0 but errno is not "+
+				"set. Cancelling operation - %s ", errDetail)
+			log.Warn(msg)
+			return nil, errors.New(msg)
+		}
+
+		// Cgo reports errno as syscall.Errno; fall back to 0 rather than
+		// panic should that ever not hold.
+		errno, _ := err.(syscall.Errno)
+		msg := fmt.Sprintf("Run '%s' failed: %v (errno=%d) - %s",
+			cmd, err, int(errno), errDetail)
+		if i < maxRetryCount {
+			// msg embeds text produced by the C side, so it must not be
+			// used as a format string.
+			log.Warn(msg + " Retrying...")
+			time.Sleep(time.Second * 1)
+			continue
+		}
+		if errno == syscall.ECONNRESET || errno == syscall.ETIMEDOUT {
+			msg += " Cannot communicate with ESX, please refer to the " +
+				"FAQ " +
+				"https://github.com/vmware/docker-volume-vsphere/wiki#faq"
+		}
+
+		log.Warn(msg)
+		return nil, errors.New(msg)
+	}
+
+	response := []byte(C.GoString(ans.buf))
+	C.Vmci_FreeBuf(ans)
+
+	if err := unmarshalError(response); err != nil {
+		return nil, err
+	}
+	// There was no error, so return the slice containing the json response
+	return response, nil
+}
+
+// unmarshalError returns the error carried by a reply, or nil when the reply
+// is "null", is not a JSON object that decodes into vmciError, or has an
+// empty "Error" member.
+func unmarshalError(str []byte) error {
+	// Unmarshalling null always succeeds
+	if string(str) == "null" {
+		return nil
+	}
+	errStruct := vmciError{}
+	if err := json.Unmarshal(str, &errStruct); err != nil {
+		// We didn't unmarshal an error, so there is no error ;)
+		return nil
+	}
+	if errStruct.Error == "" {
+		return nil
+	}
+	// Return the unmarshaled error string as an `error`
+	return errors.New(errStruct.Error)
+}
diff --git a/drivers/storage/vmw/vmci/vmci_client.c b/drivers/storage/vmw/vmci/vmci_client.c
new file mode 100644
index 00000000..b34fae11
--- /dev/null
+++ b/drivers/storage/vmw/vmci/vmci_client.c
@@ -0,0 +1,302 @@
+// Copyright 2016 VMware, Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+//
+// VMCI sockets communication - client side.
+//
+// Called mainly from Go code.
+//
+// API: Exposes only Vmci_GetReply. The call is blocking.
+//
+//
+// NOTE(review): the header names on the system #include lines were missing;
+// the list below is reconstructed from the symbols this file uses - confirm.
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "vmci_client.h"
+#include "vmci_sockets.h"
+#include "connection_types.h"
+
+// Table of known backends, terminated by an all-zero entry.
+be_funcs backends[] = {
+    { VSOCKET_BE_NAME, "vSocket Communication Backend v0.1",
+      vsock_init, vsock_release, vsock_get_reply },
+    { DUMMY_BE_NAME, "Dummy Communication Backend",
+      dummy_init, dummy_release, dummy_get_reply },
+
+    { 0 }
+};
+
+
+// Get backend by name.
+// Returns the entry of `backends` whose shortName equals shortName, or NULL
+// when shortName is NULL or no entry matches.
+static be_funcs *
+get_backend(const char *shortName)
+{
+    be_funcs *be;
+
+    if (shortName == NULL) {
+        return NULL;
+    }
+    // The table ends at the first entry with a NULL or empty shortName.
+    for (be = backends; be->shortName && *be->shortName; be++) {
+        if (strcmp(shortName, be->shortName) == 0) {
+            return be;
+        }
+    }
+    return NULL;
+}
+
+// "dummy" interface implementation
+// Used for manual testing mainly,
+// to make sure data arrives to backend
+//----------------------------------
+static be_sock_status
+dummy_init(be_sock_id *id, int cid, int port)
+{
+    // Nothing to connect to; just report.
+    printf("dummy_init: connected.\n");
+    return CONN_SUCCESS;
+}
+
+static void
+dummy_release(be_sock_id *id)
+{
+    printf("dummy_release: released.\n");
+}
+
+// Prints the request and answers with the fixed string "none".
+// a->buf is heap-allocated; the caller frees it.
+static be_sock_status
+dummy_get_reply(be_sock_id *id, be_request *r, be_answer* a)
+{
+    printf("dummy_get_reply: got request %s.\n", r->msg);
+    printf("dummy_get_reply: replying empty (for now).\n");
+    a->buf = strdup("none");
+    if (a->buf == NULL) {
+        errno = ENOMEM;
+        return CONN_FAILURE;
+    }
+
+    return CONN_SUCCESS;
+}
+
+
+// vsocket interface implementation
+//---------------------------------
+
+
+
+// Create and connect VMCI socket.
+// return CONN_SUCCESS (0) or CONN_FAILURE (-1)
+//
+// Opens a stream socket in the vSockets address family, binds it to a local
+// port taken round-robin from [START_CLIENT_PORT, MAX_CLIENT_PORT] (at most
+// BIND_RETRY_COUNT attempts), then connects to (cid, port). On success the
+// socket is left in id->sock_id; when bind or connect fails the socket is
+// closed and errno of the failing call is preserved.
+static be_sock_status
+vsock_init(be_sock_id *id, int cid, int port)
+{
+    static int round_robin = START_CLIENT_PORT; // Round robin client bind port
+    int ret = CONN_FAILURE; // stays a failure if no bind attempt is ever made
+    int af; // family id
+    int sock; // socket id
+    int attempt;
+
+    if ((af = vsock_get_family()) == -1) {
+        return CONN_FAILURE;
+    }
+    sock = socket(af, SOCK_STREAM, 0);
+    if (sock == -1) {
+        return CONN_FAILURE;
+    }
+
+    id->sock_id = sock;
+    memset(&id->addr, 0, sizeof id->addr);
+    id->addr.svm_family = af;
+    id->addr.svm_cid = VMCISock_GetLocalCID();
+
+    for (attempt = 0; attempt < BIND_RETRY_COUNT; attempt++) {
+        id->addr.svm_port = round_robin;
+
+        // Advance (and wrap) the shared counter for the next attempt.
+        if (round_robin == MAX_CLIENT_PORT) {
+            round_robin = START_CLIENT_PORT;
+        } else {
+            round_robin++;
+        }
+
+        assert((round_robin >= START_CLIENT_PORT) && (round_robin <= MAX_CLIENT_PORT));
+
+        // Bind a port. If less than 1024 it ensures the client is capable of
+        // binding a port lower than 1024 which is typically a root process or
+        // a process given capabilities by root.
+        ret = bind(sock, (const struct sockaddr *) &id->addr, sizeof id->addr);
+        if (ret == 0) {
+            break;
+        }
+    }
+
+    if (ret != 0) {
+        int old_errno = errno; // vsock_release() may overwrite errno
+        vsock_release(id);
+        errno = old_errno;
+        return CONN_FAILURE;
+    }
+
+    // Connect to the server.
+    memset(&id->addr, 0, sizeof id->addr);
+    id->addr.svm_family = af;
+    id->addr.svm_cid = cid;
+    id->addr.svm_port = port;
+
+    ret = connect(sock, (const struct sockaddr *) &id->addr, sizeof id->addr);
+    if (ret != 0) {
+        int old_errno = errno;
+        vsock_release(id);
+        errno = old_errno;
+        return CONN_FAILURE;
+    }
+
+    return CONN_SUCCESS;
+}
+
+//
+// Send request (r->msg) and wait for reply.
+// Returns 0 on success, -1 on error (errno describes the failure).
+// On success, allocates a->buf (caller needs to free it) and places the reply there.
+// Expects r and a to be allocated by the caller.
+//
+//
+static be_sock_status
+vsock_get_reply(be_sock_id *s, be_request *r, be_answer* a)
+{
+    int ret;
+    uint32_t b; // smallish buffer
+
+    // Wire format in both directions: MAGIC (uint32), payload length
+    // (uint32), payload bytes.
+    //
+    // NOTE(review): every send()/recv() below is issued once and a short
+    // transfer is treated as a failure; on a stream socket a large payload
+    // may arrive in pieces - consider looping until complete.
+
+    // Try to send a message to the server.
+    b = MAGIC;
+    ret = send(s->sock_id, &b, sizeof b, 0);
+    if (ret != (int) sizeof b) {
+        CHECK_ERRNO(ret);
+        return CONN_FAILURE;
+    }
+
+    ret = send(s->sock_id, &r->mlen, sizeof r->mlen, 0);
+    if (ret != (int) sizeof r->mlen) {
+        CHECK_ERRNO(ret);
+        return CONN_FAILURE;
+    }
+
+    ret = send(s->sock_id, r->msg, r->mlen, 0);
+    if (ret < 0 || (uint32_t) ret != r->mlen) {
+        CHECK_ERRNO(ret);
+        return CONN_FAILURE;
+    }
+
+    // Now get the reply (blocking, wait on ESX-side execution):
+    // MAGIC:
+    b = 0;
+    ret = recv(s->sock_id, &b, sizeof b, 0);
+    if (ret != (int) sizeof b) {
+        CHECK_ERRNO(ret);
+        snprintf(a->errBuf, ERR_BUF_LEN, "Failed to receive magic data: received %d expected %zu bytes\n",
+                 ret, sizeof b);
+        return CONN_FAILURE;
+    }
+    if (b != MAGIC) {
+        snprintf(a->errBuf, ERR_BUF_LEN, "Wrong magic: got 0x%x expected 0x%x\n", b, MAGIC);
+        errno = EBADMSG;
+        return CONN_FAILURE;
+    }
+
+    // length
+    ret = recv(s->sock_id, &b, sizeof b, 0);
+    if (ret != (int) sizeof b) {
+        CHECK_ERRNO(ret);
+        snprintf(a->errBuf, ERR_BUF_LEN, "Failed to receive data len : ret %d (%s)\n",
+                 ret, strerror(errno));
+        return CONN_FAILURE;
+    }
+
+    // Alloc the recv buffer, skip retries on mem. alloc error.
+    // One extra zeroed byte keeps the buffer NUL-terminated even if the
+    // peer did not send a terminator; the Go caller reads it as a C string.
+    a->buf = calloc(1, (size_t) b + 1);
+    if (!a->buf) {
+        snprintf(a->errBuf, ERR_BUF_LEN, "Failed to allocate memory len : %u\n", b);
+        errno = ENOMEM;
+        return CONN_FAILURE;
+    }
+
+    ret = recv(s->sock_id, a->buf, b, 0);
+    if (ret < 0 || (uint32_t) ret != b) {
+        int old_errno = errno; // keep recv()'s errno across free()
+        free(a->buf);
+        a->buf = NULL;
+        errno = old_errno;
+        CHECK_ERRNO(ret);
+        snprintf(a->errBuf, ERR_BUF_LEN, "Failed to receive message data: received %d expected %u\n",
+                 ret, b);
+        return CONN_FAILURE;
+    }
+    return CONN_SUCCESS;
+}
+
+// release socket and vmci info
+static void
+vsock_release(be_sock_id *id)
+{
+    close(id->sock_id);
+}
+
+//
+// Handle one request using BE interface
+// Yes, we DO create and bind socket for each request - it's management
+// so we can afford overhead, and it allows connection to be stateless.
+//
+// Returns the status of init_sock when that fails, otherwise the status of
+// get_reply. errno as left by get_reply is preserved across release_sock,
+// because the caller reports failures from errno.
+//
+static be_sock_status
+host_request(be_funcs *be, be_request* req, be_answer* ans, int cid, int port)
+{
+    be_sock_id id;
+    be_sock_status ret;
+    int saved_errno;
+
+    if ((ret = be->init_sock(&id, cid, port)) != 0) {
+        return ret;
+    }
+
+    ret = be->get_reply(&id, req, ans);
+    saved_errno = errno;
+    be->release_sock(&id);
+    errno = saved_errno;
+    return ret;
+}
+
+//
+//
+// Entry point for vsocket requests.
+// Returns 0 (CONN_SUCCESS) on success, -1 (CONN_FAILURE) on error, and sets
+// errno if needed:
+//   EINVAL   - json_request, be_name or ans is NULL
+//   ENXIO    - be_name does not name a known backend
+//   EMSGSIZE - json_request is longer than MAXBUF characters
+// ans is allocated by the caller; on success ans->buf holds the reply and is
+// released with Vmci_FreeBuf().
+//
+const be_sock_status
+Vmci_GetReply(int port, const char* json_request, const char* be_name,
+              be_answer* ans)
+{
+    be_request req;
+    be_funcs *be;
+    size_t len;
+
+    if (json_request == NULL || be_name == NULL || ans == NULL) {
+        errno = EINVAL;
+        return CONN_FAILURE;
+    }
+
+    be = get_backend(be_name);
+    if (be == NULL) {
+        errno = ENXIO; // reusing "no such device or address" for wrong BE name
+        return CONN_FAILURE;
+    }
+
+    // strnlen() stops at MAXBUF; if the byte it stopped on is not the
+    // terminator the request exceeds the limit and would otherwise be sent
+    // truncated, without its trailing \0.
+    len = strnlen(json_request, MAXBUF);
+    if (json_request[len] != '\0') {
+        snprintf(ans->errBuf, ERR_BUF_LEN, "Request is too large (max %d)\n", MAXBUF);
+        errno = EMSGSIZE;
+        return CONN_FAILURE;
+    }
+
+    req.mlen = len + 1; // the trailing \0 is sent as well
+    req.msg = json_request;
+
+    return host_request(be, &req, ans, ESX_VMCI_CID, port);
+}
+
+// Frees the reply buffer held by ans (if any) and resets the pointer, so the
+// call is safe to repeat. ans itself is not freed.
+void
+Vmci_FreeBuf(be_answer *ans)
+{
+    if (ans && ans->buf) {
+        free(ans->buf);
+        ans->buf = NULL;
+    }
+}
diff --git a/drivers/storage/vmw/vmci/vmci_client.h b/drivers/storage/vmw/vmci/vmci_client.h
new file mode 100644
index 00000000..2103907a
--- /dev/null
+++ b/drivers/storage/vmw/vmci/vmci_client.h
@@ -0,0 +1,155 @@
+// Copyright 2016 VMware, Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+//
+// VMCI sockets communication - client side.
+//
+// Called mainly from Go code.
+//
+// API: Exposes only Vmci_GetReply. The call is blocking.
+//
+//
+#pragma once
+
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vmci_sockets.h"
+#include "connection_types.h"
+
+#define ERR_BUF_LEN 512
+
+// operations status. 0 is OK
+typedef int be_sock_status;
+
+//
+// Booking structure for opened VMCI / vSocket
+//
+typedef struct {
+    int sock_id; // socket id for socket APIs
+    struct sockaddr_vm addr; // held here for bookkeeping and reporting
+} be_sock_id;
+
+//
+// Protocol message structure: request and reply
+//
+
+typedef struct be_request {
+    uint32_t mlen; // length of message (including trailing \0)
+    const char *msg; // null-terminated immutable JSON string.
+} be_request;
+
+#define MAXBUF (1024 * 1024) // Safety limit. We do not expect json string > 1M
+#define MAX_CLIENT_PORT 1023 // Last privileged port
+#define START_CLIENT_PORT 100 // Where to start client port
+
+// Retry entire range on bind failures
+#define BIND_RETRY_COUNT (MAX_CLIENT_PORT - START_CLIENT_PORT)
+
+typedef struct be_answer {
+    char *buf; // response buffer
+    char errBuf[ERR_BUF_LEN]; // error response buffer
+} be_answer;
+
+//
+// Interface for communication to "command execution" server.
+//
+typedef struct be_funcs {
+    const char *shortName; // name of the interface (key to access it)
+    const char *name; // longer explanation (human help)
+
+    // init the channel, return status and ID
+    be_sock_status
+    (*init_sock)(be_sock_id *id, int cid, int port);
+    // release the channel - clean up
+    void
+    (*release_sock)(be_sock_id *id);
+
+    // send a request and get reply - blocking
+    be_sock_status
+    (*get_reply)(be_sock_id *id, be_request *r, be_answer* a);
+} be_funcs;
+
+// support communication interfaces
+#define VSOCKET_BE_NAME "vsocket" // backend to communicate via vSocket
+#define ESX_VMCI_CID 2 // ESX host VMCI CID ("address")
+#define DUMMY_BE_NAME "dummy" // backend which only returns OK, for unit test
+
+
+// Get backend by name
+static be_funcs *
+get_backend(const char *shortName);
+
+// "dummy" interface implementation
+// Used for manual testing mainly,
+// to make sure data arrives to backend
+//----------------------------------
+static be_sock_status
+dummy_init(be_sock_id *id, int cid, int port);
+
+static void
+dummy_release(be_sock_id *id);
+
+static be_sock_status
+dummy_get_reply(be_sock_id *id, be_request *r, be_answer* a);
+
+
+// vsocket interface implementation
+//---------------------------------
+
+
+
+// Create and connect VMCI socket.
+// return CONN_SUCCESS (0) or CONN_FAILURE (-1)
+static be_sock_status
+vsock_init(be_sock_id *id, int cid, int port);
+
+//
+// Send request (r->msg) and wait for reply.
+// Returns 0 on success, -1 on error (errno describes the failure).
+// On success, allocates a->buf (caller needs to free it) and places the reply there.
+// Expects r and a to be allocated by the caller.
+//
+//
+static be_sock_status
+vsock_get_reply(be_sock_id *s, be_request *r, be_answer* a);
+
+// release socket and vmci info
+static void
+vsock_release(be_sock_id *id);
+
+//
+// Handle one request using BE interface
+// Yes, we DO create and bind socket for each request - it's management
+// so we can afford overhead, and it allows connection to be stateless.
+//
+static be_sock_status
+host_request(be_funcs *be, be_request* req, be_answer* ans, int cid, int port);
+
+//
+//
+// Entry point for vsocket requests.
+// Returns 0 for success, -1 for err, and sets errno if needed
+// ans is allocated by the caller
+//
+const be_sock_status
+Vmci_GetReply(int port, const char* json_request, const char* be_name,
+              be_answer* ans);
+
+void
+Vmci_FreeBuf(be_answer *ans);
diff --git a/drivers/storage/vmw/vmci/vmci_server.h b/drivers/storage/vmw/vmci/vmci_server.h
new file mode 100644
index 00000000..25a87264
--- /dev/null
+++ b/drivers/storage/vmw/vmci/vmci_server.h
@@ -0,0 +1,268 @@
+// Copyright 2016 VMware, Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//
+// Simple C library to do VMCI / vSocket listen
+//
+// Based on vsocket usage example so quite clumsy.
+
+// TODO: return meaningful error codes. Issue #206
+
+#pragma once
+
+// NOTE(review): the header names on the system #include lines were missing;
+// the list below is reconstructed from the symbols this file uses - confirm.
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "vmci_sockets.h"
+#include "connection_types.h"
+
+
+// SO_QSIZE maximum number of connections (requests) in socket queue.
+// NOTE(review): this is a definition, not a declaration; including this
+// header from more than one translation unit would duplicate it - confirm
+// it is included exactly once.
+int SO_QSIZE = 128;
+
+// Returns vSocket to listen on, or -1.
+// errno indicates the reason for a failure, if any.
+// The socket is bound to (VMADDR_CID_ANY, port) and put into listening state
+// with a backlog of SO_QSIZE. It is closed again on any failure.
+int
+vmci_init(unsigned int port)
+{
+    struct sockaddr_vm addr;
+    int ret;
+    int socket_fd; // socket to open
+    int saved_errno; // buffer for retaining errno
+    int af = vsock_get_family(); // socket family for vSockets communication
+
+    if (af == -1) {
+        return CONN_FAILURE;
+    }
+
+    /*
+     * Open a STREAM socket using our address family.
+     */
+
+    socket_fd = socket(af, SOCK_STREAM, 0);
+    if (socket_fd == -1) {
+        saved_errno = errno; // perror() may overwrite errno
+        perror("Failed to open socket");
+        errno = saved_errno;
+        return CONN_FAILURE;
+    }
+
+    /*
+     * Bind to an address on which we will listen for client connections. We
+     * use VMADDR_CID_ANY, which is the vSockets equivalent of INADDR_ANY, and
+     * we listen on the port passed in by the caller.
+     */
+
+    memset(&addr, 0, sizeof addr);
+    addr.svm_family = af;
+    addr.svm_cid = VMADDR_CID_ANY;
+    addr.svm_port = port;
+    ret = bind(socket_fd, (const struct sockaddr *) &addr, sizeof addr);
+    if (ret == -1) {
+        saved_errno = errno;
+        perror("Failed to bind socket");
+        close(socket_fd);
+        errno = saved_errno;
+        return CONN_FAILURE;
+    }
+
+    /*
+     * listen for client connections.
+     */
+    ret = listen(socket_fd, SO_QSIZE);
+    if (ret == -1) {
+        saved_errno = errno;
+        perror("Failed to listen on socket");
+        close(socket_fd); // do not leak the bound socket
+        errno = saved_errno;
+        return CONN_FAILURE;
+    }
+
+    return socket_fd;
+}
+
+// Accepts one connection on s and reads one request from it.
+// Returns vSocket to communicate on (which needs to be closed later),
+// or -1 on error (errno is set, and the accepted socket is already closed).
+// On success buf holds the NUL-terminated request. Connections from ports
+// >= START_NON_PRIVILEGED_PORT are rejected with ECONNABORTED.
+int
+vmci_get_one_op(const int s, // socket to listen on
+                uint32_t *vmid, // cartel ID for VM
+                char *buf, // external buffer to return json string
+                const int bsize // buffer size
+                )
+{
+    int ret; // keep return values here
+    uint32_t b; // smallish buffer
+    int saved_errno; // retain errno when needed
+    socklen_t addrLen;
+    struct sockaddr_vm addr;
+    int client_socket = -1; // connected socket to talk to client
+    int af = vsock_get_family(); // socket family for vSockets communication
+
+    if (af == -1) {
+        return CONN_FAILURE;
+    }
+
+    addrLen = sizeof addr;
+    client_socket = accept(s, (struct sockaddr *) &addr, &addrLen);
+    if (client_socket == -1) {
+        saved_errno = errno; // perror() may overwrite errno
+        perror("Failed to accept connection");
+        errno = saved_errno;
+        return CONN_FAILURE;
+    }
+
+    if (addr.svm_port >= START_NON_PRIVILEGED_PORT) {
+        fprintf(stderr, "Connection from non root port=%u, cid=%u\n", addr.svm_port, addr.svm_cid);
+        close(client_socket);
+        errno = ECONNABORTED;
+        return CONN_FAILURE;
+    }
+
+    // get VMID. We really get CartelID for VM, but it will make do
+    socklen_t len = sizeof(*vmid);
+    if (getsockopt(client_socket, af, SO_VMCI_PEER_HOST_VM_ID, vmid, &len) == -1 || len
+        != sizeof(*vmid)) {
+        perror("sockopt SO_VMCI_PEER_HOST_VM_ID failed, continuing...");
+        // will still try to recv message to know what was there - so no return
+    }
+
+    /*
+     * Try to receive a message from the client.
+     * the message has MAGIC, length, and the actual data.
+     *
+     */
+
+    // get magic:
+    b = 0;
+    ret = recv(client_socket, &b, sizeof b, 0);
+    if (ret == -1 || b != MAGIC) {
+        saved_errno = errno;
+        fprintf(stderr,
+                "Failed to receive magic: ret %d (%s) got 0x%x (expected 0x%x)\n",
+                ret, strerror(saved_errno), b, MAGIC);
+        close(client_socket);
+        errno = saved_errno;
+        CHECK_ERRNO(ret);
+        return CONN_FAILURE;
+    }
+
+    // get length (a short read is a failure too, not only -1):
+    b = 0;
+    ret = recv(client_socket, &b, sizeof b, 0);
+    if (ret != (int) sizeof b) {
+        saved_errno = errno;
+        fprintf(stderr, "Failed to receive len: ret %d (%s) got %u\n", ret,
+                strerror(saved_errno), b);
+        close(client_socket);
+        errno = saved_errno;
+        CHECK_ERRNO(ret);
+        return CONN_FAILURE;
+    }
+
+    // A negative bsize must not be converted to a huge unsigned limit.
+    if (bsize < 0 || b > (uint32_t) bsize) {
+        fprintf(stderr, "Query is too large: %u (max %d)\n", b, bsize);
+        close(client_socket);
+        errno = ERANGE; // result too large for the buffer
+        return CONN_FAILURE;
+    }
+
+    memset(buf, 0, b);
+    ret = recv(client_socket, buf, b, 0);
+    if (ret < 0 || (uint32_t) ret != b) {
+        saved_errno = errno;
+        fprintf(stderr, "Failed to receive content: ret %d (%s) expected %u\n",
+                ret, strerror(saved_errno), b);
+        close(client_socket);
+        errno = saved_errno;
+        CHECK_ERRNO(ret);
+        return CONN_FAILURE;
+    }
+    // do protocol sanity check: the announced length must be exactly the
+    // string plus its trailing \0. strnlen() never reads past the b bytes
+    // that were received, even if the client sent no terminator.
+    if (strnlen(buf, b) + 1 != b) {
+        fprintf(stderr, "Protocol error: len mismatch, expected %u, got %zu\n",
+                b, strnlen(buf, b) + 1);
+        close(client_socket);
+        errno = EBADMSG;
+        return CONN_FAILURE;
+    }
+
+    return client_socket;
+}
+
+// Sends a single reply (MAGIC, length, string with trailing \0) on a socket
+// and closes the socket in every case.
+// Returns 0 on OK and -1 on error (errno is set in this case).
+// A NULL reply is sent as "OK". Failure details are written to stderr.
+int
+vmci_reply(const int client_socket, // socket to use
+           const char *reply // (json) to send back
+           )
+{
+    int ret; // keep return values here
+    int saved_errno; // retain errno when needed
+    uint32_t b; // smallish buffer
+
+    // Just being paranoid...
+    if (reply == NULL) {
+        reply = "OK";
+    }
+
+    /*
+     * And send one word back.
+     */
+
+    b = MAGIC;
+    ret = send(client_socket, &b, sizeof(b), 0);
+    if (ret != (int) sizeof(b)) {
+        saved_errno = errno;
+        reply = "Failed to send magic";
+        fprintf(stderr, "%s: ret %d (%s) expected size %zu\n", reply, ret,
+                strerror(saved_errno), sizeof(b));
+        goto failed;
+    }
+
+    b = strlen(reply) + 1; // send the string and trailing \0
+    ret = send(client_socket, &b, sizeof(b), 0);
+    if (ret != (int) sizeof(b)) {
+        saved_errno = errno;
+        reply = "Failed to send len";
+        fprintf(stderr, "%s: ret %d (%s) expected size %zu\n", reply, ret,
+                strerror(saved_errno), sizeof(b));
+        goto failed;
+    }
+
+    ret = send(client_socket, reply, b, 0);
+    if (ret < 0 || (uint32_t) ret != b) {
+        saved_errno = errno;
+        fprintf(stderr, "Failed to send content: ret %d (%s) expected size %u\n",
+                ret, strerror(saved_errno), b);
+        goto failed;
+    }
+
+    // success
+    close(client_socket);
+    return CONN_SUCCESS;
+
+    // failure
+failed:
+    close(client_socket);
+    errno = saved_errno;
+    CHECK_ERRNO(ret);
+    return CONN_FAILURE;
+}
+
+// Closes a socket (a value of -1 is ignored).
+void
+vmci_close(int s)
+{
+
+    if (s != -1) {
+        close(s);
+    }
+}
diff --git a/drivers/storage/vmw/vmci/vmci_sockets.h b/drivers/storage/vmw/vmci/vmci_sockets.h
new file mode 100644
index 00000000..be163fa9
--- /dev/null
+++ b/drivers/storage/vmw/vmci/vmci_sockets.h
@@ -0,0 +1,844 @@
+// Copyright 2016 VMware, Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+
+/* **********************************************************
+ * Copyright (c) 2007-2015 VMware, Inc.
All rights reserved.
+ * **********************************************************/
+
+/*
+ * vmci_sockets.h --
+ *
+ *    vSockets public constants and types.
+ */
+#pragma once
+
+#ifndef _VMCI_SOCKETS_H_
+#define _VMCI_SOCKETS_H_
+
+
+#if defined(_WIN32)
+#  if !defined(NT_INCLUDED)
+#     include <winsock2.h>
+#  endif // !NT_INCLUDED
+#else // _WIN32
+#if defined(linux) && !defined(VMKERNEL)
+#  if !defined(__KERNEL__)
+#     include <sys/socket.h>
+#  endif // __KERNEL__
+#else // linux && !VMKERNEL
+#  if defined(__APPLE__)
+#     include <sys/socket.h>
+#     include <string.h>
+#  endif // __APPLE__
+#endif // linux && !VMKERNEL
+#endif
+
+/**
+ * \brief Option name for STREAM socket buffer size.
+ *
+ * Use as the option name in \c setsockopt(3) or \c getsockopt(3) to set
+ * or get an \c unsigned \c long \c long that specifies the size of the
+ * buffer underlying a vSockets STREAM socket.
+ *
+ * \note Value is clamped to the MIN and MAX.
+ *
+ * \see VMCISock_GetAFValueFd()
+ * \see SO_VMCI_BUFFER_MIN_SIZE
+ * \see SO_VMCI_BUFFER_MAX_SIZE
+ *
+ * An example is given below.
+ *
+ * \code
+ * int vmciFd;
+ * int af = VMCISock_GetAFValueFd(&vmciFd);
+ * unsigned long long val = 0x1000;
+ * int fd = socket(af, SOCK_STREAM, 0);
+ * setsockopt(fd, af, SO_VMCI_BUFFER_SIZE, &val, sizeof val);
+ * ...
+ * close(fd);
+ * VMCISock_ReleaseAFValueFd(vmciFd);
+ * \endcode
+ */
+
+#define SO_VMCI_BUFFER_SIZE 0
+
+/**
+ * \brief Option name for STREAM socket minimum buffer size.
+ *
+ * Use as the option name in \c setsockopt(3) or \c getsockopt(3) to set
+ * or get an \c unsigned \c long \c long that specifies the minimum size
+ * allowed for the buffer underlying a vSockets STREAM socket.
+ *
+ * \see VMCISock_GetAFValueFd()
+ * \see SO_VMCI_BUFFER_SIZE
+ * \see SO_VMCI_BUFFER_MAX_SIZE
+ *
+ * An example is given below.
+ *
+ * \code
+ * int vmciFd;
+ * int af = VMCISock_GetAFValueFd(&vmciFd);
+ * unsigned long long val = 0x500;
+ * int fd = socket(af, SOCK_STREAM, 0);
+ * setsockopt(fd, af, SO_VMCI_BUFFER_MIN_SIZE, &val, sizeof val);
+ * ...
+ * close(fd); + * VMCISock_ReleaseAFValueFd(vmciFd); + * \endcode + */ + +#define SO_VMCI_BUFFER_MIN_SIZE 1 + +/** + * \brief Option name for STREAM socket maximum buffer size. + * + * Use as the option name in \c setsockopt(3) or \c getsockopt(3) to set or + * get an unsigned long long that specifies the maximum size allowed for the + * buffer underlying a vSockets STREAM socket. + * + * \see VMCISock_GetAFValueFd() + * \see SO_VMCI_BUFFER_SIZE + * \see SO_VMCI_BUFFER_MIN_SIZE + * + * An example is given below. + * + * \code + * int vmciFd; + * int af = VMCISock_GetAFValueFd(&vmciFd); + * unsigned long long val = 0x4000; + * int fd = socket(af, SOCK_STREAM, 0); + * setsockopt(fd, af, SO_VMCI_BUFFER_MAX_SIZE, &val, sizeof val); + * ... + * close(fd); + * VMCISock_ReleaseAFValueFd(vmciFd); + * \endcode + */ + +#define SO_VMCI_BUFFER_MAX_SIZE 2 + +/** + * \brief Option name for socket peer's host-specific VM ID. + * + * Use as the option name in \c getsockopt(3) to get a host-specific identifier + * for the peer endpoint's VM. The identifier is a signed integer. + * + * \note Only available for ESX (VMKernel/userworld) endpoints. + * + * An example is given below. + * + * \code + * int vmciFd; + * int af = VMCISock_GetAFValueFd(&vmciFd); + * int id; + * socklen_t len = sizeof id; + * int fd = socket(af, SOCK_DGRAM, 0); + * getsockopt(fd, af, SO_VMCI_PEER_HOST_VM_ID, &id, &len); + * ... + * close(fd); + * VMCISock_ReleaseAFValueFd(vmciFd); + * \endcode + */ + +#define SO_VMCI_PEER_HOST_VM_ID 3 + +/** + * \brief Option name for socket's service label. + * + * Use as the option name in \c setsockopt(3) or \c getsockopt(3) to set or + * get the service label for a socket. The service label is a C-style + * NUL-terminated string. + * + * \note Only available for ESX (VMkernel/userworld) endpoints. + */ + +#define SO_VMCI_SERVICE_LABEL 4 + +/** + * \brief Option name for determining if a socket is trusted. 
+ * + * Use as the option name in \c getsockopt(3) to determine if a socket is + * trusted. The value is a signed integer. + * + * An example is given below. + * + * \code + * int vmciFd; + * int af = VMCISock_GetAFValueFd(&vmciFd); + * int trusted; + * socklen_t len = sizeof trusted; + * int fd = socket(af, SOCK_DGRAM, 0); + * getsockopt(fd, af, SO_VMCI_TRUSTED, &trusted, &len); + * ... + * close(fd); + * VMCISock_ReleaseAFValueFd(vmciFd); + * \endcode + */ + +#define SO_VMCI_TRUSTED 5 + +/** + * \brief Option name for STREAM socket connection timeout. + * + * Use as the option name in \c setsockopt(3) or \c getsockopt(3) to set or + * get the connection timeout for a STREAM socket. The value is platform + * dependent. On ESX, Linux and Mac OS, it is a \c struct \c timeval. + * On Windows, it is a \c DWORD. + * + * An example is given below. + * + * \code + * int vmciFd; + * int af = VMCISock_GetAFValueFd(&vmciFd); + * struct timeval t = { 5, 100000 }; // 5.1 seconds + * int fd = socket(af, SOCK_STREAM, 0); + * setsockopt(fd, af, SO_VMCI_CONNECT_TIMEOUT, &t, sizeof t); + * ... + * close(fd); + * VMCISock_ReleaseAFValueFd(vmciFd); + * \endcode + */ + +#define SO_VMCI_CONNECT_TIMEOUT 6 + +/** + * \brief Option name for using non-blocking send/receive. + * + * Use as the option name for \c setsockopt(3) or \c getsockopt(3) to set or + * get the non-blocking transmit/receive flag for a STREAM socket. This flag + * determines whether \c send() and \c recv() can be called in non-blocking + * contexts for the given socket. The value is a signed integer. + * + * This option is only relevant to kernel endpoints, where descheduling + * the thread of execution is not allowed, for example, while holding a + * spinlock. It is not to be confused with conventional non-blocking socket + * operations. + * + * \note Only available for VMKernel endpoints. + * + * An example is given below. 
+ * + * \code + * int vmciFd; + * int af = VMCISock_GetAFValueFd(&vmciFd); + * int nonblock; + * socklen_t len = sizeof nonblock; + * int fd = socket(af, SOCK_STREAM, 0); + * getsockopt(fd, af, SO_VMCI_NONBLOCK_TXRX, &nonblock, &len); + * ... + * close(fd); + * VMCISock_ReleaseAFValueFd(vmciFd); + * \endcode + */ + +#define SO_VMCI_NONBLOCK_TXRX 7 + +/** + * \brief The vSocket equivalent of INADDR_ANY. + * + * This works for the \c svm_cid field of sockaddr_vm and indicates the + * context ID of the current endpoint. + * + * \see sockaddr_vm + * + * An example is given below. + * + * \code + * int vmciFd; + * int af = VMCISock_GetAFValueFd(&vmciFd); + * struct sockaddr_vm addr; + * int fd = socket(af, SOCK_DGRAM, 0); + * addr.svm_family = af; + * addr.svm_cid = VMADDR_CID_ANY; + * addr.svm_port = 2000; + * bind(fd, &addr, sizeof addr); + * ... + * close(fd); + * VMCISock_ReleaseAFValueFd(vmciFd); + * \endcode + */ + +#define VMADDR_CID_ANY ((unsigned int)-1) + +/** + * \brief Bind to any available port. + * + * Works for the \c svm_port field of sockaddr_vm. + * + * \see sockaddr_vm + * + * An example is given below. + * + * \code + * int vmciFd; + * int af = VMCISock_GetAFValueFd(&vmciFd); + * struct sockaddr_vm addr; + * int fd = socket(af, SOCK_DGRAM, 0); + * addr.svm_family = af; + * addr.svm_cid = VMADDR_CID_ANY; + * addr.svm_port = VMADDR_PORT_ANY; + * bind(fd, &addr, sizeof addr); + * ... + * close(fd); + * VMCISock_ReleaseAFValueFd(vmciFd); + * \endcode + */ + +#define VMADDR_PORT_ANY ((unsigned int)-1) + +/** + * \brief Invalid vSockets version. + * + * \see VMCISock_Version() + */ + +#define VMCI_SOCKETS_INVALID_VERSION ((unsigned int)-1) + +/** + * \brief The epoch (first) component of the vSockets version. + * + * A single byte representing the epoch component of the vSockets version. + * + * \see VMCISock_Version() + * + * An example is given below. 
+ * + * \code + * unsigned int ver = VMCISock_Version(); + * unsigned char epoch = VMCI_SOCKETS_VERSION_EPOCH(ver); + * \endcode + */ + +#define VMCI_SOCKETS_VERSION_EPOCH(_v) (((_v) & 0xFF000000) >> 24) + +/** + * \brief The major (second) component of the vSockets version. + * + * A single byte representing the major component of the vSockets version. + * Typically changes for every major release of a product. + * + * \see VMCISock_Version() + * + * An example is given below. + * + * \code + * unsigned int ver = VMCISock_Version(); + * unsigned char major = VMCI_SOCKETS_VERSION_MAJOR(ver); + * \endcode + */ + +#define VMCI_SOCKETS_VERSION_MAJOR(_v) (((_v) & 0x00FF0000) >> 16) + +/** + * \brief The minor (third) component of the vSockets version. + * + * Two bytes representing the minor component of the vSockets version. + * + * \see VMCISock_Version() + * + * An example is given below. + * + * \code + * unsigned int ver = VMCISock_Version(); + * unsigned short minor = VMCI_SOCKETS_VERSION_MINOR(ver); + * \endcode + */ + +#define VMCI_SOCKETS_VERSION_MINOR(_v) (((_v) & 0x0000FFFF)) + +/** \cond PRIVATE */ +#if defined(_WIN32) || defined(VMKERNEL) + typedef unsigned short sa_family_t; +#endif // _WIN32 + +#if defined(VMKERNEL) + struct sockaddr { + sa_family_t sa_family; + char sa_data[14]; + }; +#endif +/** \endcond */ + +/** + * \brief Address structure for vSockets. + * + * The address family should be set to whatever VMCISock_GetAFValueFd() + * returns. The structure members should all align on their natural + * boundaries without resorting to compiler packing directives. The total + * size of this structure should be exactly the same as that of \c struct + * \c sockaddr. + * + * \see VMCISock_GetAFValueFd() + */ + +struct sockaddr_vm { +#if defined(__APPLE__) + unsigned char svm_len; +#endif // __APPLE__ + + /** \brief Address family. 
\see VMCISock_GetAFValueFd() */ + sa_family_t svm_family; + + /** \cond PRIVATE */ + unsigned short svm_reserved1; + /** \endcond */ + + /** \brief Port. \see VMADDR_PORT_ANY */ + unsigned int svm_port; + + /** \brief Context ID. \see VMADDR_CID_ANY */ + unsigned int svm_cid; + + /** \cond PRIVATE */ + unsigned char svm_zero[sizeof(struct sockaddr) - +#if defined(__APPLE__) + sizeof(unsigned char) - +#endif // __APPLE__ + sizeof(sa_family_t) - + sizeof(unsigned short) - + sizeof(unsigned int) - + sizeof(unsigned int)]; + /** \endcond */ +}; + +/** \cond PRIVATE */ +struct uuid_2_cid { + unsigned int u2c_context_id; + unsigned int u2c_pad; + char u2c_uuid_string[128]; +}; +/** \endcond */ + +#if defined(_WIN32) +# if !defined(NT_INCLUDED) +# include +# define VMCI_SOCKETS_DEVICE L"\\\\.\\VMCI" +# define VMCI_SOCKETS_VERSION 0x81032058 +# define VMCI_SOCKETS_GET_AF_VALUE 0x81032068 +# define VMCI_SOCKETS_GET_LOCAL_CID 0x8103206c +# define VMCI_SOCKETS_UUID_2_CID 0x810320a4 + + static __inline unsigned int __VMCISock_DeviceIoControl(DWORD cmd) + { + unsigned int val = (unsigned int)-1; + HANDLE device = CreateFileW(VMCI_SOCKETS_DEVICE, GENERIC_READ, 0, NULL, + OPEN_EXISTING, FILE_FLAG_OVERLAPPED, NULL); + if (INVALID_HANDLE_VALUE != device) { + DWORD ioReturn; + DeviceIoControl(device, cmd, &val, sizeof val, &val, sizeof val, + &ioReturn, NULL); + CloseHandle(device); + device = INVALID_HANDLE_VALUE; + } + return val; + } + + static __inline unsigned int VMCISock_Version(void) + { + return __VMCISock_DeviceIoControl(VMCI_SOCKETS_VERSION); + } + + static __inline int VMCISock_GetAFValue(void) + { + return (int)__VMCISock_DeviceIoControl(VMCI_SOCKETS_GET_AF_VALUE); + } + + static __inline int VMCISock_GetAFValueFd(int *outFd) + { + (void)outFd; /* Unused parameter. */ + return VMCISock_GetAFValue(); + } + + static __inline void VMCISock_ReleaseAFValueFd(int fd) + { + (void)fd; /* Unused parameter. 
*/ + } + + static __inline unsigned int VMCISock_GetLocalCID(void) + { + return __VMCISock_DeviceIoControl(VMCI_SOCKETS_GET_LOCAL_CID); + } + + static __inline unsigned int VMCISock_Uuid2ContextId(const char *uuidString) + { + struct uuid_2_cid io; + HANDLE device = CreateFileW(VMCI_SOCKETS_DEVICE, GENERIC_READ, 0, NULL, + OPEN_EXISTING, FILE_FLAG_OVERLAPPED, NULL); + io.u2c_context_id = VMADDR_CID_ANY; + if (INVALID_HANDLE_VALUE != device) { + DWORD ioReturn; + strncpy_s(io.u2c_uuid_string, sizeof io.u2c_uuid_string, + uuidString, _TRUNCATE); + DeviceIoControl(device, VMCI_SOCKETS_UUID_2_CID, &io, sizeof io, + &io, sizeof io, &ioReturn, NULL); + CloseHandle(device); + device = INVALID_HANDLE_VALUE; + } + return io.u2c_context_id; + } +# endif // !NT_INCLUDED +#else // _WIN32 +#if (defined(linux) && !defined(VMKERNEL)) || (defined(__APPLE__)) +# if defined(linux) && defined(__KERNEL__) + void VMCISock_KernelRegister(void); + void VMCISock_KernelDeregister(void); + int VMCISock_GetAFValue(void); + int VMCISock_GetLocalCID(void); +# elif defined(__APPLE__) && (KERNEL) + /* Nothing to define here. 
*/ +# else // __KERNEL__ +# include +# include +# include +# include +# include +# include +# include + +/** \cond PRIVATE */ +# define VMCI_SOCKETS_DEFAULT_DEVICE "/dev/vsock" +# define VMCI_SOCKETS_CLASSIC_ESX_DEVICE "/vmfs/devices/char/vsock/vsock" +# if defined(linux) +# define VMCI_SOCKETS_VERSION 1972 +# define VMCI_SOCKETS_GET_AF_VALUE 1976 +# define VMCI_SOCKETS_GET_LOCAL_CID 1977 +# define VMCI_SOCKETS_UUID_2_CID 1991 +# elif defined(__APPLE__) +# include +# define VMCI_SOCKETS_VERSION _IOR( 'V', 21, unsigned) +# define VMCI_SOCKETS_GET_AF_VALUE _IOR( 'V', 25, int) +# define VMCI_SOCKETS_GET_LOCAL_CID _IOR( 'V', 26, unsigned) +# define VMCI_SOCKETS_UUID_2_CID _IOWR('V', 40, struct uuid_2_cid) +#endif +/** \endcond */ + + /* + *********************************************************************** + * VMCISock_Version */ /** + * + * \brief Retrieve the vSockets version. + * + * Returns the current version of vSockets. The version is a 32-bit + * unsigned integer that consist of three components: the epoch, the + * major version, and the minor version. Use the \c VMCI_SOCKETS_VERSION + * macros to extract the components. + * + * \see VMCI_SOCKETS_VERSION_EPOCH() + * \see VMCI_SOCKETS_VERSION_MAJOR() + * \see VMCI_SOCKETS_VERSION_MINOR() + * + * \retval VMCI_SOCKETS_INVALID_VERSION Not available. + * \retval other The current version. + * + * An example is given below. 
+ * + * \code + * unsigned int ver = VMCISock_Version(); + * if (ver != VMCI_SOCKETS_INVALID_VERSION) { + * printf("vSockets version=%d.%d.%d\n", + * VMCI_SOCKETS_VERSION_EPOCH(ver), + * VMCI_SOCKETS_VERSION_MAJOR(ver), + * VMCI_SOCKETS_VERSION_MINOR(ver)); + * } + * \endcode + * + *********************************************************************** + */ + + static inline unsigned int VMCISock_Version(void) + { + int fd; + unsigned int version; + + fd = open(VMCI_SOCKETS_DEFAULT_DEVICE, O_RDONLY); + if (fd < 0) { + fd = open(VMCI_SOCKETS_CLASSIC_ESX_DEVICE, O_RDONLY); + if (fd < 0) { + return VMCI_SOCKETS_INVALID_VERSION; + } + } + + if (ioctl(fd, VMCI_SOCKETS_VERSION, &version) < 0) { + version = VMCI_SOCKETS_INVALID_VERSION; + } + + close(fd); + return version; + } + + /* + *********************************************************************** + * VMCISock_GetAFValueFd */ /** + * + * \brief Retrieve the address family value for vSockets. + * + * Returns the value to be used for the vSockets address family. + * This value should be used as the domain argument to \c socket(2) (when + * you might otherwise use \c AF_INET). For vSocket-specific options, + * this value should also be used for the level argument to + * \c setsockopt(2) (when you might otherwise use \c SOL_TCP). + * + * \see VMCISock_ReleaseAFValueFd() + * \see sockaddr_vm + * + * \param[out] outFd File descriptor to the VMCI device. The + * address family value is valid until this + * descriptor is closed. This parameter is + * only valid if the return value is not -1. + * Call VMCISock_ReleaseAFValueFd() to close + * this descriptor. + * + * \retval -1 Not available. + * \retval other The address family value. + * + * An example is given below. + * + * \code + * int vmciFd; + * int af = VMCISock_GetAFValueFd(&vmciFd); + * if (af != -1) { + * int fd = socket(af, SOCK_STREAM, 0); + * ... 
+    *    close(fd);
+    *    VMCISock_ReleaseAFValueFd(vmciFd);
+    * }
+    * \endcode
+    *
+    ***********************************************************************
+    */
+
+   static inline int VMCISock_GetAFValueFd(int *outFd)
+   {
+      int fd;
+      int family = -1;
+      if (outFd) { *outFd = -1; } /* mainline path opens no device; keep release a no-op */
+#if defined(linux)
+      /*
+       * vSockets is now in mainline kernel with address family 40. As part
+       * of upstreaming, we removed the IOCTL we use below to determine the
+       * address family. So to handle both a new and old kernel we do this:
+       * 1. Check if our family already exists by making a socket with it.
+       *    Some weird kernel might claim this too, but it's very unlikely
+       *    (Linus' tree has us at 40, and that's what we care about).
+       * 2. If that fails, try the normal IOCTL path, since it's probably an
+       *    older kernel with vSockets from Tools.
+       * 3. If that fails, then vSockets really isn't available.
+       */
+#define AF_VSOCK_LOCAL 40
+      {
+         int s = socket(AF_VSOCK_LOCAL, SOCK_DGRAM, 0);
+         if (s != -1) {
+            close(s);
+            return AF_VSOCK_LOCAL;
+         }
+      }
+#undef AF_VSOCK_LOCAL
+#endif // linux
+
+      fd = open(VMCI_SOCKETS_DEFAULT_DEVICE, O_RDONLY);
+      if (fd < 0) {
+         fd = open(VMCI_SOCKETS_CLASSIC_ESX_DEVICE, O_RDONLY);
+         if (fd < 0) {
+            return -1;
+         }
+      }
+
+      if (ioctl(fd, VMCI_SOCKETS_GET_AF_VALUE, &family) < 0) {
+         family = -1;
+      }
+
+      if (family < 0) {
+         close(fd);
+      } else if (outFd) {
+         *outFd = fd;
+      }
+
+      return family;
+   }
+
+   /** \cond PRIVATE */
+   /*
+    ***********************************************************************
+    * VMCISock_GetAFValue */ /**
+    *
+    * \brief Retrieve the address family value for vSockets.
+    *
+    * Returns the value to be used for the vSockets address family.
+    * This value should be used as the domain argument to \c socket(2) (when
+    * you might otherwise use \c AF_INET). For vSocket-specific options,
+    * this value should also be used for the level argument to
+    * \c setsockopt(2) (when you might otherwise use \c SOL_TCP).
+ * + * \note This function leaves its descriptor to the vsock device open so + * that the socket implementation knows that the socket family is still in + * use. This is done because the address family is registered with the + * kernel on-demand and a notification is needed to unregister the address + * family. Use of this function is thus discouraged; please use + * VMCISock_GetAFValueFd() instead. + * + * \see VMCISock_GetAFValueFd() + * \see sockaddr_vm + * + * \retval -1 Not available. + * \retval other The address family value. + * + * An example is given below. + * + * \code + * int af = VMCISock_GetAFValue(); + * if (af != -1) { + * int fd = socket(af, SOCK_STREAM, 0); + * ... + * close(fd); + * } + * \endcode + * + *********************************************************************** + */ + + static inline int VMCISock_GetAFValue(void) + { + return VMCISock_GetAFValueFd(NULL); + } + /** \endcond PRIVATE */ + + /* + *********************************************************************** + * VMCISock_ReleaseAFValueFd */ /** + * + * \brief Release the file descriptor obtained when retrieving the + * address family value. + * + * Use this to release the file descriptor obtained by calling + * VMCISock_GetAFValueFd(). + * + * \see VMCISock_GetAFValueFd() + * + * \param[in] fd File descriptor to the VMCI device. + * + *********************************************************************** + */ + + static inline void VMCISock_ReleaseAFValueFd(int fd) + { + if (fd >= 0) { + close(fd); + } + } + + /* + *********************************************************************** + * VMCISock_GetLocalCID */ /** + * + * \brief Retrieve the current context ID. + * + * \see VMADDR_CID_ANY + * + * \retval VMADDR_CID_ANY Not available. + * \retval other The current context ID. + * + * An example is given below. 
+    *
+    * \code
+    * int vmciFd;
+    * int af = VMCISock_GetAFValueFd(&vmciFd);
+    * struct sockaddr_vm addr;
+    * addr.svm_family = af;
+    * addr.svm_cid = VMCISock_GetLocalCID();
+    * VMCISock_ReleaseAFValueFd(vmciFd);
+    * \endcode
+    *
+    ***********************************************************************
+    */
+
+   static inline unsigned int VMCISock_GetLocalCID(void)
+   {
+      int fd;
+      unsigned int contextId;
+
+      fd = open(VMCI_SOCKETS_DEFAULT_DEVICE, O_RDONLY);
+      if (fd < 0) {
+         fd = open(VMCI_SOCKETS_CLASSIC_ESX_DEVICE, O_RDONLY); /* fall back to the classic ESX device node */
+         if (fd < 0) {
+            return VMADDR_CID_ANY;
+         }
+      }
+
+      if (ioctl(fd, VMCI_SOCKETS_GET_LOCAL_CID, &contextId) < 0) {
+         contextId = VMADDR_CID_ANY;
+      }
+
+      close(fd);
+      return contextId;
+   }
+
+   /*
+    ***********************************************************************
+    * VMCISock_Uuid2ContextId */ /**
+    *
+    * \brief Retrieve the context ID of a running VM, given a VM's UUID.
+    *
+    * Retrieves the context ID of a running virtual machine given that virtual
+    * machine's unique identifier. The identifier is local to the host and
+    * its meaning is platform-specific. On ESX, which is currently the only
+    * supported platform, it is the "bios.uuid" field as specified in the VM's
+    * VMX file.
+    *
+    * \see VMADDR_CID_ANY
+    *
+    * \retval VMADDR_CID_ANY Not available.
+    * \retval other The VM's context ID.
+    *
+    * \note Only available for ESX (userworld) endpoints.
+    *
+    * An example is given below.
+    *
+    * \code
+    * int vmciFd;
+    * int af = VMCISock_GetAFValueFd(&vmciFd);
+    * unsigned int cid = VMCISock_Uuid2ContextId(
+    *    "56 4d 07 d8 cc d5 c4 0d-98 44 dc 1e 8f e0 da f3");
+    * VMCISock_ReleaseAFValueFd(vmciFd);
+    * \endcode
+    *
+    ***********************************************************************
+    */
+
+   static inline unsigned int VMCISock_Uuid2ContextId(const char *uuidString)
+   {
+      int fd;
+      struct uuid_2_cid io = { 0 }; /* zeroed: pad is defined and the UUID stays NUL-terminated */
+
+      fd = open(VMCI_SOCKETS_DEFAULT_DEVICE, O_RDONLY);
+      if (fd < 0) {
+         fd = open(VMCI_SOCKETS_CLASSIC_ESX_DEVICE, O_RDONLY);
+         if (fd < 0) {
+            return VMADDR_CID_ANY;
+         }
+      }
+
+      strncpy(io.u2c_uuid_string, uuidString, sizeof io.u2c_uuid_string - 1);
+      if (ioctl(fd, VMCI_SOCKETS_UUID_2_CID, &io) < 0) {
+         io.u2c_context_id = VMADDR_CID_ANY;
+      }
+
+      close(fd);
+      return io.u2c_context_id;
+   }
+#  endif // __KERNEL__
+#endif // linux && !VMKERNEL
+#endif // _WIN32
+
+
+#endif // _VMCI_SOCKETS_H_
diff --git a/drivers/storage/vmw/vmci/vmdkops.go b/drivers/storage/vmw/vmci/vmdkops.go
new file mode 100644
index 00000000..4095f6eb
--- /dev/null
+++ b/drivers/storage/vmw/vmci/vmdkops.go
@@ -0,0 +1,113 @@
+// Copyright 2016 VMware, Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// +build linux darwin
+// +build !libstorage_storage_driver libstorage_storage_driver_vmdk libstorage_storage_driver_photon
+
+package vmci
+
+import (
+	"encoding/json"
+
+	log "github.com/Sirupsen/logrus"
+)
+
+//
+// * VMDK CADD (Create/Attach/Detach/Delete) operations client code.
+// *
+// **** PREREQUISITES:
+//   Build: open-vm-tools has to be installed - provided "vmci/vmci_sockets.h"
+//   Run:   open-vm-tools has to be installed
+//
+
+// VmdkCmdRunner interface for sending Vmdk Commands to an ESX server.
+type VmdkCmdRunner interface {
+	Run(cmd string, name string, opts map[string]string) ([]byte, error)
+}
+
+// VmdkOps issues volume commands through its Cmd runner.
+type VmdkOps struct {
+	Cmd VmdkCmdRunner // see *_vmdkcmd.go for implementations.
+}
+
+// VolumeData is the per-volume record that List decodes from the runner's JSON reply.
+type VolumeData struct {
+	Name       string
+	Attributes map[string]string
+}
+
+// Create runs the "create" command for the named volume; the reply payload is discarded.
+func (v VmdkOps) Create(name string, opts map[string]string) error {
+	log.Debugf("vmdkOps.Create name=%s", name)
+	_, err := v.Cmd.Run("create", name, opts)
+	return err
+}
+
+// Remove runs the "remove" command for the named volume; the reply payload is discarded.
+func (v VmdkOps) Remove(name string, opts map[string]string) error {
+	log.Debugf("vmdkOps.Remove name=%s", name)
+	_, err := v.Cmd.Run("remove", name, opts)
+	return err
+}
+
+// Attach runs the "attach" command and returns the runner's raw reply bytes.
+func (v VmdkOps) Attach(name string, opts map[string]string) ([]byte, error) {
+	log.Debugf("vmdkOps.Attach name=%s", name)
+	str, err := v.Cmd.Run("attach", name, opts)
+	if err != nil {
+		return nil, err
+	}
+	return str, nil
+}
+
+// Detach runs the "detach" command for the named volume; the reply payload is discarded.
+func (v VmdkOps) Detach(name string, opts map[string]string) error {
+	log.Debugf("vmdkOps.Detach name=%s", name)
+	_, err := v.Cmd.Run("detach", name, opts)
+	return err
+}
+
+// List runs the "list" command and decodes the JSON reply into VolumeData records.
+func (v VmdkOps) List() ([]VolumeData, error) {
+	log.Debugf("vmdkOps.List")
+	str, err := v.Cmd.Run("list", "", make(map[string]string))
+	if err != nil {
+		return nil, err
+	}
+
+	var result []VolumeData
+	err = json.Unmarshal(str, &result)
+	if err != nil {
+		return nil, err
+	}
+	return result, nil
+}
+
+// Get runs the "get" command for the named volume and decodes the JSON reply into a map.
+func (v VmdkOps) Get(name string) (map[string]interface{}, error) {
+	log.Debugf("vmdkOps.Get name=%s", name)
+	str, err := v.Cmd.Run("get", name, make(map[string]string))
+	if err != nil {
+		return nil, err
+	}
+
+	statusMap := make(map[string]interface{})
+
+	// Decoding is best-effort: a failure is logged and whatever was parsed is still returned.
+	err = json.Unmarshal(str, &statusMap)
+	if err != nil {
+		log.Warnf("vmdkOps.Get failed decoding volume status for name=%s: %v", name, err)
+	}
+	return statusMap, nil
+}
diff --git a/drivers/storage/vmw/vmdk/executor/vmdk_executor.go b/drivers/storage/vmw/vmdk/executor/vmdk_executor.go
new file mode 100644
index 00000000..a7814a46
--- /dev/null
+++ b/drivers/storage/vmw/vmdk/executor/vmdk_executor.go
@@ -0,0 +1,66 @@
+// +build linux darwin
+// +build !libstorage_storage_executor libstorage_storage_executor_vmdk
+
+package executor
+
+import (
+	gofig "github.com/akutz/gofig/types"
+
+	"github.com/codedellemc/libstorage/api/registry"
+	"github.com/codedellemc/libstorage/api/types"
+	"github.com/codedellemc/libstorage/drivers/storage/vmw/vmdk"
+)
+
+type driver struct {
+	config gofig.Config
+}
+
+func init() {
+	registry.RegisterStorageExecutor(vmdk.Name, newDriver)
+}
+
+func newDriver() types.StorageExecutor {
+	return &driver{}
+}
+
+func (d *driver) Name() string {
+	return vmdk.Name
+}
+
+func (d *driver) Supported(
+	ctx types.Context,
+	opts types.Store) (bool, error) {
+
+	return true, nil
+}
+
+func (d *driver) Init(ctx types.Context, config gofig.Config) error {
+	d.config = config
+	return nil
+}
+
+// InstanceID returns the local system's InstanceID.
+func (d *driver) InstanceID(
+	ctx types.Context,
+	opts types.Store) (*types.InstanceID, error) {
+
+	iid := &types.InstanceID{Driver: vmdk.Name}
+	iid.ID = vmdk.Name
+	return iid, nil
+}
+
+// NextDevice returns the next available device.
+func (d *driver) NextDevice( + ctx types.Context, + opts types.Store) (string, error) { + + return "", nil +} + +// LocalDevices returns a map of the system's local devices. +func (d *driver) LocalDevices( + ctx types.Context, + opts *types.LocalDevicesOpts) (*types.LocalDevices, error) { + + return &types.LocalDevices{DeviceMap: map[string]string{}}, nil +} diff --git a/drivers/storage/vmw/vmdk/storage/vmdk_storage.go b/drivers/storage/vmw/vmdk/storage/vmdk_storage.go new file mode 100644 index 00000000..6bf3eb70 --- /dev/null +++ b/drivers/storage/vmw/vmdk/storage/vmdk_storage.go @@ -0,0 +1,190 @@ +// +build !libstorage_storage_driver libstorage_storage_driver_vmdk + +package storage + +import ( + "fmt" + "sync" + + gofig "github.com/akutz/gofig/types" + + "github.com/codedellemc/libstorage/api/registry" + "github.com/codedellemc/libstorage/api/types" + + "github.com/codedellemc/libstorage/drivers/storage/vmw/vmci" + "github.com/codedellemc/libstorage/drivers/storage/vmw/vmdk" + //"github.com/vmware/docker-volume-vsphere/vmdk_plugin/drivers/vmdk/vmdkops" +) + +const ( + minSizeGiB = 1 +) + +type driver struct { + ctx types.Context + config gofig.Config + ops vmci.VmdkOps +} + +func init() { + registry.RegisterStorageDriver(vmdk.Name, newDriver) +} + +func newDriver() types.StorageDriver { + return &driver{} +} + +func (d *driver) Name() string { + return vmdk.Name +} + +func (d *driver) Type(ctx types.Context) (types.StorageType, error) { + return types.Block, nil +} + +func (d *driver) Init(ctx types.Context, config gofig.Config) error { + d.ctx = ctx + d.config = config + d.ops = vmci.VmdkOps{Cmd: vmci.EsxVmdkCmd{Mtx: &sync.Mutex{}}} + return nil +} + +func (d *driver) NextDeviceInfo( + ctx types.Context) (*types.NextDeviceInfo, error) { + return &types.NextDeviceInfo{ + Ignore: true, + }, nil +} + +func (d *driver) InstanceInspect( + ctx types.Context, + opts types.Store) (*types.Instance, error) { + + return nil, nil +} + +func (d *driver) Volumes( + ctx 
types.Context, + opts *types.VolumesOpts) ([]*types.Volume, error) { + + volData, err := d.ops.List() + if err != nil { + return nil, err + } + + vols := []*types.Volume{} + for _, v := range volData { + vols = append(vols, &types.Volume{ + Name: v.Name, + Fields: v.Attributes, + }) + } + + return vols, nil +} + +func (d *driver) VolumeInspect( + ctx types.Context, + volumeID string, + opts *types.VolumeInspectOpts) (*types.Volume, error) { + + volData, err := d.ops.Get(volumeID) + if err != nil { + return nil, err + } + + fields := map[string]string{} + for k, v := range volData { + fields[k] = fmt.Sprintf("%v", v) + } + + name, _ := fields["name"] + return &types.Volume{Name: name, Fields: fields}, nil +} + +func (d *driver) VolumeCreate( + ctx types.Context, + name string, + opts *types.VolumeCreateOpts) (*types.Volume, error) { + + return nil, nil +} + +func (d *driver) VolumeCreateFromSnapshot( + ctx types.Context, + snapshotID, volumeName string, + opts *types.VolumeCreateOpts) (*types.Volume, error) { + + return nil, nil +} + +func (d *driver) VolumeCopy( + ctx types.Context, + volumeID, volumeName string, + opts types.Store) (*types.Volume, error) { + + return nil, nil +} + +func (d *driver) VolumeSnapshot( + ctx types.Context, + volumeID, snapshotName string, + opts types.Store) (*types.Snapshot, error) { + + return nil, nil +} + +func (d *driver) VolumeRemove( + ctx types.Context, + volumeID string, + opts *types.VolumeRemoveOpts) error { + + return nil +} + +func (d *driver) VolumeAttach( + ctx types.Context, + volumeID string, + opts *types.VolumeAttachOpts) (*types.Volume, string, error) { + + return nil, "", nil +} + +func (d *driver) VolumeDetach( + ctx types.Context, + volumeID string, + opts *types.VolumeDetachOpts) (*types.Volume, error) { + + return nil, nil +} + +func (d *driver) Snapshots( + ctx types.Context, + opts types.Store) ([]*types.Snapshot, error) { + + return nil, nil +} + +func (d *driver) SnapshotInspect( + ctx types.Context, + 
snapshotID string,
+	opts types.Store) (*types.Snapshot, error) {
+
+	return nil, nil
+}
+
+func (d *driver) SnapshotCopy(
+	ctx types.Context,
+	snapshotID, snapshotName, destinationID string,
+	opts types.Store) (*types.Snapshot, error) {
+
+	return nil, nil
+}
+
+func (d *driver) SnapshotRemove(
+	ctx types.Context,
+	snapshotID string,
+	opts types.Store) error {
+
+	return nil
+}
diff --git a/drivers/storage/vmw/vmdk/tests/coverage.mk b/drivers/storage/vmw/vmdk/tests/coverage.mk
new file mode 100644
index 00000000..4142d7fe
--- /dev/null
+++ b/drivers/storage/vmw/vmdk/tests/coverage.mk
@@ -0,0 +1,2 @@
+VMDK_COVERPKG := $(ROOT_IMPORT_PATH)/drivers/storage/vmw/vmdk
+TEST_COVERPKG_./drivers/storage/vmw/vmdk/tests := $(VMDK_COVERPKG),$(VMDK_COVERPKG)/executor
diff --git a/drivers/storage/vmw/vmdk/tests/vmdk_test.go b/drivers/storage/vmw/vmdk/tests/vmdk_test.go
new file mode 100644
index 00000000..1dc6ec2f
--- /dev/null
+++ b/drivers/storage/vmw/vmdk/tests/vmdk_test.go
@@ -0,0 +1,80 @@
+// +build linux darwin
+// +build !libstorage_storage_driver libstorage_storage_driver_vmdk
+
+package tests
+
+import (
+	"io/ioutil"
+	"os"
+	"sync"
+	"testing"
+
+	log "github.com/Sirupsen/logrus"
+
+	"github.com/codedellemc/libstorage/api/context"
+	"github.com/codedellemc/libstorage/api/registry"
+	"github.com/codedellemc/libstorage/api/server"
+	"github.com/codedellemc/libstorage/api/types"
+	"github.com/codedellemc/libstorage/api/utils"
+
+	"github.com/codedellemc/libstorage/drivers/storage/vmw/vmci"
+	//vmci "github.com/vmware/docker-volume-vsphere/vmdk_plugin/drivers/vmdk/vmdkops"
+)
+
+var tCtx types.Context
+
+// TestMain enables debug logging, builds the shared test context around a
+// temporary directory, runs the tests and removes the directory before exit.
+func TestMain(m *testing.M) {
+	log.SetLevel(log.DebugLevel)
+
+	tmpDir, err := ioutil.TempDir("", "")
+	if err != nil {
+		panic(err)
+	}
+	defer os.RemoveAll(tmpDir) // only runs if a panic unwinds; os.Exit skips deferred calls
+
+	tCtx = context.Background()
+	pathConfig := utils.NewPathConfig(tCtx, tmpDir, "")
+	tCtx = context.WithValue(tCtx, context.PathConfigKey, pathConfig)
+	registry.ProcessRegisteredConfigs(tCtx)
+
+	server.CloseOnAbort()
+
+	// os.Exit does not run deferred functions, so clean up explicitly first.
+	code := m.Run()
+	os.RemoveAll(tmpDir)
+	os.Exit(code)
+}
+
+func TestVolumeInspect(t *testing.T) {
+	ops := vmci.VmdkOps{Cmd: vmci.EsxVmdkCmd{Mtx: &sync.Mutex{}}}
+	data, err := ops.Get("vmdkops")
+	if err != nil {
+		t.Fatal(err)
+	}
+	for k, v := range data {
+		t.Logf("%s=%v", k, v)
+	}
+}
+
+func TestVolumeCreate(t *testing.T) {
+	ops := vmci.VmdkOps{Cmd: vmci.EsxVmdkCmd{Mtx: &sync.Mutex{}}}
+	if err := ops.Create("vmdkops", map[string]string{}); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestVolumeList(t *testing.T) {
+	ops := vmci.VmdkOps{Cmd: vmci.EsxVmdkCmd{Mtx: &sync.Mutex{}}}
+	data, err := ops.List()
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, volData := range data {
+		t.Logf("volume=%s", volData.Name)
+		for k, v := range volData.Attributes {
+			t.Logf("volume=%s %s=%s", volData.Name, k, v)
+		}
+	}
+}
diff --git a/drivers/storage/vmw/vmdk/vmdk.go b/drivers/storage/vmw/vmdk/vmdk.go
new file mode 100644
index 00000000..28889d89
--- /dev/null
+++ b/drivers/storage/vmw/vmdk/vmdk.go
@@ -0,0 +1,9 @@
+// +build linux darwin
+// +build !libstorage_storage_driver libstorage_storage_driver_vmdk
+
+package vmdk
+
+const (
+	// Name is the name of the driver.
+ Name = "vmdk" +) diff --git a/drivers/storage/vmw/vmw.go b/drivers/storage/vmw/vmw.go new file mode 100644 index 00000000..0d8597af --- /dev/null +++ b/drivers/storage/vmw/vmw.go @@ -0,0 +1,4 @@ +// +build linux +// +build !libstorage_storage_driver libstorage_storage_driver_vmdk libstorage_storage_driver_photon + +package vmw diff --git a/glide.lock b/glide.lock index 1a3a3c72..52f29734 100644 --- a/glide.lock +++ b/glide.lock @@ -1,5 +1,5 @@ -hash: 6b3161a1086d56cfb657e5b47370ed70cce0aa592d1b095cb885c3e4763f1eb5 -updated: 2017-04-20T15:53:52.320351451-05:00 +hash: 00c0f612bf400a84e6a27e3753be74b9a0923bedbf4fddca1231ba3ac2781b61 +updated: 2017-04-23T17:39:55.834788477-05:00 imports: - name: cloud.google.com/go version: e4de3dc4493f142c5833f3185e1182025a61f805 @@ -241,6 +241,8 @@ imports: - assert - name: github.com/tent/http-link-go version: ac974c61c2f990f4115b119354b5e0b47550e888 +- name: github.com/vmware/docker-volume-vsphere + version: 15d313ab6b337a59fe45f11f94e605197ce2bb9b - name: golang.org/x/crypto version: 453249f01cfeb54c3d549ddb75ff152ca243f9d8 repo: https://github.com/golang/crypto.git diff --git a/glide.yaml b/glide.yaml index e97f8c4e..6e14af0e 100644 --- a/glide.yaml +++ b/glide.yaml @@ -92,6 +92,10 @@ import: ref: 96a0db67ea8209453cfa694bdf03de202d6dd8f8 repo: https://github.com/codenrhoden/go-vhd +### VMware + - package: github.com/vmware/docker-volume-vsphere + version: 0.13 + ################################################################################ ## Build System Tools ## diff --git a/imports/executors/imports_executor_linux.go b/imports/executors/imports_executor_linux.go new file mode 100644 index 00000000..732253dc --- /dev/null +++ b/imports/executors/imports_executor_linux.go @@ -0,0 +1,9 @@ +// +build linux +// +build !libstorage_storage_executor + +package executors + +import ( + // import to load + _ "github.com/codedellemc/libstorage/drivers/storage/vmw/vmdk/executor" +) diff --git a/imports/executors/imports_executor_vmdk.go
b/imports/executors/imports_executor_vmdk.go new file mode 100644 index 00000000..477e4224 --- /dev/null +++ b/imports/executors/imports_executor_vmdk.go @@ -0,0 +1,9 @@ +// +build linux +// +build libstorage_storage_executor,libstorage_storage_executor_vmdk + +package executors + +import ( + // load the packages + _ "github.com/codedellemc/libstorage/drivers/storage/vmw/vmdk/executor" +) diff --git a/imports/remote/imports_remote.go b/imports/remote/imports_remote.go index 24b547a2..9ac2648d 100644 --- a/imports/remote/imports_remote.go +++ b/imports/remote/imports_remote.go @@ -3,7 +3,7 @@ package remote import ( - // import to load + // load the storage drivers _ "github.com/codedellemc/libstorage/drivers/storage/azureud/storage" _ "github.com/codedellemc/libstorage/drivers/storage/dobs/storage" _ "github.com/codedellemc/libstorage/drivers/storage/ebs/storage" diff --git a/imports/remote/imports_remote_linux.go b/imports/remote/imports_remote_linux.go new file mode 100644 index 00000000..732253dc --- /dev/null +++ b/imports/remote/imports_remote_linux.go @@ -0,0 +1,9 @@ +// +build linux +// +build !libstorage_storage_driver + +package remote + +import ( + // import to load + _ "github.com/codedellemc/libstorage/drivers/storage/vmw/vmdk/storage" +) diff --git a/imports/remote/imports_remote_vmdk.go b/imports/remote/imports_remote_vmdk.go new file mode 100644 index 00000000..50c07c2b --- /dev/null +++ b/imports/remote/imports_remote_vmdk.go @@ -0,0 +1,9 @@ +// +build linux +// +build libstorage_storage_driver,libstorage_storage_driver_vmdk + +package remote + +import ( + // load the packages + _ "github.com/codedellemc/libstorage/drivers/storage/vmw/vmdk/storage" +)