Skip to content

Commit

Permalink
improve performance by using DMA buffers (#29)
Browse files Browse the repository at this point in the history
this speeds up text drawing and software encoding.
  • Loading branch information
aler9 authored Oct 20, 2024
1 parent 587bd57 commit abb5d58
Show file tree
Hide file tree
Showing 10 changed files with 100 additions and 65 deletions.
89 changes: 58 additions & 31 deletions camera.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
#include <fcntl.h>
#include <unistd.h>

#include <linux/videodev2.h>
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
#include <libcamera/camera_manager.h>
#include <libcamera/camera.h>
#include <libcamera/formats.h>
Expand All @@ -16,7 +19,6 @@
#include <libcamera/framebuffer_allocator.h>
#include <libcamera/property_ids.h>
#include <libcamera/transform.h>
#include <linux/videodev2.h>

#include "camera.h"

Expand All @@ -31,17 +33,35 @@ using libcamera::Orientation;
using libcamera::PixelFormat;
using libcamera::Rectangle;
using libcamera::Request;
using libcamera::SharedFD;
using libcamera::Size;
using libcamera::Span;
using libcamera::Stream;
using libcamera::StreamRole;
using libcamera::StreamConfiguration;
using libcamera::Transform;
using libcamera::UniqueFD;

namespace controls = libcamera::controls;
namespace formats = libcamera::formats;
namespace properties = libcamera::properties;

// Candidate DMA heap device nodes. create_dma_allocator() tries them in
// array order and uses the first one that can be opened.
static const char *heap_positions[] = {
    "/dev/dma_heap/vidbuf_cached",
    "/dev/dma_heap/linux,cma",
};

// https://github.com/raspberrypi/rpicam-apps/blob/6de1ab6a899df35f929b2a15c0831780bd8e750e/core/dma_heaps.cpp
//
// Opens the first DMA heap device listed in heap_positions that can be
// opened read/write (with O_CLOEXEC set).
//
// Returns the open file descriptor, which the caller owns and must close(),
// or -1 when none of the candidates could be opened.
static int create_dma_allocator() {
    // Iterate over the array elements directly: sizeof(heap_positions) is the
    // size of the array in bytes, not its element count, so using it as an
    // index bound reads past the end of the array.
    for (const char *path : heap_positions) {
        int fd = open(path, O_RDWR | O_CLOEXEC, 0);
        if (fd >= 0) {
            return fd;
        }
    }
    return -1;
}

static char errbuf[256];

static void set_error(const char *format, ...) {
Expand Down Expand Up @@ -80,10 +100,10 @@ struct CameraPriv {
std::unique_ptr<CameraManager> camera_manager;
std::shared_ptr<Camera> camera;
Stream *video_stream;
std::unique_ptr<FrameBufferAllocator> allocator;
std::vector<std::unique_ptr<Request>> requests;
std::mutex ctrls_mutex;
std::unique_ptr<ControlList> ctrls;
std::vector<std::unique_ptr<FrameBuffer>> frame_buffers;
std::map<FrameBuffer *, uint8_t *> mapped_buffers;
bool ts_initialized;
uint64_t ts_start;
Expand All @@ -96,22 +116,6 @@ static int get_v4l2_colorspace(std::optional<ColorSpace> const &cs) {
return V4L2_COLORSPACE_SMPTE170M;
}

// https://github.com/raspberrypi/libcamera-apps/blob/a5b5506a132056ac48ba22bc581cc394456da339/core/libcamera_app.cpp#L824
// Maps the memory behind a frame buffer into this process.
//
// Plane lengths are accumulated until either the last plane is reached or the
// following plane uses a different file descriptor; the accumulated length is
// then mapped read/write and shared, starting at offset 0 of that plane's
// descriptor.
//
// Returns the raw mmap() result (it is not compared against MAP_FAILED here),
// or NULL when the buffer has no planes.
static uint8_t *map_buffer(FrameBuffer *buffer) {
    const auto &planes = buffer->planes();
    const size_t plane_count = planes.size();
    size_t total_length = 0;

    for (size_t idx = 0; idx < plane_count; idx++) {
        const int plane_fd = planes[idx].fd.get();
        total_length += planes[idx].length;

        // keep accumulating while the next plane shares the same descriptor
        const bool is_last = (idx + 1 == plane_count);
        if (!is_last && plane_fd == planes[idx + 1].fd.get()) {
            continue;
        }

        return (uint8_t *)mmap(NULL, total_length, PROT_READ | PROT_WRITE, MAP_SHARED, plane_fd, 0);
    }

    return NULL;
}

// https://github.com/raspberrypi/libcamera-apps/blob/a6267d51949d0602eedf60f3ddf8c6685f652812/core/options.cpp#L101
static void set_hdr(bool hdr) {
bool ok = false;
Expand Down Expand Up @@ -175,7 +179,7 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
return false;
}

std::vector<libcamera::StreamRole> stream_roles = { StreamRole::VideoRecording };
std::vector<StreamRole> stream_roles = { StreamRole::VideoRecording };
if (params->mode != NULL) {
stream_roles.push_back(StreamRole::Raw);
}
Expand All @@ -187,7 +191,7 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
}

StreamConfiguration &video_stream_conf = conf->at(0);
video_stream_conf.size = libcamera::Size(params->width, params->height);
video_stream_conf.size = Size(params->width, params->height);
video_stream_conf.pixelFormat = formats::YUV420;
video_stream_conf.bufferCount = params->buffer_count;
if (params->width >= 1280 || params->height >= 720) {
Expand Down Expand Up @@ -234,31 +238,54 @@ bool camera_create(const parameters_t *params, camera_frame_cb frame_cb, camera_
camp->requests.push_back(std::move(request));
}

camp->allocator = std::make_unique<FrameBufferAllocator>(camp->camera);
// allocate DMA buffers manually instead of using default buffers provided by libcamera.
// this improves performance by a lot.
// https://forums.raspberrypi.com/viewtopic.php?t=352554
// https://github.com/raspberrypi/rpicam-apps/blob/6de1ab6a899df35f929b2a15c0831780bd8e750e/core/rpicam_app.cpp#L1012

int allocator_fd = create_dma_allocator();
if (allocator_fd < 0) {
set_error("failed to open dma heap allocator");
return false;
}

for (StreamConfiguration &stream_conf : *conf) {
Stream *stream = stream_conf.stream();

res = camp->allocator->allocate(stream);
if (res < 0) {
set_error("allocate() failed");
return false;
}
for (unsigned int i = 0; i < params->buffer_count; i++) {
struct dma_heap_allocation_data alloc = {};
alloc.len = stream_conf.frameSize;
alloc.fd_flags = O_CLOEXEC | O_RDWR;
int ret = ioctl(allocator_fd, DMA_HEAP_IOCTL_ALLOC, &alloc);
if (ret < 0) {
set_error("failed to allocate buffer in dma heap");
return false;
}
UniqueFD fd(alloc.fd);

int i = 0;
for (const std::unique_ptr<FrameBuffer> &buffer : camp->allocator->buffers(stream)) {
// map buffer of the video stream only
std::vector<FrameBuffer::Plane> plane(1);
plane[0].fd = SharedFD(std::move(fd));
plane[0].offset = 0;
plane[0].length = stream_conf.frameSize;

camp->frame_buffers.push_back(std::make_unique<FrameBuffer>(plane));
FrameBuffer *fb = camp->frame_buffers.back().get();

// map buffers of the video stream only
if (stream == video_stream_conf.stream()) {
camp->mapped_buffers[buffer.get()] = map_buffer(buffer.get());
camp->mapped_buffers[fb] = (uint8_t*)mmap(NULL, stream_conf.frameSize, PROT_READ | PROT_WRITE, MAP_SHARED, plane[0].fd.get(), 0);
}

res = camp->requests.at(i++)->addBuffer(stream, buffer.get());
res = camp->requests.at(i)->addBuffer(stream, fb);
if (res != 0) {
set_error("addBuffer() failed");
return false;
}
}
}

close(allocator_fd);

camp->params = params;
camp->frame_cb = frame_cb;
*cam = camp.release();
Expand Down
4 changes: 1 addition & 3 deletions encoder.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include "encoder_soft_h264.h"
#include "encoder.h"

#define HARDWARE_DEVICE "/dev/video11"

static char errbuf[256];

static void set_error(const char *format, ...) {
Expand All @@ -38,7 +36,7 @@ typedef struct {
} encoder_priv_t;

static bool supports_hardware_h264() {
int fd = open(HARDWARE_DEVICE, O_RDWR, 0);
int fd = open(ENCODER_HARD_H264_DEVICE, O_RDWR, 0);
if (fd < 0) {
return false;
}
Expand Down
5 changes: 1 addition & 4 deletions encoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,7 @@

typedef void encoder_t;

typedef void (*encoder_output_cb)(
uint64_t ts,
const uint8_t *buf,
uint64_t size);
typedef void (*encoder_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);

const char *encoder_get_error();
bool encoder_create(const parameters_t *params, int stride, int colorspace, encoder_output_cb output_cb, encoder_t **enc);
Expand Down
14 changes: 6 additions & 8 deletions encoder_hard_h264.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@

#include "encoder_hard_h264.h"

#define DEVICE "/dev/video11"

static char errbuf[256];

static void set_error(const char *format, ...) {
Expand Down Expand Up @@ -61,11 +59,11 @@ static void *output_thread(void *userdata) {
exit(1);
}

const uint8_t *mapped = (const uint8_t *)encp->capture_buffers[buf.index];
int size = buf.m.planes[0].bytesused;
uint64_t ts = ((uint64_t)buf.timestamp.tv_sec * (uint64_t)1000000) + (uint64_t)buf.timestamp.tv_usec;

const uint8_t *buf_mem = (const uint8_t *)encp->capture_buffers[buf.index];
int buf_size = buf.m.planes[0].bytesused;
encp->output_cb(ts, buf_mem, buf_size);
encp->output_cb(mapped, size, ts);

res = ioctl(encp->fd, VIDIOC_QBUF, &buf);
if (res != 0) {
Expand Down Expand Up @@ -103,7 +101,7 @@ bool encoder_hard_h264_create(const parameters_t *params, int stride, int colors
encoder_hard_h264_priv_t *encp = (encoder_hard_h264_priv_t *)(*enc);
memset(encp, 0, sizeof(encoder_hard_h264_priv_t));

encp->fd = open(DEVICE, O_RDWR, 0);
encp->fd = open(ENCODER_HARD_H264_DEVICE, O_RDWR, 0);
if (encp->fd < 0) {
set_error("unable to open device");
goto failed;
Expand Down Expand Up @@ -266,7 +264,7 @@ bool encoder_hard_h264_create(const parameters_t *params, int stride, int colors
return false;
}

void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts) {
void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts) {
encoder_hard_h264_priv_t *encp = (encoder_hard_h264_priv_t *)enc;

int index = encp->cur_buffer++;
Expand All @@ -282,7 +280,7 @@ void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer,
buf.timestamp.tv_sec = ts / 1000000;
buf.timestamp.tv_usec = ts % 1000000;
buf.m.planes = planes;
buf.m.planes[0].m.fd = buffer_fd;
buf.m.planes[0].m.fd = fd;
buf.m.planes[0].bytesused = size;
buf.m.planes[0].length = size;
int res = ioctl(encp->fd, VIDIOC_QBUF, &buf);
Expand Down
6 changes: 4 additions & 2 deletions encoder_hard_h264.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@

#include "parameters.h"

#define ENCODER_HARD_H264_DEVICE "/dev/video11"

typedef void encoder_hard_h264_t;

typedef void (*encoder_hard_h264_output_cb)(uint64_t ts, const uint8_t *buf, uint64_t size);
typedef void (*encoder_hard_h264_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);

const char *encoder_hard_h264_get_error();
bool encoder_hard_h264_create(const parameters_t *params, int stride, int colorspace, encoder_hard_h264_output_cb output_cb, encoder_hard_h264_t **enc);
void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts);
void encoder_hard_h264_encode(encoder_hard_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts);
void encoder_hard_h264_reload_params(encoder_hard_h264_t *enc, const parameters_t *params);

#endif
6 changes: 3 additions & 3 deletions encoder_soft_h264.c
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,10 @@ bool encoder_soft_h264_create(const parameters_t *params, int stride, int colors
return false;
}

void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts) {
void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts) {
encoder_soft_h264_priv_t *encp = (encoder_soft_h264_priv_t *)enc;

encp->x_pic_in.img.plane[0] = mapped_buffer; // Y
encp->x_pic_in.img.plane[0] = mapped; // Y
encp->x_pic_in.img.plane[1] = encp->x_pic_in.img.plane[0] + encp->x_pic_in.img.i_stride[0] * encp->params->height; // U
encp->x_pic_in.img.plane[2] = encp->x_pic_in.img.plane[1] + (encp->x_pic_in.img.i_stride[0] / 2) * (encp->params->height / 2); // V
encp->x_pic_in.i_pts = encp->next_pts++;
Expand All @@ -113,7 +113,7 @@ void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer,

pthread_mutex_unlock(&encp->mutex);

encp->output_cb(ts, nal->p_payload, frame_size);
encp->output_cb(nal->p_payload, frame_size, ts);
}

void encoder_soft_h264_reload_params(encoder_soft_h264_t *enc, const parameters_t *params) {
Expand Down
4 changes: 2 additions & 2 deletions encoder_soft_h264.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

typedef void encoder_soft_h264_t;

typedef void (*encoder_soft_h264_output_cb)(uint64_t ts, const uint8_t *buf, uint64_t size);
typedef void (*encoder_soft_h264_output_cb)(const uint8_t *mapped, uint64_t size, uint64_t ts);

const char *encoder_soft_h264_get_error();
bool encoder_soft_h264_create(const parameters_t *params, int stride, int colorspace, encoder_soft_h264_output_cb output_cb, encoder_soft_h264_t **enc);
void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped_buffer, int buffer_fd, size_t size, uint64_t ts);
void encoder_soft_h264_encode(encoder_soft_h264_t *enc, uint8_t *mapped, int fd, size_t size, uint64_t ts);
void encoder_soft_h264_reload_params(encoder_soft_h264_t *enc, const parameters_t *params);

#endif
27 changes: 20 additions & 7 deletions main.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <sys/ioctl.h>

#include <linux/dma-buf.h>

#include "parameters.h"
#include "pipe.h"
Expand All @@ -18,17 +21,27 @@ static text_t *text;
static encoder_t *enc;

static void on_frame(
uint8_t *mapped_buffer,
int buffer_fd,
uint8_t *mapped,
int fd,
uint64_t size,
uint64_t timestamp) {
text_draw(text, mapped_buffer);
encoder_encode(enc, mapped_buffer, buffer_fd, size, timestamp);
uint64_t ts) {
// mapped DMA buffers require a DMA_BUF_IOCTL_SYNC before and after usage.
// https://forums.raspberrypi.com/viewtopic.php?t=352554
struct dma_buf_sync dma_sync = {0};
dma_sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
ioctl(fd, DMA_BUF_IOCTL_SYNC, &dma_sync);

text_draw(text, mapped);

dma_sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
ioctl(fd, DMA_BUF_IOCTL_SYNC, &dma_sync);

encoder_encode(enc, mapped, fd, size, ts);
}

static void on_encoder_output(uint64_t ts, const uint8_t *buf, uint64_t size) {
static void on_encoder_output(const uint8_t *mapped, uint64_t size, uint64_t ts) {
pthread_mutex_lock(&pipe_video_mutex);
pipe_write_buf(pipe_video_fd, ts, buf, size);
pipe_write_buf(pipe_video_fd, mapped, size, ts);
pthread_mutex_unlock(&pipe_video_mutex);
}

Expand Down
8 changes: 4 additions & 4 deletions pipe.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,13 @@ void pipe_write_ready(int fd) {
write(fd, buf, n);
}

void pipe_write_buf(int fd, uint64_t ts, const uint8_t *buf, uint32_t n) {
void pipe_write_buf(int fd, const uint8_t *mapped, uint32_t size, uint64_t ts) {
char head[] = {'b'};
n += 1 + sizeof(uint64_t);
write(fd, &n, 4);
size += 1 + sizeof(uint64_t);
write(fd, &size, 4);
write(fd, head, 1);
write(fd, &ts, sizeof(uint64_t));
write(fd, buf, n - 1 - sizeof(uint64_t));
write(fd, mapped, size - 1 - sizeof(uint64_t));
}

uint32_t pipe_read(int fd, uint8_t **pbuf) {
Expand Down
2 changes: 1 addition & 1 deletion pipe.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

void pipe_write_error(int fd, const char *format, ...);
void pipe_write_ready(int fd);
void pipe_write_buf(int fd, uint64_t ts, const uint8_t *buf, uint32_t n);
void pipe_write_buf(int fd, const uint8_t *mapped, uint32_t size, uint64_t ts);
uint32_t pipe_read(int fd, uint8_t **pbuf);

#endif

0 comments on commit abb5d58

Please sign in to comment.