From cb08d576d37ce341019c692eab1695286c5e47ea Mon Sep 17 00:00:00 2001
From: lxowalle <lxowalle@outlook.com>
Date: Wed, 22 May 2024 20:22:54 +0800
Subject: [PATCH] * update video module

---
 components/vision/include/maix_rtsp.hpp       |   6 +-
 components/vision/include/maix_video.hpp      | 403 +++++++++++++-
 .../vision/port/linux/maix_video_linux.cpp    |  65 +++
 .../vision/port/maixcam/maix_rtsp_maixcam.cpp |  11 +-
 .../vision/port/maixcam/maix_video_mmf.cpp    | 525 ++++++++++++++++++
 .../voice/port/maixcam/maix_audio_mmf.cpp     |   6 +-
 examples/video_demo/main/src/main.cpp         | 105 ++++
 7 files changed, 1108 insertions(+), 13 deletions(-)

diff --git a/components/vision/include/maix_rtsp.hpp b/components/vision/include/maix_rtsp.hpp
index 2b681df6..c8ae33b4 100644
--- a/components/vision/include/maix_rtsp.hpp
+++ b/components/vision/include/maix_rtsp.hpp
@@ -117,13 +117,11 @@ namespace maix::rtsp
 
         /**
          * @brief Write data to rtsp
-         * @param type rtsp stream type
-         * @param data rtsp stream data
-         * @param fps rtsp stream data size
+         * @param frame video frame data
          * @return error code, err::ERR_NONE means success, others means failed
          * @maixpy maix.rtsp.Rtsp.write
         */
-        err::Err write(video::Frame &stream);
+        err::Err write(video::Frame &frame);
 
         /**
          * @brief Get url of rtsp
diff --git a/components/vision/include/maix_video.hpp b/components/vision/include/maix_video.hpp
index 327f6db1..a344b18c 100644
--- a/components/vision/include/maix_video.hpp
+++ b/components/vision/include/maix_video.hpp
@@ -18,8 +18,6 @@
 */
 namespace maix::video
 {
-    extern maix::image::Image NoneImage;
-
     /**
      * Video type
      * @maixpy maix.video.VideoType
@@ -31,6 +29,10 @@ namespace maix::video
         VIDEO_ENC_MP4_CBR,
         VIDEO_DEC_H265_CBR,
         VIDEO_DEC_MP4_CBR,
+        VIDEO_H264_CBR,
+        VIDEO_H265_CBR,
+        VIDEO_H264_CBR_MP4,
+        VIDEO_H265_CBR_MP4,
     };
 
     /**
@@ -39,9 +41,186 @@ namespace maix::video
      */
     class Frame
     {
+        uint64_t _pts;                        // unit: time_base
+        uint64_t _dts;                        // unit: time_base
+        uint64_t _duration;                   // equals next_pts - this_pts in presentation order. unit: time_base.
+        uint8_t *_data;
+        size_t _data_size;
+        video::VideoType _type;
+        bool _is_alloc;
     public:
-        VideoType type;
-        std::unique_ptr<uint8_t> frame;
+        /**
+         * @brief Frame object
+         * @param data src data pointer. It is used directly (not copied) unless copy is true.
+         * Note: the caller's buffer is freed on destruction only when auto_detele is true and copy is false.
+         * @param len data len in bytes. If len <= 0 the frame is empty and is_valid() returns false.
+         * @param pts presentation time stamp. unit: time_base
+         * @param dts decoding time stamp. unit: time_base
+         * @param duration packet display time. unit: time_base (not used)
+         * @param auto_detele if true, will free() data when destruct. When copy is true, this arg will be ignored.
+         * @param copy data will be copied to a new buffer if true, if false, will use data directly.
+         *             default false, so the caller must keep data alive while the frame uses it.
+         * @maixcdk maix.video.Frame.Frame
+         */
+        Frame(uint8_t *data, int len, uint64_t pts = -1, uint64_t dts = -1, int64_t duration = 0, bool auto_detele = false, bool copy = false) {
+            _data = data;
+            _data_size = (_data && len > 0) ? len : 0;  // a negative len must not wrap to a huge size_t
+            _pts = pts;
+            _dts = dts;
+            _duration = duration;
+            _type = video::VideoType();                 // value-initialize so type() never reads an indeterminate value
+            _is_alloc = false;
+            if (_data_size > 0) {
+                if (copy)
+                {
+                    _data = (uint8_t *)malloc(_data_size);
+                    _is_alloc = (_data != NULL);
+                    if (_data) memcpy(_data, data, _data_size);
+                    else _data_size = 0;                // allocation failed: leave an empty, invalid frame
+                } else {
+                    _is_alloc = auto_detele;
+                }
+            }
+        }
+
+        /**
+         * @brief Construct an empty frame: no data, is_valid() returns false.
+         * @maixcdk maix.video.Frame.Frame
+         */
+        Frame()
+            : _pts((uint64_t)-1), _dts((uint64_t)-1), _duration(0),
+              _data(NULL), _data_size(0),
+              _type(video::VideoType()), _is_alloc(false) {
+        }
+
+        ~Frame() {
+            if (_is_alloc && _data)
+            {
+                free(_data);
+                _data = nullptr;
+            }
+        }
+
+        /**
+         * @brief Get raw data of packet
+         * @param data data pointer
+         * @param len data length pointer
+         * @return error code, err::ERR_NONE means success
+         * @maixcdk maix.video.Frame.get
+         */
+        err::Err get(void **data, int *len) {
+            if (data) *data = _data;
+            if (len) *len = _data_size;
+            return err::ERR_NONE;
+        }
+
+        /**
+         * @brief Get raw data of packet
+         * @param copy if true, will alloc memory and copy data to new buffer
+         * @return raw data
+         * @maixpy maix.video.Frame.to_bytes
+         */
+        Bytes *to_bytes(bool copy) {
+            Bytes *b = NULL;
+            if (copy) {
+                b = new Bytes(_data, _data_size, true, true);
+            } else {
+                b = new Bytes(_data, _data_size, false, false);
+            }
+            return b;
+        }
+
+        /**
+         * @brief Get raw data of packet
+         * @return raw data
+         * @maixcdk maix.video.Frame.data
+         */
+        uint8_t *data() {
+            return _data;
+        }
+
+        /**
+         * @brief Get raw data size of packet
+         * @return size of raw data
+         * @maixpy maix.video.Frame.size
+         */
+        size_t size() {
+            return _data_size;
+        }
+
+        /**
+         * @brief Check packet is valid
+         * @return true, packet is valid; false, packet is invalid
+         * @maixpy maix.video.Frame.is_valid
+         */
+        bool is_valid() {
+            return (_data && _data_size != 0) ? true : false;
+        }
+
+        /**
+         * @brief Set pts
+         * @param pts presentation time stamp. unit: time_base
+         * @maixpy maix.video.Frame.set_pts
+         */
+        void set_pts(uint64_t pts) {
+            _pts = pts;
+        }
+
+        /**
+         * @brief Set dts
+         * @param dts decoding time stamp.  unit: time_base
+         * @maixpy maix.video.Frame.set_dts
+         */
+        void set_dts(uint64_t dts) {
+            _dts = dts;
+        }
+
+        /**
+         * @brief Set duration
+         * @param duration packet display time. unit: time_base
+         * @maixpy maix.video.Frame.set_duration
+         */
+        void set_duration(uint64_t duration) {
+            _duration = duration;
+        }
+
+        /**
+         * @brief Get pts
+         * The presentation time stamp is expressed in time_base units.
+         * @return pts value
+         * @maixpy maix.video.Frame.get_pts
+         */
+        uint64_t get_pts() {
+            return _pts;
+        }
+
+        /**
+         * @brief Get dts
+         * The decoding time stamp is expressed in time_base units.
+         * @return dts value
+         * @maixpy maix.video.Frame.get_dts
+         */
+        uint64_t get_dts() {
+            return _dts;
+        }
+
+        /**
+         * @brief Get duration
+         * @return duration value
+         * @maixpy maix.video.Frame.get_duration
+         */
+        uint64_t get_duration() {
+            return _duration;
+        }
+
+        /**
+         * @brief Get frame type
+         * @return video type. @see video::VideoType
+         * @maixpy maix.video.Frame.type
+         */
+        video::VideoType type() {
+            return _type;
+        }
     };
 
     /**
@@ -159,6 +338,222 @@ namespace maix::video
         }
     };
 
+    /**
+     * Encode class
+     * @maixpy maix.video.Encoder
+    */
+    class Encoder
+    {
+    public:
+        static maix::image::Image *NoneImage;
+
+        /**
+         * @brief Construct a new Encoder object
+         * @param width picture width. this value may be set automatically. default is 2560.
+         * @param height picture height. this value may be set automatically. default is 1440.
+         * @param format picture format. default is image::Format::FMT_YVU420SP. @see image::Format
+         * @param type video encode type. default is VIDEO_H265_CBR. @see VideoType
+         * @param framerate frame rate. framerate default is 30, means 30 frames per second
+         * for video. 1/time_base is not the average frame rate if the frame rate is not constant.
+         * @param gop for h264/h265 encoding, the interval between two I-frames, default is 50.
+         * @param bitrate for h264/h265 encoding, used to limit the bandwidth used by compressed data, default is 3000kbps
+         * @param time_base frame time base. time_base default is 1000, means 1/1000 ms (not used)
+         * @param capture enable capture, if true, you can use capture() function to get an image object
+         * @maixpy maix.video.Encoder.__init__
+         * @maixcdk maix.video.Encoder.Encoder
+         */
+        Encoder(int width = 2560, int height = 1440, image::Format format = image::Format::FMT_YVU420SP, video::VideoType type = video::VideoType::VIDEO_H265_CBR, int framerate = 30, int gop = 50, int bitrate = 3000 * 1000, int time_base = 1000, bool capture = false);
+        ~Encoder();
+
+        /**
+         * @brief Bind camera
+         * @param camera camera object
+         * @return error code, err::ERR_NONE means success, others means failed
+         * @maixpy maix.video.Encoder.bind_camera
+        */
+        err::Err bind_camera(camera::Camera *camera);
+
+        /**
+         * Encode image.
+         * @param img the image to be encoded.
+         * if the img is NULL, this function will try to get image from camera, you must use bind_camera() function to bind the camera.
+         * @return encode result
+         * @maixpy maix.video.Encoder.encode
+        */
+        video::Frame *encode(image::Image *img = maix::video::Encoder::NoneImage);
+
+        /**
+         * Capture image
+         * @attention Each time encode is called, the last captured image will be released.
+         * @return image object created from the last captured frame
+         * @maixpy maix.video.Encoder.capture
+        */
+        image::Image *capture() {
+            err::check_null_raise(_capture_image, "Can't capture image, please make sure the capture flag is set, and run this api after encode().");
+            image::Image *new_image = new image::Image(_capture_image->width(), _capture_image->height(), _capture_image->format(),
+                (uint8_t *)_capture_image->data(), _capture_image->data_size(), false);
+            return new_image;
+        }
+
+        /**
+         * Get video width
+         * @return video width
+         * @maixpy maix.video.Encoder.width
+        */
+        int width()
+        {
+            return _width;
+        }
+
+        /**
+         * Get video height
+         * @return video height
+         * @maixpy maix.video.Encoder.height
+        */
+        int height()
+        {
+            return _height;
+        }
+
+        /**
+         * Get video encode type
+         * @return VideoType
+         * @maixpy maix.video.Encoder.type
+        */
+        video::VideoType type()
+        {
+            return _type;
+        }
+
+        /**
+         * Get video encode framerate
+         * @return frame rate
+         * @maixpy maix.video.Encoder.framerate
+        */
+        int framerate()
+        {
+            return _framerate;
+        }
+
+        /**
+         * Get video encode gop
+         * @return gop value
+         * @maixpy maix.video.Encoder.gop
+        */
+        int gop()
+        {
+            return _gop;
+        }
+
+        /**
+         * Get video encode bitrate
+         * @return bitrate value
+         * @maixpy maix.video.Encoder.bitrate
+        */
+        int bitrate()
+        {
+            return _bitrate;
+        }
+
+        /**
+         * Get video encode time base
+         * @return time base value
+         * @maixpy maix.video.Encoder.time_base
+        */
+        int time_base()
+        {
+            return _time_base;
+        }
+
+        /**
+         * Get current pts, unit: time_base
+         * Note: The current default is to assume that there is no B-frame implementation, so pts and dts are always the same
+         * @param time_ms time elapsed since the first frame. unit: ms
+         * @return pts value. unit: time_base
+         * @maixpy maix.video.Encoder.get_pts
+        */
+        uint64_t get_pts(uint64_t time_ms)
+        {
+            return time_ms * 1000 / _time_base;
+        }
+
+        /**
+         * Get current dts, unit: time_base
+         * Note: The current default is to assume that there is no B-frame implementation, so pts and dts are always the same
+         * @param time_ms time elapsed since the first frame. unit: ms
+         * @return dts value. unit: time_base
+         * @maixpy maix.video.Encoder.get_dts
+        */
+        uint64_t get_dts(uint64_t time_ms)
+        {
+            return time_ms * 1000 / _time_base;
+        }
+    private:
+        int _width;
+        int _height;
+        image::Format _format;
+        video::VideoType _type;
+        int _framerate;
+        int _gop;
+        int _bitrate;
+        int _time_base;
+        bool _need_capture;
+        image::Image *_capture_image;
+        camera::Camera *_camera;
+        bool _bind_camera;
+        uint64_t _pts;                        // unit: time_base
+        uint64_t _dts;                        // unit: time_base
+        uint64_t _start_encode_ms;
+        bool _encode_started;
+    };
+
+
+    /**
+     * Decoder class
+     * @maixpy maix.video.Decoder
+     */
+    class Decoder
+    {
+    public:
+        /**
+         * @brief Construct a new Decoder object
+         * @maixpy maix.video.Decoder.__init__
+         * @maixcdk maix.video.Decoder.Decoder
+         */
+        Decoder();
+        ~Decoder();
+
+        /**
+         * Prepare data to decode
+         * @param data need decode data
+         * @param copy if false, need to ensure that data is not released in decoding.
+         * @return error code, err::ERR_NONE means success, others means failed
+         * @maixpy maix.video.Decoder.prepare
+        */
+        err::Err prepare(Bytes *data, bool copy = true);
+
+        /**
+         * Prepare data to decode
+         * @param data need decode data
+         * @param data_size size of data to be decoded
+         * @param copy if false, need to ensure that data is not released in decoding.
+         * @return error code, err::ERR_NONE means success, others means failed
+         * @maixcdk maix.video.Decoder.prepare
+        */
+        err::Err prepare(void *data, int data_size, bool copy = true);
+
+        /**
+         * Decode
+         * @param frame the frame to be decoded (not used)
+         * @return decode result
+         * @maixpy maix.video.Decoder.decode
+        */
+        image::Image *decode(video::Frame *frame = nullptr);
+    private:
+        int _path;
+        Bytes *_prepare_data;
+    };
+
     /**
      * Video class
      * @maixpy maix.video.Video
diff --git a/components/vision/port/linux/maix_video_linux.cpp b/components/vision/port/linux/maix_video_linux.cpp
index 6eb99355..cce56645 100644
--- a/components/vision/port/linux/maix_video_linux.cpp
+++ b/components/vision/port/linux/maix_video_linux.cpp
@@ -14,10 +14,70 @@
 
 namespace maix::video
 {
+#if CONFIG_BUILD_WITH_MAIXPY
     maix::image::Image *Video::NoneImage = new maix::image::Image();
+    maix::image::Image *Encoder::NoneImage = new maix::image::Image();
+#else
+    maix::image::Image *Video::NoneImage = NULL;
+    maix::image::Image *Encoder::NoneImage = NULL;
+#endif
+
+    Encoder::Encoder(int width, int height, image::Format format, VideoType type, int framerate, int gop, int bitrate, int time_base, bool capture) {
+        throw err::Exception(err::ERR_NOT_IMPL);
+    }
+
+    Encoder::~Encoder() {
+        // Nothing to release on this port. Must not throw: destructors are implicitly noexcept, so throwing would terminate.
+    }
+
+    err::Err Encoder::bind_camera(camera::Camera *camera) {
+        (void)camera;
+        throw err::Exception(err::ERR_NOT_IMPL);
+        return err::ERR_NOT_IMPL;
+    }
+
+    video::Frame *Encoder::encode(image::Image *img) {
+        (void)img;
+        throw err::Exception(err::ERR_NOT_IMPL);
+        return nullptr;
+    }
+
+    Decoder::Decoder() {
+        throw err::Exception(err::ERR_NOT_IMPL);
+    }
+
+    Decoder::~Decoder() {
+        // Nothing to release on this port. Must not throw: destructors are implicitly noexcept, so throwing would terminate.
+    }
+
+    err::Err Decoder::prepare(Bytes *data, bool copy) {
+        (void)data;
+        (void)copy;
+        return err::ERR_NONE;
+    }
+
+    err::Err Decoder::prepare(void *data, int data_size, bool copy) {
+        (void)data;
+        (void)data_size;
+        (void)copy;
+        return err::ERR_NONE;
+    }
+
+    image::Image *Decoder::decode(video::Frame *frame) {
+        (void)frame;
+        return NULL;
+    }
 
     Video::Video(std::string path, int width, int height, image::Format format, int time_base, int framerate, bool capture, bool open)
     {
+        (void)path;
+        (void)width;
+        (void)height;
+        (void)format;
+        (void)time_base;
+        (void)framerate;
+        (void)capture;
+        (void)open;
         throw err::Exception(err::ERR_NOT_IMPL);
     }
 
@@ -29,6 +89,8 @@ namespace maix::video
 
     err::Err Video::open(std::string path, double fps)
     {
+        (void)path;
+        (void)fps;
         throw err::Exception(err::ERR_NOT_IMPL);
         return err::ERR_NONE;
     }
@@ -39,16 +101,19 @@ namespace maix::video
     }
 
     err::Err Video::bind_camera(camera::Camera *camera) {
+        (void)camera;
         err::Err err = err::ERR_NONE;
         throw err::Exception(err::ERR_NOT_IMPL);
         return err;
     }
 
     video::Packet *Video::encode(image::Image *img) {
+        (void)img;
         return nullptr;
     }
 
     image::Image *Video::decode(video::Frame *frame) {
+        (void)frame;
         return NULL;
     }
 
diff --git a/components/vision/port/maixcam/maix_rtsp_maixcam.cpp b/components/vision/port/maixcam/maix_rtsp_maixcam.cpp
index 033c961b..d6f9c552 100644
--- a/components/vision/port/maixcam/maix_rtsp_maixcam.cpp
+++ b/components/vision/port/maixcam/maix_rtsp_maixcam.cpp
@@ -257,16 +257,21 @@ namespace maix::rtsp
         return err;
     }
 
-    err::Err Rtsp::write(video::Frame &stream) {
+    err::Err Rtsp::write(video::Frame &frame) {
         err::Err err = err::ERR_NONE;
 
-        if (stream.type != video::VideoType::VIDEO_ENC_H265_CBR) {
+        if (frame.type() != video::VideoType::VIDEO_ENC_H265_CBR) {
             log::warn("You passed in an unsupported type!\r\n");
             return err::ERR_RUNTIME;
         }
 
+        void *data;
+        int data_len = 0;
+        if (err::ERR_NONE != frame.get(&data, &data_len) || data_len == 0) {
+            return err::ERR_NONE;
+        }
 
-        rtsp_send_h265_data(stream.frame.get(), sizeof(stream.frame));
+        rtsp_send_h265_data((uint8_t *)data, data_len);
 
         return err;
     }
diff --git a/components/vision/port/maixcam/maix_video_mmf.cpp b/components/vision/port/maixcam/maix_video_mmf.cpp
index f9e02da5..9181ee84 100644
--- a/components/vision/port/maixcam/maix_video_mmf.cpp
+++ b/components/vision/port/maixcam/maix_video_mmf.cpp
@@ -21,7 +21,532 @@
 #define MMF_VENC_CHN            (1)
 namespace maix::video
 {
+#if CONFIG_BUILD_WITH_MAIXPY
     maix::image::Image *Video::NoneImage = new maix::image::Image();
+    maix::image::Image *Encoder::NoneImage = new maix::image::Image();
+#else
+    maix::image::Image *Video::NoneImage = NULL;
+    maix::image::Image *Encoder::NoneImage = NULL;
+#endif
+
+    Encoder::Encoder(int width, int height, image::Format format, VideoType type, int framerate, int gop, int bitrate, int time_base, bool capture) {
+        _width = width;
+        _height = height;
+        _format = format;
+        _type = type;
+        _framerate = framerate;
+        _gop = gop;
+        _bitrate = bitrate;
+        _time_base = time_base;
+        _need_capture = capture;
+        _capture_image = NULL;
+        _camera = NULL;
+        _bind_camera = false;       // bool flag, set to true by bind_camera()
+        _pts = _dts = 0;            // give the timestamp members a defined value
+        _start_encode_ms = 0;
+        _encode_started = false;
+        // Open the hardware encoder channel for the requested type; unsupported types raise.
+        switch (_type) {
+        case VIDEO_H265_CBR:
+        {
+            if (0 != mmf_enc_h265_init(MMF_VENC_CHN, _width, _height)) {
+                err::check_raise(err::ERR_RUNTIME, "init mmf enc failed!");
+            }
+            break;
+        }
+        case VIDEO_H264_CBR:
+        {
+            mmf_venc_cfg_t cfg = {
+                .type = 2,  //1, h265, 2, h264
+                .w = _width,
+                .h = _height,
+                .fmt = mmf_invert_format_to_mmf(_format),
+                .jpg_quality = 0,       // unused
+                .gop = _gop,
+                .intput_fps = _framerate,
+                .output_fps = _framerate,
+                .bitrate = _bitrate,
+            };
+            if (0 != mmf_add_venc_channel(MMF_VENC_CHN, &cfg)) {
+                err::check_raise(err::ERR_RUNTIME, "mmf venc init failed!");
+            }
+            break;
+        }
+        default:
+            err::check_raise(err::ERR_RUNTIME, "Encoder not support type: " + std::to_string(_type));
+        }
+    }
+
+    Encoder::~Encoder() {
+        // Close the encoder channel that the constructor opened for this type.
+        switch (_type) {
+        case VIDEO_H265_CBR:
+        {
+            mmf_enc_h265_deinit(MMF_VENC_CHN);
+            break;
+        }
+        case VIDEO_H264_CBR:
+        {
+            mmf_del_venc_channel(MMF_VENC_CHN);
+            break;
+        }
+        default:
+            // Never raise here: destructors are implicitly noexcept, so an exception would terminate the program.
+            log::error("Encoder not support type!\r\n");
+        }
+
+        // Free the captured image even when it holds no pixel data; deleting NULL is a no-op.
+        delete _capture_image;
+        _capture_image = nullptr;
+    }
+
+    err::Err Encoder::bind_camera(camera::Camera *camera) {
+        // Raise on a NULL camera instead of dereferencing it in the format check below.
+        err::check_null_raise(camera, "bind camera failed! camera is NULL!\r\n");
+        if (camera->format() != image::Format::FMT_YVU420SP) {
+            err::check_raise(err::ERR_RUNTIME, "bind camera failed! support FMT_YVU420SP only!\r\n");
+            return err::ERR_RUNTIME;
+        }
+        this->_camera = camera;
+        this->_bind_camera = true;
+        return err::ERR_NONE;
+    }
+
+    video::Frame *Encoder::encode(image::Image *img) {
+        uint8_t *stream_buffer = NULL;
+        int stream_size = 0;
+
+        uint64_t pts = 0, dts = 0;
+        uint64_t curr_ms = time::time_ms();
+        uint64_t diff_ms = 0;
+        if (!_encode_started) {
+            _encode_started = true;
+            _start_encode_ms = curr_ms;
+        }
+        diff_ms = curr_ms - _start_encode_ms;
+
+        switch (_type) {
+        case VIDEO_H264_CBR:
+        {
+            if (img && img->data() != NULL) {  // encode from image
+                if (img->data_size() > 2560 * 1440 * 3 / 2) {
+                    log::error("image is too large!\r\n");
+                    goto _exit;
+                }
+
+                mmf_venc_cfg_t cfg = {0};
+                if (0 != mmf_venc_get_cfg(MMF_VENC_CHN, &cfg)) {
+                    err::check_raise(err::ERR_RUNTIME, "get venc config failed!\r\n");
+                }
+
+                dts = get_dts(diff_ms);
+                pts = get_pts(diff_ms);
+
+                int img_w = img->width();
+                int img_h = img->height();
+                image::Format img_fmt = img->format();
+                if (img_w != cfg.w
+                    || img->height() != cfg.h
+                    || img->format() != mmf_invert_format_to_maix(cfg.fmt)) {
+                    log::warn("image size or format is incorrect, try to reinit venc!\r\n");
+                    mmf_del_venc_channel(MMF_VENC_CHN);
+                    cfg.w = img_w;
+                    cfg.h = img_h;
+                    cfg.fmt = mmf_invert_format_to_mmf(img_fmt);
+                    if (0 != mmf_add_venc_channel(MMF_VENC_CHN, &cfg)) {
+                        err::check_raise(err::ERR_RUNTIME, "mmf venc init failed!\r\n");
+                    }
+                    _width = img_w;
+                    _height = img_h;
+                    _format = img_fmt;
+                }
+
+                if (mmf_venc_push(MMF_VENC_CHN, (uint8_t *)img->data(), img->width(), img->height(), mmf_invert_format_to_mmf(img->format()))) {
+                    log::error("mmf_venc_push failed\n");
+                    goto _exit;
+                }
+
+                mmf_h265_stream_t stream = {0};
+                if (mmf_venc_pop(MMF_VENC_CHN, &stream)) {
+                    log::error("mmf_enc_h265_pull failed\n");
+                    mmf_venc_free(MMF_VENC_CHN);
+                    goto _exit;
+                }
+
+                for (int i = 0; i < stream.count; i ++) {
+                    // printf("[%d] stream.data:%p stream.len:%d\n", i, stream.data[i], stream.data_size[i]);
+                    stream_size += stream.data_size[i];
+                }
+
+                if (stream_size != 0) {
+                    stream_buffer = (uint8_t *)malloc(stream_size);
+                    if (!stream_buffer) {
+                        log::error("malloc failed!\r\n");
+                        mmf_venc_free(MMF_VENC_CHN);
+                        goto _exit;
+                    } else {
+                        if (stream.count > 1) {
+                            int copy_length = 0;
+                            for (int i = 0; i < stream.count; i ++) {
+                                memcpy(stream_buffer + copy_length, stream.data[i], stream.data_size[i]);
+                                copy_length += stream.data_size[i];
+                            }
+                        } else if (stream.count == 1) {
+                            memcpy(stream_buffer, stream.data[0], stream.data_size[0]);
+                        }
+                    }
+                }
+
+                if (mmf_venc_free(MMF_VENC_CHN)) {
+                    printf("mmf_venc_free failed\n");
+                    free(stream_buffer);
+                    stream_buffer = NULL;
+                    goto _exit;
+                }
+            } else { // encode from camera
+                if (!this->_bind_camera) {
+                    log::warn("You need use bind_camera() function to bind the camera!\r\n");
+                    goto _exit;
+                }
+
+                int vi_ch = _camera->get_channel();
+                void *data;
+                int data_size, width, height, format;
+                do {
+                    mmf_h265_stream_t stream = {0};
+                    if (mmf_venc_pop(MMF_VENC_CHN, &stream)) {
+                        log::error("mmf_venc_pop failed\n");
+                        mmf_venc_free(MMF_VENC_CHN);
+                        mmf_del_venc_channel(MMF_VENC_CHN);
+                        goto _exit;
+                    }
+
+                    for (int i = 0; i < stream.count; i ++) {
+                        stream_size += stream.data_size[i];
+                    }
+
+                    if (stream_size != 0) {
+                        stream_buffer = (uint8_t *)malloc(stream_size);
+                        if (!stream_buffer) {
+                            log::error("malloc failed!\r\n");
+                            mmf_venc_free(MMF_VENC_CHN);
+                            mmf_del_venc_channel(MMF_VENC_CHN);
+                            goto _exit;
+                        } else {
+                            if (stream.count > 1) {
+                                int copy_length = 0;
+                                for (int i = 0; i < stream.count; i ++) {
+                                    memcpy(stream_buffer + copy_length, stream.data[i], stream.data_size[i]);
+                                    copy_length += stream.data_size[i];
+                                }
+                            } else if (stream.count == 1) {
+                                memcpy(stream_buffer, stream.data[0], stream.data_size[0]);
+                            }
+                        }
+                    }
+
+                    if (mmf_venc_free(MMF_VENC_CHN)) {
+                        printf("mmf_venc_free failed\n");
+                        free(stream_buffer);
+                        stream_buffer = NULL;
+                        mmf_del_venc_channel(MMF_VENC_CHN);
+                        goto _exit;
+                    }
+
+                    if (mmf_vi_frame_pop(vi_ch, &data, &data_size, &width, &height, &format)) {
+                        log::error("read camera image failed!\r\n");
+                        goto _exit;
+                    }
+
+                    dts = get_dts(diff_ms);
+                    pts = get_pts(diff_ms);
+
+                    if (data_size > 2560 * 1440 * 3 / 2) {
+                        log::error("image is too large!\r\n");
+                        goto _exit;
+                    }
+
+                    if (_need_capture) {
+                        if (_capture_image && _capture_image->data()) {
+                            delete _capture_image;
+                            _capture_image = NULL;
+                        }
+
+                        image::Format capture_format = (image::Format)mmf_invert_format_to_maix(format);
+                        bool need_align = (width % mmf_vi_aligned_width(vi_ch) == 0) ? false : true;   // Width need align only
+                        switch (capture_format) {
+                            case image::Format::FMT_BGR888: // fall through
+                            case image::Format::FMT_RGB888:
+                            {
+                                _capture_image = new image::Image(width, height, capture_format);
+                                uint8_t * image_data = (uint8_t *)_capture_image->data();
+                                if (need_align) {
+                                    for (int h = 0; h < height; h++) {
+                                        memcpy((uint8_t *)image_data + h * width * 3, (uint8_t *)data + h * width * 3, width * 3);
+                                    }
+                                } else {
+                                    memcpy(image_data, data, width * height * 3);
+                                }
+                            }
+                                break;
+                            case image::Format::FMT_YVU420SP:
+                            {
+                                _capture_image = new image::Image(width, height, capture_format);
+                                uint8_t * image_data = (uint8_t *)_capture_image->data();
+                                if (need_align) {
+                                    for (int h = 0; h < height * 3 / 2; h ++) {
+                                        memcpy((uint8_t *)image_data + h * width, (uint8_t *)data + h * width, width);
+                                    }
+                                } else {
+                                    memcpy(image_data, data, width * height * 3 / 2);
+                                }
+                                break;
+                            }
+                            default:
+                            {
+                                _capture_image = NULL;
+                                break;
+                            }
+                        }
+                    }
+
+                    mmf_venc_cfg_t cfg = {0};
+                    if (0 != mmf_venc_get_cfg(MMF_VENC_CHN, &cfg)) {
+                        err::check_raise(err::ERR_RUNTIME, "get venc config failed!\r\n");
+                    }
+
+                    int img_w = img->width();
+                    int img_h = img->height();
+                    image::Format img_fmt = img->format();
+                    if (img_w != cfg.w
+                        || img->height() != cfg.h
+                        || img->format() != mmf_invert_format_to_maix(cfg.fmt)) {
+                        log::warn("image size or format is incorrect, try to reinit venc!\r\n");
+                        mmf_del_venc_channel(MMF_VENC_CHN);
+                        cfg.w = img_w;
+                        cfg.h = img_h;
+                        cfg.fmt = mmf_invert_format_to_mmf(img_fmt);
+                        if (0 != mmf_add_venc_channel(MMF_VENC_CHN, &cfg)) {
+                            err::check_raise(err::ERR_RUNTIME, "mmf venc init failed!\r\n");
+                        }
+                        _width = img_w;
+                        _height = img_h;
+                        _format = img_fmt;
+                    }
+
+                    if (mmf_venc_push(MMF_VENC_CHN, (uint8_t *)data, width, height, format)) {
+                        log::warn("mmf_venc_push failed\n");
+                        mmf_del_venc_channel(MMF_VENC_CHN);
+                        goto _exit;
+                    }
+
+                    mmf_vi_frame_free(vi_ch);
+                } while (stream_size == 0);
+            }
+            break;
+        }
+        case VIDEO_H265_CBR:
+        {
+            if (img && img->data() != NULL) {  // encode from image
+                if (img->data_size() > 2560 * 1440 * 3 / 2) {
+                    log::error("image is too large!\r\n");
+                    goto _exit;
+                }
+
+                dts = get_dts(diff_ms);
+                pts = get_pts(diff_ms);
+
+                if (mmf_enc_h265_push(MMF_VENC_CHN, (uint8_t *)img->data(), img->width(), img->height(), mmf_invert_format_to_mmf(img->format()))) {
+                    log::error("mmf_enc_h265_push failed\n");
+                    goto _exit;
+                }
+
+                mmf_h265_stream_t stream = {0};
+                if (mmf_enc_h265_pop(MMF_VENC_CHN, &stream)) {
+                    log::error("mmf_enc_h265_pull failed\n");
+                    mmf_enc_h265_free(MMF_VENC_CHN);
+                    goto _exit;
+                }
+
+                for (int i = 0; i < stream.count; i ++) {
+                    // printf("[%d] stream.data:%p stream.len:%d\n", i, stream.data[i], stream.data_size[i]);
+                    stream_size += stream.data_size[i];
+                }
+
+                if (stream_size != 0) {
+                    stream_buffer = (uint8_t *)malloc(stream_size);
+                    if (!stream_buffer) {
+                        log::error("malloc failed!\r\n");
+                        mmf_enc_h265_free(MMF_VENC_CHN);
+                        goto _exit;
+                    } else {
+                        if (stream.count > 1) {
+                            int copy_length = 0;
+                            for (int i = 0; i < stream.count; i ++) {
+                                memcpy(stream_buffer + copy_length, stream.data[i], stream.data_size[i]);
+                                copy_length += stream.data_size[i];
+                            }
+                        } else if (stream.count == 1) {
+                            memcpy(stream_buffer, stream.data[0], stream.data_size[0]);
+                        }
+                    }
+                }
+
+                if (mmf_enc_h265_free(MMF_VENC_CHN)) {
+                    printf("mmf_enc_h265_free failed\n");
+                    free(stream_buffer);
+                    stream_buffer = NULL;
+                    goto _exit;
+                }
+            } else { // encode from camera
+                if (!this->_bind_camera) {
+                    log::warn("You need use bind_camera() function to bind the camera!\r\n");
+                    goto _exit;
+                }
+
+                int vi_ch = _camera->get_channel();
+                void *data;
+                int data_size, width, height, format;
+
+                do {
+                    mmf_h265_stream_t stream = {0};
+                    if (mmf_enc_h265_pop(MMF_VENC_CHN, &stream)) {
+                        log::error("mmf_enc_h265_pop failed\n");
+                        mmf_enc_h265_free(MMF_VENC_CHN);
+                        mmf_enc_h265_deinit(MMF_VENC_CHN);
+                        goto _exit;
+                    }
+
+                    for (int i = 0; i < stream.count; i ++) {
+                        stream_size += stream.data_size[i];
+                    }
+
+                    if (stream_size != 0) {
+                        stream_buffer = (uint8_t *)malloc(stream_size);
+                        if (!stream_buffer) {
+                            log::error("malloc failed!\r\n");
+                            mmf_enc_h265_free(MMF_VENC_CHN);
+                            mmf_enc_h265_deinit(MMF_VENC_CHN);
+                            goto _exit;
+                        } else {
+                            if (stream.count > 1) {
+                                int copy_length = 0;
+                                for (int i = 0; i < stream.count; i ++) {
+                                    memcpy(stream_buffer + copy_length, stream.data[i], stream.data_size[i]);
+                                    copy_length += stream.data_size[i];
+                                }
+                            } else if (stream.count == 1) {
+                                memcpy(stream_buffer, stream.data[0], stream.data_size[0]);
+                            }
+                        }
+                    }
+
+                    if (mmf_enc_h265_free(MMF_VENC_CHN)) {
+                        printf("mmf_enc_h265_free failed\n");
+                        free(stream_buffer);
+                        stream_buffer = NULL;
+                        mmf_enc_h265_deinit(MMF_VENC_CHN);
+                        goto _exit;
+                    }
+
+                    if (mmf_vi_frame_pop(vi_ch, &data, &data_size, &width, &height, &format)) {
+                        log::error("read camera image failed!\r\n");
+                        goto _exit;
+                    }
+
+                    dts = get_dts(diff_ms);
+                    pts = get_pts(diff_ms);
+
+                    if (data_size > 2560 * 1440 * 3 / 2) {
+                        log::error("image is too large!\r\n");
+                        goto _exit;
+                    }
+
+                    if (_need_capture) {
+                        if (_capture_image && _capture_image->data()) {
+                            delete _capture_image;
+                            _capture_image = NULL;
+                        }
+
+                        image::Format capture_format = (image::Format)mmf_invert_format_to_maix(format);
+                        bool need_align = (width % mmf_vi_aligned_width(vi_ch) == 0) ? false : true;   // Width need align only
+                        switch (capture_format) {
+                            case image::Format::FMT_BGR888: // fall through
+                            case image::Format::FMT_RGB888:
+                            {
+                                _capture_image = new image::Image(width, height, capture_format);
+                                uint8_t * image_data = (uint8_t *)_capture_image->data();
+                                if (need_align) {
+                                    for (int h = 0; h < height; h++) {
+                                        memcpy((uint8_t *)image_data + h * width * 3, (uint8_t *)data + h * width * 3, width * 3);
+                                    }
+                                } else {
+                                    memcpy(image_data, data, width * height * 3);
+                                }
+                            }
+                                break;
+                            case image::Format::FMT_YVU420SP:
+                            {
+                                _capture_image = new image::Image(width, height, capture_format);
+                                uint8_t * image_data = (uint8_t *)_capture_image->data();
+                                if (need_align) {
+                                    for (int h = 0; h < height * 3 / 2; h ++) {
+                                        memcpy((uint8_t *)image_data + h * width, (uint8_t *)data + h * width, width);
+                                    }
+                                } else {
+                                    memcpy(image_data, data, width * height * 3 / 2);
+                                }
+                                break;
+                            }
+                            default:
+                            {
+                                _capture_image = NULL;
+                                break;
+                            }
+                        }
+                    }
+
+                    if (mmf_enc_h265_push(MMF_VENC_CHN, (uint8_t *)data, width, height, format)) {
+                        log::warn("mmf_enc_h265_push failed\n");
+                        mmf_enc_h265_deinit(MMF_VENC_CHN);
+                        goto _exit;
+                    }
+
+                    mmf_vi_frame_free(vi_ch);
+                } while (stream_size == 0);
+            }
+            break;
+        }
+        default:
+            std::string err_str = "Encoder not support type: " + std::to_string(_type);
+            err::check_raise(err::ERR_RUNTIME, err_str);
+        }
+_exit:
+        video::Frame *frame = new video::Frame(stream_buffer, stream_size, pts, dts, 0, true, false);
+        return frame;
+    }
+
+    Decoder::Decoder() {
+        // Empty body: this constructor performs no initialization of its own.
+    }
+
+    Decoder::~Decoder() {
+        // Empty body: this destructor performs no explicit cleanup.
+    }
+
+
+    err::Err Decoder::prepare(Bytes *data, bool copy) {
+        return err::ERR_NONE;   // stub: `data` and `copy` are unused; always returns ERR_NONE
+    }
+
+    err::Err Decoder::prepare(void *data, int data_size, bool copy) {
+        return err::ERR_NONE;   // stub: `data`, `data_size` and `copy` are unused; always returns ERR_NONE
+    }
+
+    image::Image *Decoder::decode(video::Frame *frame) {
+        return NULL;   // stub: `frame` is unused; no image is produced, callers always get NULL
+    }
+
 
     Video::Video(std::string path, int width, int height, image::Format format, int time_base, int framerate, bool capture, bool open)
     {
diff --git a/components/voice/port/maixcam/maix_audio_mmf.cpp b/components/voice/port/maixcam/maix_audio_mmf.cpp
index 0fd273a9..8856e00e 100644
--- a/components/voice/port/maixcam/maix_audio_mmf.cpp
+++ b/components/voice/port/maixcam/maix_audio_mmf.cpp
@@ -404,9 +404,11 @@ namespace maix::audio
 
         return err::ERR_NONE;
     }
-
+#if CONFIG_BUILD_WITH_MAIXPY   // when this flag is non-zero, NoneBytes points to a heap-allocated default-constructed Bytes
     maix::Bytes *Player::NoneBytes = new maix::Bytes();
-
+#else   // otherwise NoneBytes is a null pointer and nothing is allocated
+    maix::Bytes *Player::NoneBytes = NULL;
+#endif  // CONFIG_BUILD_WITH_MAIXPY
     Player::Player(std::string path, int sample_rate, audio::Format format, int channel) {
         _path = path;
         _sample_rate = sample_rate;
diff --git a/examples/video_demo/main/src/main.cpp b/examples/video_demo/main/src/main.cpp
index 0a4395e9..b6200156 100644
--- a/examples/video_demo/main/src/main.cpp
+++ b/examples/video_demo/main/src/main.cpp
@@ -19,6 +19,10 @@ static void helper(void)
     "2 [record_time] [output_path]: encode image and save to h265\r\n"
     "3 [record_time] [output_path]: record from camera and save to h265\r\n"
     "4 [record_time] [output_path]: record from camera and save to h265, then display\r\n"
+    "5 : encode h265\r\n"
+    "6 : bind camera and encode h265\r\n"
+    "7 : encode h264\r\n"
+    "8 : bind camera and encode h264\r\n"
     "\r\n"
     "Example: ./video_demo 0 5 output.mp4     # means record 5s from camera, and save to output.mp4\r\n"
     "==================================\r\n");
@@ -186,6 +190,107 @@ int _main(int argc, char* argv[])
         }
         break;
     }
+    case 5:
+    {
+        int width = 640;
+        int height = 480;
+        video::VideoType type = video::VIDEO_H265_CBR;
+        video::Encoder e = video::Encoder(width, height, image::Format::FMT_YVU420SP, type);
+        camera::Camera cam = camera::Camera(width, height, image::Format::FMT_YVU420SP);
+
+        while(!app::need_exit()) {
+            image::Image *img = cam.read();
+            video::Frame *frame = e.encode(img);
+            printf("frame data:%p size:%ld pts:%ld dts:%ld\r\n",
+                frame->data(), frame->size(), frame->get_pts(), frame->get_dts());
+            delete frame;
+            delete img;
+        }
+        break;
+    }
+    case 6:
+    {
+        int width = 640;
+        int height = 480;
+        video::VideoType type = video::VIDEO_H265_CBR;
+        int framerate = 30;
+        int gop = 50;
+        int bitrate = 3000 * 1000;
+        int time_base = 1000;
+        bool capture = true;
+        video::Encoder e = video::Encoder(width, height, image::Format::FMT_YVU420SP, type, framerate, gop, bitrate, time_base, capture);
+        camera::Camera cam = camera::Camera(width, height, image::Format::FMT_YVU420SP);
+        e.bind_camera(&cam);
+
+        char *file = (char *)"output.h265";
+        FILE *f = fopen(file, "wb");
+        err::check_null_raise(f, "open file failed!");
+
+        while(!app::need_exit()) {
+            video::Frame *frame = e.encode();
+            image::Image *img = e.capture();
+            printf("frame data:%p size:%ld pts:%ld dts:%ld\r\n",
+                frame->data(), frame->size(), frame->get_pts(), frame->get_dts());
+            printf("image size:%d\r\n", img->data_size());
+            fwrite(frame->data(), frame->size(), 1, f);
+            delete frame;
+            delete img;
+        }
+        fclose(f);
+        system("sync");
+        break;
+    }
+    case 7:
+    {
+        int width = 640;
+        int height = 480;
+        video::VideoType type = video::VIDEO_H264_CBR;
+        video::Encoder e = video::Encoder(width, height, image::Format::FMT_YVU420SP, type);
+        camera::Camera cam = camera::Camera(width, height, image::Format::FMT_YVU420SP);
+
+        while(!app::need_exit()) {
+            image::Image *img = cam.read();
+            video::Frame *frame = e.encode(img);
+            printf("frame data:%p size:%ld pts:%ld dts:%ld\r\n",
+                frame->data(), frame->size(), frame->get_pts(), frame->get_dts());
+            delete frame;
+            delete img;
+        }
+        break;
+    }
+    case 8:
+    {
+        int width = 640;
+        int height = 480;
+        video::VideoType type = video::VIDEO_H264_CBR;
+        int framerate = 30;
+        int gop = 50;
+        int bitrate = 3000 * 1000;
+        int time_base = 1000;
+        bool capture = true;
+        video::Encoder e = video::Encoder(width, height, image::Format::FMT_YVU420SP, type, framerate, gop, bitrate, time_base, capture);
+        camera::Camera cam = camera::Camera(width, height, image::Format::FMT_YVU420SP);
+        e.bind_camera(&cam);
+
+        char *file = (char *)"output.h264";
+        FILE *f = fopen(file, "wb");
+        err::check_null_raise(f, "open file failed!");
+
+        while(!app::need_exit()) {
+            video::Frame *frame = e.encode();
+            image::Image *img = e.capture();
+            printf("frame data:%p size:%ld pts:%ld dts:%ld\r\n",
+                frame->data(), frame->size(), frame->get_pts(), frame->get_dts());
+            printf("image size:%d\r\n", img->data_size());
+            fwrite(frame->data(), frame->size(), 1, f);
+            delete frame;
+            delete img;
+        }
+
+        fclose(f);
+        system("sync");
+        break;
+    }
     default:
         helper();
         return 0;