diff --git a/components/3rd_party/FFmpeg/CMakeLists.txt b/components/3rd_party/FFmpeg/CMakeLists.txt
index 84b7a715..e33cc57c 100644
--- a/components/3rd_party/FFmpeg/CMakeLists.txt
+++ b/components/3rd_party/FFmpeg/CMakeLists.txt
@@ -3,6 +3,7 @@ set(ffmpeg_unzip_path "${DL_EXTRACTED_PATH}/ffmpeg_srcs")
 set(src_path "${ffmpeg_unzip_path}/ffmpeg")
 ############### Add include ###################
 set(ffmpeg_include_dir          "${src_path}/include"
+                                "."
                                 )
 list(APPEND ADD_INCLUDE ${ffmpeg_include_dir})
 set_property(SOURCE ${ffmpeg_include_dir} PROPERTY GENERATED 1)
diff --git a/components/3rd_party/FFmpeg/maix_ffmpeg.hpp b/components/3rd_party/FFmpeg/maix_ffmpeg.hpp
new file mode 100644
index 00000000..7efef138
--- /dev/null
+++ b/components/3rd_party/FFmpeg/maix_ffmpeg.hpp
@@ -0,0 +1,641 @@
+#ifndef __MAIX_FFMPEG_HPP
+#define __MAIX_FFMPEG_HPP
+
+extern "C" {
+#include <libavformat/avformat.h>
+#include <libavcodec/avcodec.h>
+#include <libavutil/avutil.h>
+#include <libavutil/opt.h>
+#include <libswscale/swscale.h>
+#include <libavutil/imgutils.h>
+#include <libswresample/swresample.h>
+}
+#include <list>
+#include <string>
+
+
+
+
+namespace maix::ffmpeg {
+using namespace std;
+class FFmpegPacker {
+    /**
+     * Small byte buffer used to queue PCM chunks.
+     * `data` is either owned (released with delete[]) or borrowed from the
+     * caller, depending on how the object was built.
+     */
+    class Bytes
+    {
+    public:
+        /**
+         * @param data        bytes to wrap or copy; NULL allocates `len` uninitialised bytes
+         * @param len         size of `data` in bytes
+         * @param auto_detele with `copy == false`: take ownership of `data`, which must
+         *                    then come from new[] because it is released with delete[]
+         * @param copy        store a private copy of `data` instead of the pointer
+         */
+        Bytes(uint8_t *data, uint32_t len, bool auto_detele = false, bool copy = true)
+            : data(data), buff_len(len), data_len(len), _is_alloc(auto_detele)
+        {
+            if (len > 0 && (copy || !data))
+            {
+                this->data = new uint8_t[len];
+                this->_is_alloc = true;
+                if (data)
+                {
+                    memcpy(this->data, data, len);
+                }
+            }
+        }
+
+        Bytes() : data(NULL), buff_len(0), data_len(0), _is_alloc(false) {}
+
+        // Deep copy. The implicit copy constructor would share `data` between
+        // both objects and release it twice.
+        Bytes(const Bytes &other) : data(NULL), buff_len(0), data_len(0), _is_alloc(false)
+        {
+            *this = other;
+        }
+
+        ~Bytes()
+        {
+            release();
+        }
+
+        // Deep copy; an empty source leaves this object empty.
+        Bytes &operator=(const Bytes &other)
+        {
+            if (this != &other)
+            {
+                // Build the copy first so *this survives a failed allocation.
+                uint8_t *fresh = NULL;
+                if (other.data && other.buff_len > 0)
+                {
+                    fresh = new uint8_t[other.buff_len];
+                    memcpy(fresh, other.data, other.buff_len);
+                }
+                release();
+                this->data = fresh;
+                this->buff_len = fresh ? other.buff_len : 0;
+                this->data_len = fresh ? other.data_len : 0;
+                this->_is_alloc = (fresh != NULL);
+            }
+            return *this;
+        }
+
+        // Bounds-checked read: 0 for an index outside [0, data_len).
+        uint8_t at(int index) const
+        {
+            if (index < 0 || index >= (int)this->data_len)
+            {
+                return 0;
+            }
+            return this->data[index];
+        }
+
+        uint8_t operator[](int index) const { return at(index); }
+
+        size_t size() const { return this->data_len; }
+
+        uint8_t *begin() { return this->data; }
+
+        uint8_t *end() { return this->data + this->data_len; }
+
+        uint8_t *data;
+        size_t buff_len;
+        size_t data_len;
+    private:
+        // Release `data` when this object owns it.
+        void release()
+        {
+            if (_is_alloc && data)
+            {
+                delete[] data;
+            }
+            data = NULL;
+        }
+
+        bool _is_alloc;
+    };
+
+    bool _open = false;                         // true between a successful open() and close()
+    size_t _last_pts = 0;
+    int _stream_index = 0;
+    std::string _path;                          // output location, set via config2("path")
+    std::string _context_format_name;           // optional muxer name, set via config2()
+    AVFormatContext *_format_context = nullptr;
+
+    /* video parameter */
+    bool _has_video = false;
+    AVStream *_video_stream = nullptr;
+    enum AVCodecID _video_codec_id = AV_CODEC_ID_NONE;
+    int _video_width = 0;
+    int _video_height = 0;
+    enum AVPixelFormat _vidoe_pixel_format = AV_PIX_FMT_NONE;
+    uint32_t _video_bitrate = 0;
+    AVRational _video_timebase = {0, 1};        // unset until config("video_fps", ...)
+    uint8_t *_video_sps_pps = nullptr;          // malloc()'ed copy made by config_sps_pps()
+    size_t _video_sps_pps_size = 0;
+    size_t _video_last_pts = 0;                 // pts of the previous video packet, for durations
+
+    /* audio parameter */
+    bool _has_audio = false;
+    AVStream *_audio_stream = nullptr;
+    AVCodecContext *_audio_codec_ctx = nullptr; // null-initialised: close()/open() cleanup tests these pointers
+    SwrContext *_audio_swr_ctx = nullptr;
+    AVCodec *_audio_codec = nullptr;
+    AVFrame *_audio_frame = nullptr;
+    int _audio_sample_rate = 48000;
+    int _audio_channels = 1;
+    int _audio_bitrate = 128000;
+    enum AVSampleFormat _audio_format = AV_SAMPLE_FMT_S16; // format of the PCM handed to push()
+    std::list<std::pair<size_t, Bytes *>> *_pcm_list = nullptr; // (pts, chunk) queue of PCM awaiting encoding
+public:
+    // Packs video (h264) and audio (aac), then saves the result to the configured path.
+    // The constructor does no work; set the options with config()/config2()/config_sps_pps().
+    FFmpegPacker() {}
+
+    // Finishes the output if it is still open and releases an SPS/PPS blob that
+    // was stored by config_sps_pps() but never consumed by open().
+    ~FFmpegPacker() {
+        if (_open) this->close();
+        free(_video_sps_pps);   // malloc()'ed by config_sps_pps(); free(NULL) is a no-op
+    }
+    /**
+     * Set one integer option; the stored values are read by open().
+     *
+     * Recognised keys and how `data` is used:
+     *   "has_video"          non-zero enables the video stream
+     *   "video_codec_id"     cast to AVCodecID
+     *   "video_width"        video width
+     *   "video_height"       video height
+     *   "video_bitrate"      video bit rate
+     *   "video_fps"          video time base becomes 1/data
+     *   "video_pixel_format" cast to AVPixelFormat
+     *   "has_audio"          non-zero enables the audio stream
+     *   "audio_sample_rate"  audio sample rate
+     *   "audio_channels"     audio channel count
+     *   "audio_bitrate"      audio bit rate
+     *   "audio_format"       cast to AVSampleFormat
+     *
+     * Unknown keys are rejected and leave the object unchanged.
+     *
+     * @param cmd  option key, see the list above
+     * @param data option value
+     * @return 0 when `cmd` was recognised, -1 otherwise
+     */
+    int config(std::string cmd, int data)
+    {
+        // video options
+        if (cmd == "has_video")               _has_video = (data != 0);
+        else if (cmd == "video_codec_id")     _video_codec_id = (AVCodecID)data;
+        else if (cmd == "video_width")        _video_width = data;
+        else if (cmd == "video_height")       _video_height = data;
+        else if (cmd == "video_bitrate")      _video_bitrate = data;
+        else if (cmd == "video_fps")          _video_timebase = (AVRational){1, data};
+        else if (cmd == "video_pixel_format") _vidoe_pixel_format = (AVPixelFormat)data;
+        // audio options
+        else if (cmd == "has_audio")          _has_audio = (data != 0);
+        else if (cmd == "audio_sample_rate")  _audio_sample_rate = data;
+        else if (cmd == "audio_channels")     _audio_channels = data;
+        else if (cmd == "audio_bitrate")      _audio_bitrate = data;
+        else if (cmd == "audio_format")       _audio_format = (enum AVSampleFormat)data;
+        else                                  return -1;   // unknown key
+
+        return 0;
+    }
+
+    // Set one string option read by open(): "path" is the output location and
+    // "context_format_name" the muxer name (empty makes open() pass NULL for it).
+    // Returns 0 when `cmd` was recognised, -1 otherwise.
+    int config2(std::string cmd, std::string str) {
+        std::string *target = nullptr;
+        if (cmd == "path") target = &_path;
+        else if (cmd == "context_format_name") target = &_context_format_name;
+
+        if (!target) return -1;   // unknown key
+
+        *target = str;
+        return 0;
+    }
+
+    // Store a private copy of the SPS+PPS blob for open() to attach to the video
+    // stream. Returns 0 on success, -1 on bad arguments or allocation failure.
+    int config_sps_pps(uint8_t *sps_pps, int sps_pps_size)
+    {
+        // A NULL source or non-positive size would make malloc()/memcpy() misbehave.
+        if (!sps_pps || sps_pps_size <= 0) return -1;
+        uint8_t *copy = (uint8_t *)malloc(sps_pps_size);
+        if (!copy) return -1;   // the previously stored blob, if any, is kept
+        memcpy(copy, sps_pps, sps_pps_size);
+        free(_video_sps_pps);   // free(NULL) is a no-op
+        _video_sps_pps = copy;
+        _video_sps_pps_size = sps_pps_size;
+        return 0;
+    }
+
+    bool is_opened() { return _open; }   // true between a successful open() and close()
+
+    /**
+     * Create the output container, add the configured streams, open the
+     * output and write the container header.
+     *
+     * Everything acquired here is held in locals until the header has been
+     * written, so a failure releases exactly what was acquired and publishes
+     * nothing. The blob stored by config_sps_pps() is consumed as soon as it
+     * has been copied into the video stream, even if a later step fails.
+     *
+     * @return 0 on success or when already open, -1 on any failure.
+     */
+    int open() {
+        if (_open) return 0;
+
+        AVFormatContext *format_context = NULL;
+        AVStream *video_stream = NULL;
+        AVStream *audio_stream = NULL;
+        AVCodec *audio_codec = NULL;
+        AVCodecContext *audio_codec_ctx = NULL;
+        SwrContext *swr_ctx = NULL;
+        AVFrame *audio_frame = NULL;
+
+        // Print `msg`, release whatever has been acquired so far and yield -1.
+        // The FFmpeg free helpers used here all accept null pointers.
+        auto fail = [&](const char *msg) -> int {
+            printf("%s", msg);
+            av_frame_free(&audio_frame);
+            swr_free(&swr_ctx);
+            avcodec_free_context(&audio_codec_ctx);
+            if (format_context) {
+                if (format_context->pb) {
+                    avio_closep(&format_context->pb);
+                }
+                // Also frees the streams and the extradata attached to them.
+                avformat_free_context(format_context);
+                format_context = NULL;
+            }
+            return -1;
+        };
+
+        const char *context_format_name = NULL;
+        if (!_context_format_name.empty()) {
+            context_format_name = _context_format_name.c_str();
+        }
+        avformat_alloc_output_context2(&format_context, NULL, context_format_name, _path.c_str());
+        if (!format_context) {
+            return fail("Can't create output context\r\n");
+        }
+
+        if (_has_video) {
+            if (_video_sps_pps == NULL || _video_sps_pps_size == 0) {
+                return fail("video sps pps is null, size is 0\r\n");
+            }
+            video_stream = avformat_new_stream(format_context, NULL);
+            if (!video_stream) {
+                return fail("Can't create video stream\r\n");
+            }
+            // libavcodec requires extradata to come from av_malloc() and to be
+            // followed by AV_INPUT_BUFFER_PADDING_SIZE zeroed bytes.
+            uint8_t *extradata = (uint8_t *)av_mallocz(_video_sps_pps_size + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (!extradata) {
+                return fail("Can't alloc video extradata\r\n");
+            }
+            memcpy(extradata, _video_sps_pps, _video_sps_pps_size);
+
+            video_stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
+            video_stream->codecpar->codec_id = _video_codec_id;
+            video_stream->codecpar->width = _video_width;
+            video_stream->codecpar->height = _video_height;
+            video_stream->codecpar->format = _vidoe_pixel_format;
+            video_stream->codecpar->bit_rate = _video_bitrate;
+            video_stream->codecpar->codec_tag = 0;
+            video_stream->codecpar->extradata = extradata;
+            video_stream->codecpar->extradata_size = (int)_video_sps_pps_size;
+            video_stream->time_base = _video_timebase;
+
+            // The stream owns its own copy now; drop the malloc()'ed original.
+            free(_video_sps_pps);
+            _video_sps_pps = nullptr;
+            _video_sps_pps_size = 0;
+        }
+
+        if (_has_audio) {
+            audio_stream = avformat_new_stream(format_context, NULL);
+            if (!audio_stream) {
+                return fail("Can't create audio stream\r\n");
+            }
+
+            audio_codec = avcodec_find_encoder(AV_CODEC_ID_AAC);
+            if (!audio_codec) {
+                return fail("Can't find audio encoder\r\n");
+            }
+
+            audio_codec_ctx = avcodec_alloc_context3(audio_codec);
+            if (!audio_codec_ctx) {
+                return fail("Can't alloc audio codec context\r\n");
+            }
+            audio_codec_ctx->codec_id = AV_CODEC_ID_AAC;
+            audio_codec_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
+            audio_codec_ctx->sample_rate = _audio_sample_rate;
+            audio_codec_ctx->channels = _audio_channels;
+            audio_codec_ctx->channel_layout = av_get_default_channel_layout(audio_codec_ctx->channels);
+            audio_codec_ctx->sample_fmt = AV_SAMPLE_FMT_FLTP;
+            audio_codec_ctx->time_base = (AVRational){1, _audio_sample_rate};
+            audio_codec_ctx->bit_rate = _audio_bitrate;
+            audio_stream->time_base = audio_codec_ctx->time_base;
+
+            if (0 > avcodec_open2(audio_codec_ctx, audio_codec, NULL)) {
+                return fail("Can't open audio codec\r\n");
+            }
+
+            if (0 > avcodec_parameters_from_context(audio_stream->codecpar, audio_codec_ctx)) {
+                return fail("Can't copy audio codec parameters\r\n");
+            }
+
+            // Resampler: same rate and layout, input sample format -> planar float.
+            swr_ctx = swr_alloc();
+            if (!swr_ctx) {
+                return fail("Can't alloc swr context\r\n");
+            }
+            av_opt_set_int(swr_ctx, "in_channel_layout", audio_codec_ctx->channel_layout, 0);
+            av_opt_set_int(swr_ctx, "out_channel_layout", audio_codec_ctx->channel_layout, 0);
+            av_opt_set_int(swr_ctx, "in_sample_rate", audio_codec_ctx->sample_rate, 0);
+            av_opt_set_int(swr_ctx, "out_sample_rate", audio_codec_ctx->sample_rate, 0);
+            av_opt_set_sample_fmt(swr_ctx, "in_sample_fmt", _audio_format, 0);
+            av_opt_set_sample_fmt(swr_ctx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0);
+            if (swr_init(swr_ctx) < 0) {
+                return fail("Can't init swr context\r\n");
+            }
+
+            // Reusable frame sized for one encoder frame of planar float samples.
+            audio_frame = av_frame_alloc();
+            if (!audio_frame) {
+                return fail("Can't alloc audio frame\r\n");
+            }
+            audio_frame->nb_samples = audio_codec_ctx->frame_size;
+            audio_frame->channel_layout = audio_codec_ctx->channel_layout;
+            audio_frame->channels = audio_codec_ctx->channels;
+            audio_frame->format = AV_SAMPLE_FMT_FLTP;
+            audio_frame->sample_rate = audio_codec_ctx->sample_rate;
+            if (av_frame_get_buffer(audio_frame, 0) < 0) {
+                return fail("Can't alloc audio frame buffer\r\n");
+            }
+        }
+
+        if (avio_open(&format_context->pb, _path.c_str(), AVIO_FLAG_WRITE) < 0) {
+            return fail("rtmp connect failed!\r\n");
+        }
+
+        if (avformat_write_header(format_context, NULL) < 0) {
+            return fail("rtmp write header failed!\r\n");
+        }
+
+        // Success: publish the locals. Audio members are null when audio is disabled.
+        _format_context = format_context;
+        _video_stream = video_stream;
+        _audio_stream = audio_stream;
+        _audio_codec = audio_codec;
+        _audio_codec_ctx = audio_codec_ctx;
+        _audio_swr_ctx = swr_ctx;
+        _audio_frame = audio_frame;
+        _video_last_pts = 0;
+        _pcm_list = new std::list<std::pair<size_t, Bytes *>>;
+        _open = true;
+        return 0;
+    }
+
+    /**
+     * Write the container trailer and release everything acquired by open().
+     * Does nothing when the packer is not open. PCM still waiting in the queue
+     * is discarded.
+     */
+    void close() {
+        if (!_open) return;
+
+        if (_pcm_list) {
+            // Free every queued chunk, then the list. Erasing inside a loop that
+            // also increments the iterator would skip entries and step past end().
+            for (auto &item : *_pcm_list) {
+                delete item.second;
+            }
+            delete _pcm_list;
+            _pcm_list = nullptr;
+        }
+
+        // These helpers take the address of the pointer and reset it to null.
+        if (_audio_frame) {
+            av_frame_free(&_audio_frame);
+        }
+        if (_audio_swr_ctx) {
+            swr_free(&_audio_swr_ctx);
+        }
+        if (_audio_codec_ctx) {
+            avcodec_free_context(&_audio_codec_ctx);
+        }
+
+        if (_format_context) {
+            av_write_trailer(_format_context);
+            if (_format_context->pb) {
+                avio_closep(&_format_context->pb);
+            }
+            // Frees the streams as well, so drop the now-dangling stream pointers.
+            avformat_free_context(_format_context);
+            _format_context = NULL;
+            _video_stream = nullptr;
+            _audio_stream = nullptr;
+        }
+        _open = false;
+        _video_last_pts = 0;
+    }
+
+
+    /**
+     * Write one encoded video frame, or feed raw PCM to the AAC encoder.
+     *
+     * Video: `frame` is muxed as a single packet with pts == dts == `pts`.
+     * Audio: `frame` is PCM in the configured input sample format. It is cut
+     * into chunks of exactly one encoder frame; a trailing partial chunk stays
+     * queued and is completed by the next call.
+     *
+     * @param frame      encoded video data or PCM bytes
+     * @param frame_size size of `frame` in bytes
+     * @param pts        timestamp written to the packets of the target stream
+     * @param is_audio   true when `frame` holds PCM instead of video
+     * @return 0 on success or when there was nothing to do, -1 on failure
+     */
+    int push(uint8_t *frame, size_t frame_size, uint64_t pts, bool is_audio = false)
+    {
+        if (!_open) return -1;
+
+        if (!is_audio) {
+            if (!_has_video) {
+                return 0;
+            }
+
+            AVPacket *pkt = av_packet_alloc();
+            if (!pkt) {
+                fprintf(stderr, "Can't malloc avpacket\r\n");
+                return -1;
+            }
+            pkt->data = frame;
+            pkt->size = (int)frame_size;
+            pkt->stream_index = _video_stream->index;
+            pkt->duration = pts - _video_last_pts;
+            _video_last_pts = pts;
+            pkt->pts = pkt->dts = pts;
+            // NOTE(review): every packet is flagged as a key frame, as before; confirm
+            // whether the caller can tell IDR frames apart so only those get the flag.
+            pkt->flags |= AV_PKT_FLAG_KEY;
+
+            const int written = av_interleaved_write_frame(_format_context, pkt);
+            av_packet_free(&pkt);   // also unreferences the packet
+            if (written < 0) {
+                fprintf(stderr, "send frame failed!\r\n");
+                return -1;
+            }
+            return 0;
+        }
+
+        if (!_has_audio || !frame || frame_size == 0) {
+            return 0;
+        }
+
+        // Bytes of input PCM that make up exactly one encoder frame.
+        const int frame_bytes = av_samples_get_buffer_size(NULL, _audio_channels, _audio_frame->nb_samples, _audio_format, 1);
+        if (frame_bytes <= 0) {
+            fprintf(stderr, "invalid audio frame size\r\n");
+            return -1;
+        }
+        const size_t buffer_size = (size_t)frame_bytes;
+        size_t consumed = 0;    // bytes of `frame` already queued
+        size_t next_pts = pts;  // pts of the next chunk to be queued
+
+        // 1) Top up a partial chunk left over from the previous call. Chunk
+        //    storage always comes from Bytes itself (new[]), matching the
+        //    delete[] in its destructor.
+        if (!_pcm_list->empty()) {
+            auto &tail = _pcm_list->back();
+            Bytes *partial = tail.second;
+            if (partial && partial->data_len < buffer_size) {
+                const size_t room = buffer_size - partial->data_len;
+                const size_t take = frame_size < room ? frame_size : room;
+                Bytes *merged = new Bytes(nullptr, partial->data_len + take);
+                memcpy(merged->data, partial->data, partial->data_len);
+                memcpy(merged->data + partial->data_len, frame, take);
+                tail.second = merged;
+                delete partial;
+                consumed = take;
+                next_pts = tail.first + get_audio_pts_from_pcm_size(merged->data_len);
+            }
+        }
+
+        // 2) Queue the rest of the input in chunks of at most one encoder frame.
+        while (consumed < frame_size) {
+            const size_t remain = frame_size - consumed;
+            const size_t chunk_size = remain < buffer_size ? remain : buffer_size;
+            Bytes *chunk = new Bytes(frame + consumed, chunk_size);   // Bytes makes its own copy
+            _pcm_list->push_back(std::make_pair(next_pts, chunk));
+            next_pts += get_audio_pts_from_pcm_size(chunk_size);
+            consumed += chunk_size;
+        }
+
+        // 3) Encode and mux every complete chunk at the head of the queue.
+        AVPacket *pkt = av_packet_alloc();
+        if (!pkt) {
+            fprintf(stderr, "Can't malloc avpacket\r\n");
+            return -1;
+        }
+
+        int result = 0;
+        while (!_pcm_list->empty()) {
+            const size_t chunk_pts = _pcm_list->front().first;
+            Bytes *pcm = _pcm_list->front().second;
+            if (!pcm) {
+                // Drop the bad entry; leaving it at the head would spin forever.
+                fprintf(stderr, "pcm data is nullptr..\r\n");
+                _pcm_list->pop_front();
+                continue;
+            }
+            if (pcm->data_len != buffer_size) {
+                break;  // partial tail: wait for more data
+            }
+
+            // The encoder may still reference the frame's buffers from the
+            // previous round, so get a writable frame before overwriting it.
+            const uint8_t *in[] = {pcm->data};
+            if (av_frame_make_writable(_audio_frame) < 0 ||
+                swr_convert(_audio_swr_ctx, _audio_frame->data, _audio_codec_ctx->frame_size, in, _audio_codec_ctx->frame_size) < 0) {
+                fprintf(stderr, "convert audio frame failed!\r\n");
+                result = -1;
+            } else {
+                _audio_frame->pts = chunk_pts;
+                if (avcodec_send_frame(_audio_codec_ctx, _audio_frame) < 0) {
+                    printf("Error sending audio_frame to encoder.\n");
+                    result = -1;
+                }
+            }
+
+            while (avcodec_receive_packet(_audio_codec_ctx, pkt) == 0) {
+                pkt->stream_index = _audio_stream->index;
+                pkt->pts = pkt->dts = chunk_pts;
+                pkt->duration = get_audio_pts_from_pcm_size(pcm->data_len);
+                if (av_interleaved_write_frame(_format_context, pkt) < 0) {
+                    fprintf(stderr, "send frame failed!\r\n");
+                    result = -1;
+                }
+                av_packet_unref(pkt);
+            }
+
+            // The chunk is consumed even on error so a persistent failure cannot
+            // grow the queue without bound.
+            _pcm_list->pop_front();
+            delete pcm;
+        }
+
+        av_packet_free(&pkt);
+        return result;
+    }
+
+    uint64_t get_audio_pts_from_pcm_size(size_t pcm_length) { // PCM byte count -> ticks of the audio stream time base
+        if (!_open || !_has_audio) return 0;
+        const uint64_t bytes_per_second = (uint64_t)_audio_frame->sample_rate * _audio_frame->channels * av_get_bytes_per_sample(_audio_format);
+        if (bytes_per_second == 0 || _audio_stream->time_base.num == 0) return 0;   // would divide by zero below
+        return pcm_length * (_audio_stream->time_base.den / _audio_stream->time_base.num) / bytes_per_second;
+    }
+
+    double video_pts_to_us(uint64_t pts) { // video-stream ticks -> microseconds; 0 when closed or without video
+        if (!(_open && _has_video)) return 0;
+        const AVRational &tb = _video_stream->time_base;
+        return (double)pts / (tb.den / tb.num) * 1000000;   // den / num is an integer division
+    }
+
+    double audio_pts_to_us(uint64_t pts) { // audio-stream ticks -> microseconds; 0 when closed or without audio
+        // Guard on audio, not video: this dereferences _audio_stream.
+        if (!_open || !_has_audio || !_audio_stream) return 0;
+        return (double)pts / ( _audio_stream->time_base.den /_audio_stream->time_base.num) * 1000000;
+    }
+
+    // Microseconds -> ticks of the video stream time base (0 when closed or without video).
+    uint64_t video_us_to_pts(uint64_t us) {
+        if (!(_open && _has_video)) return 0;
+
+        const AVRational &tb = _video_stream->time_base;
+        return us * (tb.den / tb.num) / 1000000;   // den / num is an integer division
+    }
+
+    // Microseconds -> ticks of the audio stream time base (0 when closed or without audio).
+    uint64_t audio_us_to_pts(uint64_t us) {
+        // Guard on audio, not video: this dereferences _audio_stream.
+        if (!_open || !_has_audio || !_audio_stream) return 0;
+
+        return us * (_audio_stream->time_base.den / _audio_stream->time_base.num) / 1000000;
+    }
+
+    int get_audio_frame_size_per_second() { // bytes of input PCM per second; 0 when closed or without audio
+        // Guard on audio, not video: this dereferences _audio_frame.
+        if (!_open || !_has_audio || !_audio_frame) return 0;
+        return _audio_frame->sample_rate * _audio_frame->channels * av_get_bytes_per_sample(_audio_format);
+    }
+};
+}
+
+#endif // __MAIX_FFMPEG_HPP
diff --git a/components/vision/port/maixcam/maix_rtmp_maixcam.cpp b/components/vision/port/maixcam/maix_rtmp_maixcam.cpp
index 9ebbbf6a..a5326f17 100644
--- a/components/vision/port/maixcam/maix_rtmp_maixcam.cpp
+++ b/components/vision/port/maixcam/maix_rtmp_maixcam.cpp
@@ -369,20 +369,6 @@ class RTMPClient
         return err::ERR_NONE;
     }
 
-    static void add_adts_header(uint8_t adts_header[7], int packet_len, AVCodecContext *audio_codec_ctx) {
-        int profile = 1;  // AAC LC
-        int freq_idx = audio_codec_ctx->sample_rate == 44100 ? 4 : 3; // 4: 44.1kHz, 3: 48kHz
-        int chan_cfg = audio_codec_ctx->channels;
-
-        adts_header[0] = 0xFF;
-        adts_header[1] = 0xF1;
-        adts_header[2] = ((profile << 6) + (freq_idx << 2) + (chan_cfg >> 2));
-        adts_header[3] = (((chan_cfg & 3) << 6) + (packet_len >> 11));
-        adts_header[4] = ((packet_len & 0x7FF) >> 3);
-        adts_header[5] = (((packet_len & 7) << 5) + 0x1F);
-        adts_header[6] = 0xFC;
-    }
-
     err::Err push(uint8_t *frame, size_t frame_size, uint64_t pts, bool is_audio = false)
     {
         ffmpeg_param_t *ffmpeg = (ffmpeg_param_t *)&_ffmpeg;
diff --git a/projects/app_camera/main/CMakeLists.txt b/projects/app_camera/main/CMakeLists.txt
index ea6669d2..f4a8faa1 100644
--- a/projects/app_camera/main/CMakeLists.txt
+++ b/projects/app_camera/main/CMakeLists.txt
@@ -23,7 +23,7 @@ append_srcs_dir(ADD_SRCS "assets")
 ###############################################
 
 ###### Add required/dependent components ######
-list(APPEND ADD_REQUIREMENTS basic lvgl)
+list(APPEND ADD_REQUIREMENTS basic lvgl FFmpeg)
 ###############################################
 
 ###### Add link search path for requirements/libs ######
diff --git a/projects/app_camera/main/app/app.cpp b/projects/app_camera/main/app/app.cpp
index a8e83708..bc00409c 100644
--- a/projects/app_camera/main/app/app.cpp
+++ b/projects/app_camera/main/app/app.cpp
@@ -10,6 +10,7 @@
 #include "maix_gpio.hpp"
 #include "maix_fs.hpp"
 #include "maix_vision.hpp"
+#include "maix_ffmpeg.hpp"
 #include "sophgo_middleware.hpp"
 #include <sys/ioctl.h>
 #include <sys/types.h>
@@ -39,9 +40,7 @@ static struct {
 
     uint8_t cam_snap_delay_s;
     int video_start_ms;
-    std::string video_save_path;
     std::string video_mp4_path;
-    int video_save_fd;
 
     uint64_t loop_last_ms;
     void *loop_last_frame;
@@ -56,7 +55,14 @@ static struct {
     touchscreen::TouchScreen *touchscreen;
     video::Encoder *encoder;
     gpio::GPIO *light;
+    ffmpeg::FFmpegPacker *ffmpeg_packer;
+    audio::Recorder *audio_recorder;
     int encoder_bitrate;
+
+    uint64_t last_read_pcm_ms;
+    uint64_t last_read_cam_ms;
+    uint64_t video_pts;
+    uint64_t audio_pts;
 } priv;
 
 static void _capture_image(maix::camera::Camera &camera, maix::image::Image *img);
@@ -261,7 +267,8 @@ int app_base_init(void)
     mmf_deinit_v2(true);
 
     // init camera
-    priv.camera = new camera::Camera(priv.camera_resolution_w, priv.camera_resolution_h, image::Format::FMT_YVU420SP, NULL, 30, 3, true, priv.capture_raw_enable);
+    int fps = 30;
+    priv.camera = new camera::Camera(priv.camera_resolution_w, priv.camera_resolution_h, image::Format::FMT_YVU420SP, NULL, fps, 3, true, priv.capture_raw_enable);
     err::check_bool_raise(priv.camera->is_opened(), "camera open failed");
 
     // init display
@@ -271,7 +278,7 @@ int app_base_init(void)
 
     // init encoder
     priv.encoder_bitrate = _get_encode_bitrate_by_camera_resolution(priv.camera_resolution_w, priv.camera_resolution_h);
-    priv.encoder = new video::Encoder("", priv.camera_resolution_w, priv.camera_resolution_h, image::Format::FMT_YVU420SP, video::VideoType::VIDEO_H264, 30, 50, priv.encoder_bitrate);
+    priv.encoder = new video::Encoder("", priv.camera_resolution_w, priv.camera_resolution_h, image::Format::FMT_YVU420SP, video::VideoType::VIDEO_H264, fps, 50, priv.encoder_bitrate);
 
     // touch screen
     priv.touchscreen = new touchscreen::TouchScreen();
@@ -283,6 +290,27 @@ int app_base_init(void)
     priv.light->low();
     err::check_null_raise(priv.light, "light gpio open failed");
 
+    // init audio
+    priv.audio_recorder = new audio::Recorder();
+    err::check_null_raise(priv.audio_recorder, "audio recorder init failed!");
+
+    // init ffmpeg packer
+    priv.ffmpeg_packer = new ffmpeg::FFmpegPacker();
+    err::check_null_raise(priv.ffmpeg_packer, "ffmpeg packer init failed");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("has_video", true), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("video_codec_id", AV_CODEC_ID_H264), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("video_width", priv.camera_resolution_w), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("video_height", priv.camera_resolution_h), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("video_bitrate", priv.encoder_bitrate), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("video_fps", fps), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("video_pixel_format", AV_PIX_FMT_NV21), "rtmp config failed!");
+
+    err::check_bool_raise(!priv.ffmpeg_packer->config("has_audio", true), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("audio_sample_rate", 48000), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("audio_channels", 1), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("audio_bitrate", 128000), "rtmp config failed!");
+    err::check_bool_raise(!priv.ffmpeg_packer->config("audio_format", AV_SAMPLE_FMT_S16), "rtmp config failed!");
+
     // init gui
     maix::lvgl_init(priv.other_disp, priv.touchscreen);
     app_init(*priv.camera);
@@ -295,6 +323,16 @@ int app_base_deinit(void)
 {
     maix::lvgl_destroy();
 
+    if (priv.ffmpeg_packer) {
+        delete priv.ffmpeg_packer;
+        priv.ffmpeg_packer = NULL;
+    }
+
+    if (priv.audio_recorder) {
+        delete priv.audio_recorder;
+        priv.audio_recorder = NULL;
+    }
+
     if (priv.light) {
         priv.light->low();
         delete priv.light;
@@ -364,12 +402,6 @@ int app_base_loop(void)
         // Push frame to encoder
         int enc_ch = 1;
 
-        if (priv.video_start_flag && priv.video_prepare_is_ok) {
-            uint64_t record_time = time::ticks_ms() - priv.video_start_ms;
-            mmf_venc_push2(enc_ch, frame);
-            ui_set_record_time(record_time);
-        }
-
         // Snap picture
         if (priv.cam_snap_flag) {
             priv.cam_snap_flag = false;
@@ -386,22 +418,122 @@ int app_base_loop(void)
             delete img;
         }
 
+        bool found_venc_stream = false;  // becomes true when the pop below returns at least one buffer
+
         // Pop stream from encoder
-        mmf_stream_t stream = {0};
-        if (0 == mmf_venc_pop(enc_ch, &stream)) {
-            for (int i = 0; i < stream.count; i++) {
-                printf("stream[%d]: data:%p size:%d\r\n", i, stream.data[i], stream.data_size[i]);
-
-                if (priv.video_save_fd > 0) {
-                    int size = write(priv.video_save_fd, stream.data[i], stream.data_size[i]);
-                    if (size != stream.data_size[i]) {
-                        printf("write file failed! need %d bytes, write %d bytes\r\n", stream.data_size[i], size);
+        mmf_stream_t venc_stream = {0};
+        if (0 == mmf_venc_pop(enc_ch, &venc_stream)) {
+            // for (int i = 0; i < venc_stream.count; i++) {
+            //     printf("venc stream[%d]: data:%p size:%d\r\n", i, venc_stream.data[i], venc_stream.data_size[i]);
+            // }
+
+            if (venc_stream.count > 0) {
+                found_venc_stream = true;
+            }
+        }
+        // While the packer is open, re-derive the audio pts from the current video pts.
+        if (priv.ffmpeg_packer && priv.ffmpeg_packer->is_opened()) {
+            double temp_us = priv.ffmpeg_packer->video_pts_to_us(priv.video_pts);
+            priv.audio_pts = priv.ffmpeg_packer->audio_us_to_pts(temp_us);
+        }
+        // Hand the popped encoder stream to the packer, opening the packer first if needed.
+        if (found_venc_stream) {
+            if (priv.ffmpeg_packer) {
+                if (!priv.ffmpeg_packer->is_opened()) {
+                    if (venc_stream.count > 1) {  // buffers 0 and 1 are concatenated and passed to config_sps_pps()
+                        int sps_pps_size = venc_stream.data_size[0] + venc_stream.data_size[1];
+                        uint8_t *sps_pps = (uint8_t *)malloc(sps_pps_size);
+                        if (sps_pps) {
+                            memcpy(sps_pps, venc_stream.data[0], venc_stream.data_size[0]);
+                            memcpy(sps_pps + venc_stream.data_size[0], venc_stream.data[1], venc_stream.data_size[1]);
+
+                            if (0 == priv.ffmpeg_packer->config_sps_pps(sps_pps, sps_pps_size)) {
+                                while (0 != priv.ffmpeg_packer->open() && !app::need_exit()) {  // retry every 500 ms until open() succeeds or exit is requested
+                                    time::sleep_ms(500);
+                                    log::info("Can't open ffmpeg, retry again..");
+                                }
+                                // Read one batch from the recorder and discard it (presumably to drop audio captured before the packer opened — TODO confirm).
+                                if (priv.audio_recorder) {
+                                    Bytes *pcm_data = priv.audio_recorder->record();
+                                    if (pcm_data) {
+                                        delete pcm_data;
+                                        pcm_data = NULL;
+                                    }
+                                }
+                                // Restart both timestamp clocks.
+                                priv.last_read_pcm_ms = 0;
+                                priv.last_read_cam_ms = 0;
+                                priv.video_pts = 0;
+                                priv.audio_pts = 0;
+                            }
+                            free(sps_pps);
+                        }
+                    }
+                }
+            }
+            // The packer pointer may be null (it is guarded above), so test it before dereferencing.
+            if (priv.ffmpeg_packer && priv.ffmpeg_packer->is_opened()) {
+                uint8_t *data = NULL;
+                int data_size = 0;
+                if (venc_stream.count == 1) {
+                    data = venc_stream.data[0];
+                    data_size = venc_stream.data_size[0];
+                } else if (venc_stream.count > 2) {  // index 2 is read below, so at least three buffers are required
+                    data = venc_stream.data[2];
+                    data_size = venc_stream.data_size[2];
+                }
+
+                if (data_size) {
+                    if (priv.last_read_cam_ms == 0) {  // first frame after the clocks were reset: start at pts 0
+                        priv.video_pts = 0;
+                        priv.last_read_cam_ms = time::ticks_ms();
+                    } else {
+                        priv.video_pts += priv.ffmpeg_packer->video_us_to_pts((time::ticks_ms() - priv.last_read_cam_ms) * 1000);  // advance by the time elapsed since the previous frame
+                        priv.last_read_cam_ms = time::ticks_ms();
+                    }
+                    log::info("[VIDEO] pts:%d  pts %f s", priv.video_pts, priv.ffmpeg_packer->video_pts_to_us(priv.video_pts) / 1000000);
+                    if (err::ERR_NONE != priv.ffmpeg_packer->push(data, data_size, priv.video_pts)) {
+                        log::error("ffmpeg push failed!");
                     }
                 }
             }
             mmf_venc_free(enc_ch);
         }
 
+        if (priv.ffmpeg_packer && priv.audio_recorder && priv.ffmpeg_packer->is_opened()) {  // both pointers may be null; the audio path needs both
+            int frame_size_per_second = priv.ffmpeg_packer->get_audio_frame_size_per_second();
+            uint64_t loop_ms = 0;
+            int read_pcm_size = 0;
+            if (priv.last_read_pcm_ms == 0) {  // first audio read after the clocks were reset: use a nominal 30 ms interval
+                loop_ms = 30;
+                read_pcm_size = frame_size_per_second * loop_ms * 1.5 / 1000;  // request 1.5x the amount implied by the interval
+                priv.audio_pts = 0;
+                priv.last_read_pcm_ms = time::ticks_ms();
+            } else {
+                loop_ms = time::ticks_ms() - priv.last_read_pcm_ms;
+                priv.last_read_pcm_ms = time::ticks_ms();
+
+                read_pcm_size = frame_size_per_second * loop_ms * 1.5 / 1000;  // request 1.5x the amount implied by the interval
+                priv.audio_pts += priv.ffmpeg_packer->audio_us_to_pts(loop_ms * 1000);
+            }
+
+            Bytes *pcm_data = priv.audio_recorder->record_bytes(read_pcm_size);
+            if (pcm_data) {
+                if (pcm_data->data_len > 0) {
+                    log::info("[AUDIO] pts:%d  pts %f s", priv.audio_pts, priv.ffmpeg_packer->audio_pts_to_us(priv.audio_pts) / 1000000);
+                    if (err::ERR_NONE != priv.ffmpeg_packer->push(pcm_data->data, pcm_data->data_len, priv.audio_pts, true)) {
+                        log::error("ffmpeg push failed!");
+                    }
+                }
+                delete pcm_data;  // the buffer returned by record_bytes() is released here
+            }
+        }
+        // Queue the current frame for encoding while recording is started and prepared.
+        if (priv.video_start_flag && priv.video_prepare_is_ok) {
+            uint64_t record_time = time::ticks_ms() - priv.video_start_ms;
+            mmf_venc_push2(enc_ch, frame);
+            ui_set_record_time(record_time);
+        }
         priv.loop_last_frame = frame;
     } else {
         frame = priv.loop_last_frame;
@@ -728,7 +860,6 @@ int app_loop(maix::camera::Camera &camera, maix::display::Display &disp, maix::d
     if (priv.video_start_flag && !priv.video_prepare_is_ok) {
         printf("Prepare record video\n");
         char *date = ui_get_sys_date();
-        printf("video_save_path:%s\n", priv.video_save_path.c_str());
         if (date) {
             string video_root_path = maix::app::get_video_path();
             string video_date(date);
@@ -738,24 +869,19 @@ int app_loop(maix::camera::Camera &camera, maix::display::Display &disp, maix::d
                 fs::mkdir(video_path);
             }
             std::vector<std::string> *file_list = fs::listdir(video_path);
-            printf("file_list_cnt:%ld\n", file_list->size());
-            string video_save_path = video_path + "/" + std::to_string(file_list->size()) +".h264";
             string video_mp4_path = video_path + "/" + std::to_string(file_list->size()) +".mp4";
-            printf("video_path path:%s  video_save_path:%s\n", video_path.c_str(), video_save_path.c_str());
             free(file_list);
             free(date);
 
-            priv.video_save_path = video_save_path;
-            priv.video_mp4_path = video_mp4_path;
-            priv.video_save_fd = open(video_save_path.c_str(), O_RDWR | O_CREAT, 0666);
-            if (priv.video_save_fd > 0) {
-                printf("open video file success\n");
+            if (priv.ffmpeg_packer) {  // pass the output file path to the packer when one exists
+                priv.ffmpeg_packer->config2("path", video_mp4_path);  // NOTE(review): the return value is ignored — confirm config2 cannot fail here
             }
+
+            priv.video_mp4_path = video_mp4_path;  // remember the chosen .mp4 path
+            printf("video_save_path:%s\n", priv.video_mp4_path.c_str());
         } else {
             printf("get date failed!\n");
-            priv.video_save_path = "";
             priv.video_mp4_path = "";
-            priv.video_save_fd = -1;
         }
 
         priv.video_prepare_is_ok = true;
@@ -763,21 +889,10 @@ int app_loop(maix::camera::Camera &camera, maix::display::Display &disp, maix::d
 
     if (priv.video_stop_flag) {
         printf("Stop video\n");
-        if (priv.video_save_fd > 0) {
-            close(priv.video_save_fd);
-            priv.video_save_fd = -1;
-        }
-
-        if (priv.video_mp4_path != "" && priv.video_save_path != "") {
-            char cmd[128];
-            snprintf(cmd, sizeof(cmd), "ffmpeg -loglevel quiet -i %s -c:v copy -c:a copy %s -y",
-                        priv.video_save_path.c_str(),
-                        priv.video_mp4_path.c_str());
-            system(cmd);
-            snprintf(cmd, sizeof(cmd), "rm %s", priv.video_save_path.c_str());
-            system(cmd);
-            system("sync");
+        if (priv.ffmpeg_packer) {  // close the packer when one exists
+            priv.ffmpeg_packer->close();
         }
+        system("sync");  // runs the shell "sync" command on every stop, whether or not a packer exists
 
         priv.video_stop_flag = false;
         priv.video_prepare_is_ok = false;