From 56fc581b0475b3aa4ac207d67430943e6a9b5b17 Mon Sep 17 00:00:00 2001 From: omigamedev Date: Wed, 13 Nov 2019 23:27:32 +0100 Subject: [PATCH] implement aligned memory to speed up video frame encoding --- src/app.cpp | 2 +- src/app_shaders.cpp | 2 ++ src/canvas_layer.cpp | 2 +- src/mp4enc.cpp | 16 +++++++-- src/mp4enc.h | 3 +- src/shader.cpp | 1 + src/shader.h | 1 + src/util.h | 86 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 108 insertions(+), 5 deletions(-) diff --git a/src/app.cpp b/src/app.cpp index fbd7176..b4b4503 100644 --- a/src/app.cpp +++ b/src/app.cpp @@ -801,7 +801,7 @@ void App::rec_loop() { canvas->m_canvas->m_dirty_stroke = false; PBO equirect = Canvas::I->m_layers_merge.gen_equirect_pbo( - Canvas::I->m_encoder->frame_size() / 4); + Canvas::I->m_encoder->frame_size()); std::this_thread::yield(); ImageRef img; img.create(equirect.width, equirect.height, equirect.map()); diff --git a/src/app_shaders.cpp b/src/app_shaders.cpp index 4ef6bf3..9eb3a1e 100644 --- a/src/app_shaders.cpp +++ b/src/app_shaders.cpp @@ -17,6 +17,8 @@ void App::initShaders() std::string ext = (const char*)glGetStringi(GL_EXTENSIONS, i); if (ext.find("shader_framebuffer_fetch") != std::string::npos) ShaderManager::ext_framebuffer_fetch = true; + if (ext.find("map_buffer_alignment") != std::string::npos) + ShaderManager::ext_map_aligned = true; #if __GLES__ && !__WEB__ if (ext.find("texture_float") != std::string::npos) ShaderManager::ext_float32 = true; diff --git a/src/canvas_layer.cpp b/src/canvas_layer.cpp index 54cbae7..697a168 100644 --- a/src/canvas_layer.cpp +++ b/src/canvas_layer.cpp @@ -110,7 +110,7 @@ PBO Layer::gen_equirect_pbo(glm::ivec2 size /*= { 0, 0 }*/) TextureCube cube = gen_cube(); std::this_thread::yield(); RTT latlong; - latlong.create(size.x * 4, size.y * 2); + latlong.create(size.x, size.y); std::this_thread::yield(); App::I->render_task([&] diff --git a/src/mp4enc.cpp b/src/mp4enc.cpp index 8266f6c..769710b 100644 --- a/src/mp4enc.cpp +++ b/src/mp4enc.cpp @@ -1,11 +1,13 @@ #include "pch.h" #include "mp4enc.h" +#include "util.h" #include #include #define MP4V2_NO_STDINT_DEFS #include +#include "shader.h" static void encoder_trace_callback(void* context, int level, const char* message) { @@ -105,13 +107,23 @@ bool MP4Encoder::encode(const Image& rgba) noexcept if (rgba.width != m_width || rgba.height != m_height) { Image resized = rgba.resize(m_width, m_height); + //libyuv::ARGBScale libyuv::ABGRToI420(resized.data(), resized.width * 4, pic.pData[0], m_width, pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height); } else { - libyuv::ABGRToI420(rgba.data(), rgba.width * 4, pic.pData[0], m_width, - pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height); + if (((uintptr_t)rgba.data() & 0xFF) != 0) + { + std::vector> aligned_buffer(rgba.data(), rgba.data() + (size_t)rgba.size()); + libyuv::ABGRToI420(aligned_buffer.data(), rgba.width * 4, pic.pData[0], m_width, + pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height); + } + else + { + libyuv::ABGRToI420(rgba.data(), rgba.width * 4, pic.pData[0], m_width, + pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height); + } } if (m_encoder->EncodeFrame(&pic, &info)) diff --git a/src/mp4enc.h b/src/mp4enc.h index 54dcf98..5f6d3d0 100644 --- a/src/mp4enc.h +++ b/src/mp4enc.h @@ -1,6 +1,7 @@ #pragma once #include "image.h" #include "serializer.h" +#include "util.h" class MP4Encoder : public Serializer::Type { @@ -25,7 +26,7 @@ class MP4Encoder : public Serializer::Type float m_framerate = 0; Header m_header; std::vector m_frames; - std::vector m_yuv_buffer; + std::vector> m_yuv_buffer; public: ~MP4Encoder(); bool init() noexcept; diff --git a/src/shader.cpp b/src/shader.cpp index 541b64f..c52e04e 100644 --- a/src/shader.cpp +++ b/src/shader.cpp @@ -10,6 +10,7 @@ bool ShaderManager::ext_framebuffer_fetch = false; bool ShaderManager::ext_float32 = false; bool ShaderManager::ext_float32_linear = false; bool ShaderManager::ext_float16 = false; +bool ShaderManager::ext_map_aligned = false; std::string Shader::read(const std::string& path) { diff --git a/src/shader.h b/src/shader.h index 45a946b..f6dedd8 100644 --- a/src/shader.h +++ b/src/shader.h @@ -113,6 +113,7 @@ public: static bool ext_float32; static bool ext_float32_linear; static bool ext_float16; + static bool ext_map_aligned; static bool load(kShader id, const std::string& path); static bool reload(); static bool create(kShader id, const std::string& vertex, const std::string& fragment); diff --git a/src/util.h b/src/util.h index e4ddeb3..e514aa0 100644 --- a/src/util.h +++ b/src/util.h @@ -29,6 +29,92 @@ uint16_t constexpr const_hash(const char* input) 5381; } +inline void* aligned_malloc(size_t size, size_t align) { + void* result; +#ifdef _MSC_VER + result = _aligned_malloc(size, align); +#else + if (posix_memalign(&result, align, size)) result = 0; +#endif + return result; +} + +inline void aligned_free(void* ptr) { +#ifdef _MSC_VER + _aligned_free(ptr); +#else + free(ptr); +#endif + +} + +// used as: std::vector > bla; +template +class AlignmentAllocator { +public: + typedef T value_type; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + typedef T* pointer; + typedef const T* const_pointer; + + typedef T& reference; + typedef const T& const_reference; + +public: + inline AlignmentAllocator() throw () { } + + template + inline AlignmentAllocator(const AlignmentAllocator&) throw () { } + + inline ~AlignmentAllocator() throw () { } + + inline pointer adress(reference r) { + return &r; + } + + inline const_pointer adress(const_reference r) const { + return &r; + } + + inline pointer allocate(size_type n) { + return (pointer)aligned_malloc(n * sizeof(value_type), N); + } + + inline void deallocate(pointer p, size_type) { + aligned_free(p); + } + + inline void construct(pointer p, const value_type& wert) { + new (p) value_type(wert); + } + + inline void destroy(pointer p) { + p->~value_type(); + } + + inline size_type max_size() const throw () { + return size_type(-1) / sizeof(value_type); + } + + template + struct rebind { + typedef AlignmentAllocator other; + }; + + bool operator!=(const AlignmentAllocator& other) const { + return !(*this == other); + } + + // Returns true if and only if storage allocated from *this + // can be deallocated from other, and vice versa. + // Always returns true for stateless allocators. + bool operator==(const AlignmentAllocator& other) const { + return true; + } +}; + template std::vector poly_remove_duplicate(const std::vector& v, const float tollerance = 0.001) {