Implement aligned memory allocation to speed up video frame encoding

This commit is contained in:
2019-11-13 23:27:32 +01:00
parent b719e4c7fc
commit 56fc581b04
8 changed files with 108 additions and 5 deletions

View File

@@ -801,7 +801,7 @@ void App::rec_loop()
{ {
canvas->m_canvas->m_dirty_stroke = false; canvas->m_canvas->m_dirty_stroke = false;
PBO equirect = Canvas::I->m_layers_merge.gen_equirect_pbo( PBO equirect = Canvas::I->m_layers_merge.gen_equirect_pbo(
Canvas::I->m_encoder->frame_size() / 4); Canvas::I->m_encoder->frame_size());
std::this_thread::yield(); std::this_thread::yield();
ImageRef img; ImageRef img;
img.create(equirect.width, equirect.height, equirect.map()); img.create(equirect.width, equirect.height, equirect.map());

View File

@@ -17,6 +17,8 @@ void App::initShaders()
std::string ext = (const char*)glGetStringi(GL_EXTENSIONS, i); std::string ext = (const char*)glGetStringi(GL_EXTENSIONS, i);
if (ext.find("shader_framebuffer_fetch") != std::string::npos) if (ext.find("shader_framebuffer_fetch") != std::string::npos)
ShaderManager::ext_framebuffer_fetch = true; ShaderManager::ext_framebuffer_fetch = true;
if (ext.find("map_buffer_alignment") != std::string::npos)
ShaderManager::ext_map_aligned = true;
#if __GLES__ && !__WEB__ #if __GLES__ && !__WEB__
if (ext.find("texture_float") != std::string::npos) if (ext.find("texture_float") != std::string::npos)
ShaderManager::ext_float32 = true; ShaderManager::ext_float32 = true;

View File

@@ -110,7 +110,7 @@ PBO Layer::gen_equirect_pbo(glm::ivec2 size /*= { 0, 0 }*/)
TextureCube cube = gen_cube(); TextureCube cube = gen_cube();
std::this_thread::yield(); std::this_thread::yield();
RTT latlong; RTT latlong;
latlong.create(size.x * 4, size.y * 2); latlong.create(size.x, size.y);
std::this_thread::yield(); std::this_thread::yield();
App::I->render_task([&] App::I->render_task([&]

View File

@@ -1,11 +1,13 @@
#include "pch.h" #include "pch.h"
#include "mp4enc.h" #include "mp4enc.h"
#include "util.h"
#include <codec_api.h> #include <codec_api.h>
#include <libyuv.h> #include <libyuv.h>
#define MP4V2_NO_STDINT_DEFS #define MP4V2_NO_STDINT_DEFS
#include <mp4v2/mp4v2.h> #include <mp4v2/mp4v2.h>
#include "shader.h"
static void encoder_trace_callback(void* context, int level, const char* message) static void encoder_trace_callback(void* context, int level, const char* message)
{ {
@@ -105,14 +107,24 @@ bool MP4Encoder::encode(const Image& rgba) noexcept
if (rgba.width != m_width || rgba.height != m_height) if (rgba.width != m_width || rgba.height != m_height)
{ {
Image resized = rgba.resize(m_width, m_height); Image resized = rgba.resize(m_width, m_height);
//libyuv::ARGBScale
libyuv::ABGRToI420(resized.data(), resized.width * 4, pic.pData[0], m_width, libyuv::ABGRToI420(resized.data(), resized.width * 4, pic.pData[0], m_width,
pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height); pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height);
} }
else else
{
if (((uintptr_t)rgba.data() & 0xFF) != 0)
{
std::vector<uint8_t, AlignmentAllocator<uint8_t, 16>> aligned_buffer(rgba.data(), rgba.data() + (size_t)rgba.size());
libyuv::ABGRToI420(aligned_buffer.data(), rgba.width * 4, pic.pData[0], m_width,
pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height);
}
else
{ {
libyuv::ABGRToI420(rgba.data(), rgba.width * 4, pic.pData[0], m_width, libyuv::ABGRToI420(rgba.data(), rgba.width * 4, pic.pData[0], m_width,
pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height); pic.pData[1], m_width / 2, pic.pData[2], m_width / 2, m_width, m_height);
} }
}
if (m_encoder->EncodeFrame(&pic, &info)) if (m_encoder->EncodeFrame(&pic, &info))
{ {

View File

@@ -1,6 +1,7 @@
#pragma once #pragma once
#include "image.h" #include "image.h"
#include "serializer.h" #include "serializer.h"
#include "util.h"
class MP4Encoder : public Serializer::Type class MP4Encoder : public Serializer::Type
{ {
@@ -25,7 +26,7 @@ class MP4Encoder : public Serializer::Type
float m_framerate = 0; float m_framerate = 0;
Header m_header; Header m_header;
std::vector<Frame> m_frames; std::vector<Frame> m_frames;
std::vector<uint8_t> m_yuv_buffer; std::vector<uint8_t, AlignmentAllocator<uint8_t, 16>> m_yuv_buffer;
public: public:
~MP4Encoder(); ~MP4Encoder();
bool init() noexcept; bool init() noexcept;

View File

@@ -10,6 +10,7 @@ bool ShaderManager::ext_framebuffer_fetch = false;
bool ShaderManager::ext_float32 = false; bool ShaderManager::ext_float32 = false;
bool ShaderManager::ext_float32_linear = false; bool ShaderManager::ext_float32_linear = false;
bool ShaderManager::ext_float16 = false; bool ShaderManager::ext_float16 = false;
bool ShaderManager::ext_map_aligned = false;
std::string Shader::read(const std::string& path) std::string Shader::read(const std::string& path)
{ {

View File

@@ -113,6 +113,7 @@ public:
static bool ext_float32; static bool ext_float32;
static bool ext_float32_linear; static bool ext_float32_linear;
static bool ext_float16; static bool ext_float16;
static bool ext_map_aligned;
static bool load(kShader id, const std::string& path); static bool load(kShader id, const std::string& path);
static bool reload(); static bool reload();
static bool create(kShader id, const std::string& vertex, const std::string& fragment); static bool create(kShader id, const std::string& vertex, const std::string& fragment);

View File

@@ -29,6 +29,92 @@ uint16_t constexpr const_hash(const char* input)
5381; 5381;
} }
/// Allocates `size` bytes whose address is a multiple of `align`.
///
/// @param size  Number of bytes to allocate.
/// @param align Requested alignment in bytes; must be a power of two.
/// @return Pointer to the storage, or nullptr when the allocation fails or
///         the platform rejects the alignment. Release it with aligned_free().
inline void* aligned_malloc(size_t size, size_t align) {
#ifdef _MSC_VER
    return _aligned_malloc(size, align);
#else
    // posix_memalign only accepts alignments that are a multiple of
    // sizeof(void*). A smaller power of two divides sizeof(void*), so rounding
    // up still honours the request and matches what the MSVC branch accepts.
    const bool is_power_of_two = align != 0 && (align & (align - 1)) == 0;
    if (is_power_of_two && align < sizeof(void*)) {
        align = sizeof(void*);
    }

    void* result = nullptr;
    if (posix_memalign(&result, align, size) != 0) {
        // On error the value written to `result` is not something to rely on.
        return nullptr;
    }
    return result;
#endif
}
/// Releases storage previously obtained from aligned_malloc().
///
/// @param ptr Pointer returned by aligned_malloc().
inline void aligned_free(void* ptr) {
#if !defined(_MSC_VER)
    // posix_memalign storage is returned through the regular free().
    free(ptr);
#else
    // _aligned_malloc storage has its own matching release function.
    _aligned_free(ptr);
#endif
}
/// Stateless allocator that hands out storage aligned to N bytes through
/// aligned_malloc() / aligned_free().
///
/// Usage: std::vector<T, AlignmentAllocator<T, 16>> values;
///
/// @tparam T Element type.
/// @tparam N Alignment in bytes; must be a power of two.
template <typename T, std::size_t N = 16>
class AlignmentAllocator {
    // A non-power-of-two alignment can never be satisfied, so reject it at
    // compile time instead of failing on the first allocation.
    static_assert(N != 0 && (N & (N - 1)) == 0,
                  "AlignmentAllocator: N must be a power of two");

public:
    typedef T value_type;
    typedef std::size_t size_type;
    typedef std::ptrdiff_t difference_type;
    typedef T* pointer;
    typedef const T* const_pointer;
    typedef T& reference;
    typedef const T& const_reference;

    /// Rebinds the allocator to another element type with the same alignment.
    template <typename T2>
    struct rebind {
        typedef AlignmentAllocator<T2, N> other;
    };

public:
    AlignmentAllocator() noexcept { }

    /// Converting constructor; the allocator carries no state to copy.
    template <typename T2>
    AlignmentAllocator(const AlignmentAllocator<T2, N>&) noexcept { }

    ~AlignmentAllocator() noexcept { }

    /// Returns the address of `r`.
    pointer address(reference r) const noexcept {
        return &r;
    }

    const_pointer address(const_reference r) const noexcept {
        return &r;
    }

    /// Misspelled legacy name, kept so existing callers keep compiling.
    pointer adress(reference r) {
        return &r;
    }

    const_pointer adress(const_reference r) const {
        return &r;
    }

    /// Allocates uninitialised storage for `n` objects, aligned to N bytes.
    ///
    /// @throws std::bad_array_new_length if `n * sizeof(T)` would overflow.
    /// @throws std::bad_alloc if the underlying allocation fails.
    pointer allocate(size_type n) {
        if (n == 0) {
            return nullptr;
        }
        if (n > max_size()) {
            throw std::bad_array_new_length();
        }
        void* raw = aligned_malloc(n * sizeof(value_type), N);
        if (raw == nullptr) {
            // Containers dereference the result unconditionally, so a failed
            // allocation has to be reported by throwing, never by nullptr.
            throw std::bad_alloc();
        }
        return static_cast<pointer>(raw);
    }

    /// Releases storage obtained from allocate(); the count is not needed.
    void deallocate(pointer p, size_type) noexcept {
        aligned_free(p);
    }

    /// Constructs a U at `p`, forwarding the arguments so that rvalues are
    /// moved rather than copied and move-only types are supported.
    template <typename U, typename... Args>
    void construct(U* p, Args&&... args) {
        ::new (static_cast<void*>(p)) U(static_cast<Args&&>(args)...);
    }

    /// Runs the destructor of the object at `p` without releasing storage.
    template <typename U>
    void destroy(U* p) {
        p->~U();
    }

    /// Largest element count whose byte size still fits in size_type.
    size_type max_size() const noexcept {
        return size_type(-1) / sizeof(value_type);
    }

    bool operator!=(const AlignmentAllocator<T, N>& other) const noexcept {
        return !(*this == other);
    }

    // Returns true if and only if storage allocated from *this
    // can be deallocated from other, and vice versa.
    // Always returns true for stateless allocators.
    bool operator==(const AlignmentAllocator<T, N>&) const noexcept {
        return true;
    }
};
template<class T> template<class T>
std::vector<T> poly_remove_duplicate(const std::vector<T>& v, const float tollerance = 0.001) std::vector<T> poly_remove_duplicate(const std::vector<T>& v, const float tollerance = 0.001)
{ {