diff --git a/.gitmodules b/.gitmodules index c64e3f4..e26413f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -16,3 +16,6 @@ [submodule "libs/curl-android-ios"] path = libs/curl-android-ios url = https://github.com/gcesarmza/curl-android-ios +[submodule "libs/jpeg"] + path = libs/jpeg + url = https://github.com/richgel999/jpeg-compressor.git diff --git a/engine.xcodeproj/project.pbxproj b/engine.xcodeproj/project.pbxproj index 93a0cf6..4cf7c4f 100644 --- a/engine.xcodeproj/project.pbxproj +++ b/engine.xcodeproj/project.pbxproj @@ -15,6 +15,10 @@ AD0E119C1ECA215600CDA6BB /* app_events.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD0E11921ECA20F200CDA6BB /* app_events.cpp */; }; AD0E119D1ECA215600CDA6BB /* app_layout.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD0E11931ECA20F200CDA6BB /* app_layout.cpp */; }; AD0E119E1ECA215600CDA6BB /* app_shaders.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD0E11941ECA20F200CDA6BB /* app_shaders.cpp */; }; + AD0E11A11ECA619F00CDA6BB /* jpgd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD0E119F1ECA619F00CDA6BB /* jpgd.cpp */; }; + AD0E11A31ECA61B500CDA6BB /* jpge.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD0E11A21ECA61B500CDA6BB /* jpge.cpp */; }; + AD0E11A41ECA61B900CDA6BB /* jpge.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD0E11A21ECA61B500CDA6BB /* jpge.cpp */; }; + AD0E11A51ECA61B900CDA6BB /* jpgd.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD0E119F1ECA619F00CDA6BB /* jpgd.cpp */; }; AD10637F1EC7ADFA002A525F /* node_border.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD1063511EC7ADFA002A525F /* node_border.cpp */; }; AD1063801EC7ADFA002A525F /* node_button_custom.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD1063531EC7ADFA002A525F /* node_button_custom.cpp */; }; AD1063811EC7ADFA002A525F /* node_button.cpp in Sources */ = {isa = PBXBuildFile; fileRef = AD1063551EC7ADFA002A525F /* node_button.cpp */; }; @@ -147,6 +151,8 @@ AD0E11941ECA20F200CDA6BB /* app_shaders.cpp */ 
= {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = app_shaders.cpp; sourceTree = ""; }; AD0E11951ECA20F200CDA6BB /* node_scroll.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = node_scroll.cpp; sourceTree = ""; }; AD0E11961ECA20F200CDA6BB /* node_scroll.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = node_scroll.h; sourceTree = ""; }; + AD0E119F1ECA619F00CDA6BB /* jpgd.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = jpgd.cpp; path = libs/jpeg/jpgd.cpp; sourceTree = ""; }; + AD0E11A21ECA61B500CDA6BB /* jpge.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = jpge.cpp; path = libs/jpeg/jpge.cpp; sourceTree = ""; }; AD1063511EC7ADFA002A525F /* node_border.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = node_border.cpp; sourceTree = ""; }; AD1063521EC7ADFA002A525F /* node_border.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = node_border.h; sourceTree = ""; }; AD1063531EC7ADFA002A525F /* node_button_custom.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = node_button_custom.cpp; sourceTree = ""; }; @@ -432,6 +438,8 @@ AD58E0731E3421CB006ACC15 /* libs */ = { isa = PBXGroup; children = ( + AD0E11A21ECA61B500CDA6BB /* jpge.cpp */, + AD0E119F1ECA619F00CDA6BB /* jpgd.cpp */, AD58E0781E342205006ACC15 /* tinyxml2.cpp */, AD58E0741E3421F2006ACC15 /* YGNodeList.c */, AD58E0751E3421F2006ACC15 /* Yoga.c */, @@ -587,12 +595,14 @@ AD4C08DC1E89BD0F0051D85F /* canvas.cpp in Sources */, AD95AEC61E41EDEC002DD03A /* font.cpp in Sources */, AD1063911EC7ADFA002A525F /* node_stroke_preview.cpp in Sources */, + AD0E11A31ECA61B500CDA6BB /* jpge.cpp in Sources */, AD1063811EC7ADFA002A525F /* node_button.cpp in Sources 
*/, AD58E0531E107411006ACC15 /* main.cpp in Sources */, ADB1C3DA1EA3A156009A65BD /* event.cpp in Sources */, AD4C08D91E89BD0F0051D85F /* asset.cpp in Sources */, AD1063851EC7ADFA002A525F /* node_dialog_open.cpp in Sources */, AD58E0681E2A7741006ACC15 /* image.cpp in Sources */, + AD0E11A11ECA619F00CDA6BB /* jpgd.cpp in Sources */, AD1063901EC7ADFA002A525F /* node_slider.cpp in Sources */, AD0E11981ECA20F200CDA6BB /* app_layout.cpp in Sources */, AD58E0771E3421F2006ACC15 /* Yoga.c in Sources */, @@ -613,6 +623,7 @@ files = ( ADD7D2951EBF9E1C00D5A897 /* bezier.cpp in Sources */, AD1063A21EC7AE92002A525F /* node_panel_brush.cpp in Sources */, + AD0E11A51ECA61B900CDA6BB /* jpgd.cpp in Sources */, ADD7D29E1EBF9E1C00D5A897 /* shape.cpp in Sources */, ADD7D2901EBF9E1C00D5A897 /* canvas_modes.cpp in Sources */, AD10639A1EC7AE92002A525F /* node_canvas.cpp in Sources */, @@ -639,6 +650,7 @@ ADD7D2971EBF9E1C00D5A897 /* canvas.cpp in Sources */, AD1063AC1EC7AE92002A525F /* node_viewport.cpp in Sources */, ADD7D2991EBF9E1C00D5A897 /* font.cpp in Sources */, + AD0E11A41ECA61B900CDA6BB /* jpge.cpp in Sources */, AD10639C1EC7AE92002A525F /* node_color_quad.cpp in Sources */, AD1063961EC7AE92002A525F /* node.cpp in Sources */, AD10639D1EC7AE92002A525F /* node_dialog_open.cpp in Sources */, @@ -732,6 +744,7 @@ libs/yoga, libs/tinyxml2, /opt/local/include, + libs/jpeg, ); LIBRARY_SEARCH_PATHS = /opt/local/lib; MACOSX_DEPLOYMENT_TARGET = 10.11; @@ -779,6 +792,7 @@ libs/yoga, libs/tinyxml2, /opt/local/include, + libs/jpeg, ); LIBRARY_SEARCH_PATHS = /opt/local/lib; MACOSX_DEPLOYMENT_TARGET = 10.11; diff --git a/engine/app.h b/engine/app.h index 6faf38c..1734b10 100644 --- a/engine/app.h +++ b/engine/app.h @@ -61,8 +61,8 @@ public: void update_memory_usage(size_t bytes); void update(float dt); void resize(float w, float h); - bool mouse_down(int button, float x, float y); - bool mouse_move(float x, float y); + bool mouse_down(int button, float x, float y, float pressure); + bool 
mouse_move(float x, float y, float pressure); bool mouse_up(int button, float x, float y); bool mouse_scroll(float x, float y, float delta); bool mouse_cancel(int button); diff --git a/engine/app_events.cpp b/engine/app_events.cpp index 012853e..ce4dfb6 100644 --- a/engine/app_events.cpp +++ b/engine/app_events.cpp @@ -11,20 +11,22 @@ void App::resize(float w, float h) main->update(w , h, zoom); } -bool App::mouse_down(int button, float x, float y) +bool App::mouse_down(int button, float x, float y, float pressure) { MouseEvent e; e.m_type = button ? kEventType::MouseDownR : kEventType::MouseDownL; e.m_pos = { x / zoom, y / zoom }; + e.m_pressure = pressure; auto ret = layout[main_id]->on_event(&e); layout[main_id]->update(); return ret == kEventResult::Consumed; } -bool App::mouse_move(float x, float y) +bool App::mouse_move(float x, float y, float pressure) { MouseEvent e; e.m_type = kEventType::MouseMove; e.m_pos = { x / zoom, y / zoom }; + e.m_pressure = pressure; kEventResult ret = kEventResult::Available; if (auto* main = layout[main_id]) ret = main->on_event(&e); diff --git a/engine/brush.cpp b/engine/brush.cpp index a875b82..2b3e880 100644 --- a/engine/brush.cpp +++ b/engine/brush.cpp @@ -165,7 +165,7 @@ ui::StrokeSample ui::Stroke::randomize_sample(const glm::vec2& pos, float pressu s.angle = (m_brush.m_tip_angle + rnd_nor() * m_brush.m_jitter_angle) * (float)(M_PI * 2.0); s.pos = pos + (rnd_vec() * m_brush.m_jitter_spread * 100.f); s.size = 100.f * m_brush.m_tip_size * (1.f - rnd_nor() * m_brush.m_jitter_scale); - s.flow = m_brush.m_tip_flow * (1.f - rnd_nor() * m_brush.m_jitter_flow); + s.flow = m_brush.m_tip_flow * (1.f - rnd_nor() * m_brush.m_jitter_flow) * pressure; return s; } std::vector ui::Stroke::compute_samples() diff --git a/engine/canvas.cpp b/engine/canvas.cpp index 606c584..4da8871 100644 --- a/engine/canvas.cpp +++ b/engine/canvas.cpp @@ -599,9 +599,12 @@ void ui::Canvas::export_equirectangular(std::string data_path) auto latlong_data = 
std::make_unique(m_latlong.bytes()); m_latlong.readTextureData(latlong_data.get()); static char name[128]; - sprintf(name, "%s/latlong.png", data_path.c_str()); + sprintf(name, "%s/latlong.jpg", data_path.c_str()); LOG("writing %s", name); - int ret = stbi_write_png(name, m_latlong.getWidth(), m_latlong.getHeight(), 4, latlong_data.get(), m_latlong.stride()); + jpge::params params; + params.m_quality = 100; + bool saved = jpge::compress_image_to_jpeg_file(name, m_latlong.getWidth(), m_latlong.getHeight(), 4, latlong_data.get(), params); + //int ret = stbi_write_png(name, m_latlong.getWidth(), m_latlong.getHeight(), 4, latlong_data.get(), m_latlong.stride()); } glDeleteTextures(1, &cube_id); @@ -626,7 +629,6 @@ void ui::Canvas::export_anim(std::string data_path) // prepare common states glViewport(0, 0, m_width, m_height); glDisable(GL_BLEND); - glActiveTexture(GL_TEXTURE0); RTT m_latlong; m_latlong.create(m_width * 4, m_height * 2); // NOTE: w and h must be equal to make sense @@ -646,19 +648,58 @@ void ui::Canvas::export_anim(std::string data_path) }; for (auto layer_index : m_order) { + glViewport(0, 0, m_width, m_height); for (int i = 0; i < 6; i++) { - m_layers[layer_index].m_rtt[i].bindFramebuffer(); + m_tmp[i].bindFramebuffer(); + m_tmp[i].clear({ 1, 1, 1, 1 }); + + ui::ShaderManager::use(kShader::Checkerboard); + ui::ShaderManager::u_mat4(kShaderUniform::MVP, glm::ortho(-.5f, .5f, -.5f, .5f, -1.f, 1.f)); + m_plane.draw_fill(); + + // copy to tmp2 for layer blending + glActiveTexture(GL_TEXTURE0); // TODO: maybe remove this line + m_tex2[i].bind(); + glCopyTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 0, 0, m_width, m_height); + m_tex2[i].unbind(); + + m_layers[layer_index].m_rtt[i].bindTexture(); + glActiveTexture(GL_TEXTURE1); + m_tex2[i].bind(); + m_sampler.bind(0); + m_sampler_bg.bind(1); + ShaderManager::use(ui::kShader::StrokeLayer); + ShaderManager::u_int(kShaderUniform::TexBG, 1); + ShaderManager::u_float(kShaderUniform::Alpha, 1); + 
ShaderManager::u_int(kShaderUniform::Tex, 0); + ShaderManager::u_mat4(kShaderUniform::MVP, glm::ortho(-.5f, .5f, -.5f, .5f, -1.f, 1.f)); + m_plane.draw_fill(); + m_sampler.unbind(); + m_sampler_bg.unbind(); + m_tex2[i].unbind(); + glActiveTexture(GL_TEXTURE0); + m_layers[layer_index].m_rtt[i].unbindTexture(); + // copy result to cubemap glBindTexture(GL_TEXTURE_CUBE_MAP, cube_id); glCopyTexImage2D(faces[i], 0, GL_RGBA8, 0, 0, m_width, m_height, 0); glBindTexture(GL_TEXTURE_CUBE_MAP, 0); - m_layers[layer_index].m_rtt[i].unbindFramebuffer(); + + m_tmp[i].unbindFramebuffer(); + +// m_layers[layer_index].m_rtt[i].bindFramebuffer(); +// // copy result to cubemap +// glBindTexture(GL_TEXTURE_CUBE_MAP, cube_id); +// glCopyTexImage2D(faces[i], 0, GL_RGBA8, 0, 0, m_width, m_height, 0); +// glBindTexture(GL_TEXTURE_CUBE_MAP, 0); +// m_layers[layer_index].m_rtt[i].unbindFramebuffer(); } glViewport(0, 0, m_latlong.getWidth(), m_latlong.getHeight()); - m_latlong.clear({ 1, 1, 1, 1 }); + glActiveTexture(GL_TEXTURE0); m_latlong.bindFramebuffer(); + m_latlong.clear({ 1, 1, 1, 1 }); ui::ShaderManager::use(kShader::Equirect); ui::ShaderManager::u_mat4(kShaderUniform::MVP, glm::ortho(-.5f, .5f, -.5f, .5f, -1.f, 1.f)); ui::ShaderManager::u_int(kShaderUniform::Tex, 0); @@ -672,9 +713,12 @@ void ui::Canvas::export_anim(std::string data_path) auto latlong_data = std::make_unique(m_latlong.bytes()); m_latlong.readTextureData(latlong_data.get()); static char name[128]; - sprintf(name, "%s/latlong-frame%02d.png", data_path.c_str(), layer_index); + sprintf(name, "%s/latlong-frame%02d.jpg", data_path.c_str(), layer_index); LOG("writing %s", name); - int ret = stbi_write_png(name, m_latlong.getWidth(), m_latlong.getHeight(), 4, latlong_data.get(), m_latlong.stride()); + //int ret = stbi_write_png(name, m_latlong.getWidth(), m_latlong.getHeight(), 4, latlong_data.get(), m_latlong.stride()); + jpge::params params; + params.m_quality = 100; + bool saved = jpge::compress_image_to_jpeg_file(name, 
m_latlong.getWidth(), m_latlong.getHeight(), 4, latlong_data.get(), params); } } diff --git a/engine/canvas_modes.cpp b/engine/canvas_modes.cpp index d12c656..5b79e74 100644 --- a/engine/canvas_modes.cpp +++ b/engine/canvas_modes.cpp @@ -71,7 +71,7 @@ void CanvasModePen::on_MouseEvent(MouseEvent* me, glm::vec2& loc) switch (me->m_type) { case kEventType::MouseDownL: - canvas->stroke_start(loc, 1.f, node->m_brush); + canvas->stroke_start(loc, me->m_pressure, node->m_brush); m_dragging = true; node->mouse_capture(); break; @@ -82,7 +82,7 @@ void CanvasModePen::on_MouseEvent(MouseEvent* me, glm::vec2& loc) break; case kEventType::MouseMove: if (m_dragging) - canvas->stroke_update(loc, 1.f); + canvas->stroke_update(loc, me->m_pressure); break; case kEventType::MouseCancel: canvas->stroke_cancel(); diff --git a/engine/event.h b/engine/event.h index 2cabda6..77293c8 100644 --- a/engine/event.h +++ b/engine/event.h @@ -58,6 +58,7 @@ class MouseEvent : public Event public: MouseEvent() { m_cat = kEventCategory::MouseEvent; } glm::vec2 m_pos; + float m_pressure; float m_scroll_delta; }; diff --git a/engine/main.cpp b/engine/main.cpp index e7a1c46..0818441 100644 --- a/engine/main.cpp +++ b/engine/main.cpp @@ -202,14 +202,14 @@ static CVReturn MyDisplayLinkCallback(CVDisplayLinkRef displayLink, const CVTime { CGLLockContext([[self openGLContext] CGLContextObj]); auto mouseLoc = [self convertPoint:[theEvent locationInWindow] fromView:nil]; - App::I.mouse_down(0, mouseLoc.x, App::I.height - mouseLoc.y - 1); + App::I.mouse_down(0, mouseLoc.x, App::I.height - mouseLoc.y - 1, theEvent.pressure); CGLUnlockContext([[self openGLContext] CGLContextObj]); } - (void)rightMouseDown:(NSEvent *)theEvent { CGLLockContext([[self openGLContext] CGLContextObj]); auto mouseLoc = [self convertPoint:[theEvent locationInWindow] fromView:nil]; - App::I.mouse_down(1, mouseLoc.x, App::I.height - mouseLoc.y - 1); + App::I.mouse_down(1, mouseLoc.x, App::I.height - mouseLoc.y - 1, theEvent.pressure); 
CGLUnlockContext([[self openGLContext] CGLContextObj]); } - (void)mouseUp:(NSEvent *)theEvent @@ -230,21 +230,21 @@ static CVReturn MyDisplayLinkCallback(CVDisplayLinkRef displayLink, const CVTime { CGLLockContext([[self openGLContext] CGLContextObj]); auto mouseLoc = [self convertPoint:[theEvent locationInWindow] fromView:nil]; - App::I.mouse_move(mouseLoc.x, App::I.height - mouseLoc.y - 1); + App::I.mouse_move(mouseLoc.x, App::I.height - mouseLoc.y - 1, theEvent.pressure); CGLUnlockContext([[self openGLContext] CGLContextObj]); } -(void)mouseDragged:(NSEvent *)theEvent { CGLLockContext([[self openGLContext] CGLContextObj]); auto mouseLoc = [self convertPoint:[theEvent locationInWindow] fromView:nil]; - App::I.mouse_move(mouseLoc.x, App::I.height - mouseLoc.y - 1); + App::I.mouse_move(mouseLoc.x, App::I.height - mouseLoc.y - 1, theEvent.pressure); CGLUnlockContext([[self openGLContext] CGLContextObj]); } - (void)rightMouseDragged:(NSEvent *)theEvent { CGLLockContext([[self openGLContext] CGLContextObj]); auto mouseLoc = [self convertPoint:[theEvent locationInWindow] fromView:nil]; - App::I.mouse_move(mouseLoc.x, App::I.height - mouseLoc.y - 1); + App::I.mouse_move(mouseLoc.x, App::I.height - mouseLoc.y - 1, theEvent.pressure); CGLUnlockContext([[self openGLContext] CGLContextObj]); } - (void)scrollWheel:(NSEvent *)theEvent @@ -269,14 +269,6 @@ static CVReturn MyDisplayLinkCallback(CVDisplayLinkRef displayLink, const CVTime auto chars = [theEvent characters]; App::I.key_up(convert_key(keyCode)); CGLUnlockContext([[self openGLContext] CGLContextObj]); -} -- (void)tabletPoint:(NSEvent *)theEvent -{ - -} -- (void)tabletProximity:(NSEvent *)theEvent -{ - } @end diff --git a/engine/pch.h b/engine/pch.h index eebac77..b2597bf 100644 --- a/engine/pch.h +++ b/engine/pch.h @@ -84,6 +84,8 @@ #include #include +#include +#include #endif #include diff --git a/libs/jpeg/jpgd.cpp b/libs/jpeg/jpgd.cpp new file mode 100644 index 0000000..eb4fcfb --- /dev/null +++ 
b/libs/jpeg/jpgd.cpp @@ -0,0 +1,3172 @@ +// jpgd.cpp - C++ class for JPEG decompression. +// Public domain, Rich Geldreich +// Alex Evans: Linear memory allocator (taken from jpge.h). +// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless) +// +// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2. +// +// Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling. +// Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain" +// http://vision.ai.uiuc.edu/~dugad/research/dct/index.html + +#include "jpgd.h" +#include + +#include +#define JPGD_ASSERT(x) assert(x) + +#ifdef _MSC_VER +#pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable +#endif + +// Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling). +// This is slower, but results in higher quality on images with highly saturated colors. +#define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1 + +#define JPGD_TRUE (1) +#define JPGD_FALSE (0) + +#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b)) +#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b)) + +namespace jpgd { + +static inline void *jpgd_malloc(size_t nSize) { return malloc(nSize); } +static inline void jpgd_free(void *p) { free(p); } + +// DCT coefficients are stored in this sequence. 
+static int g_ZAG[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; + +enum JPEG_MARKER +{ + M_SOF0 = 0xC0, M_SOF1 = 0xC1, M_SOF2 = 0xC2, M_SOF3 = 0xC3, M_SOF5 = 0xC5, M_SOF6 = 0xC6, M_SOF7 = 0xC7, M_JPG = 0xC8, + M_SOF9 = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT = 0xC4, M_DAC = 0xCC, + M_RST0 = 0xD0, M_RST1 = 0xD1, M_RST2 = 0xD2, M_RST3 = 0xD3, M_RST4 = 0xD4, M_RST5 = 0xD5, M_RST6 = 0xD6, M_RST7 = 0xD7, + M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_DNL = 0xDC, M_DRI = 0xDD, M_DHP = 0xDE, M_EXP = 0xDF, + M_APP0 = 0xE0, M_APP15 = 0xEF, M_JPG0 = 0xF0, M_JPG13 = 0xFD, M_COM = 0xFE, M_TEM = 0x01, M_ERROR = 0x100, RST0 = 0xD0 +}; + +enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 }; + +#define CONST_BITS 13 +#define PASS1_BITS 2 +#define SCALEDONE ((int32)1) + +#define FIX_0_298631336 ((int32)2446) /* FIX(0.298631336) */ +#define FIX_0_390180644 ((int32)3196) /* FIX(0.390180644) */ +#define FIX_0_541196100 ((int32)4433) /* FIX(0.541196100) */ +#define FIX_0_765366865 ((int32)6270) /* FIX(0.765366865) */ +#define FIX_0_899976223 ((int32)7373) /* FIX(0.899976223) */ +#define FIX_1_175875602 ((int32)9633) /* FIX(1.175875602) */ +#define FIX_1_501321110 ((int32)12299) /* FIX(1.501321110) */ +#define FIX_1_847759065 ((int32)15137) /* FIX(1.847759065) */ +#define FIX_1_961570560 ((int32)16069) /* FIX(1.961570560) */ +#define FIX_2_053119869 ((int32)16819) /* FIX(2.053119869) */ +#define FIX_2_562915447 ((int32)20995) /* FIX(2.562915447) */ +#define FIX_3_072711026 ((int32)25172) /* FIX(3.072711026) */ + +#define DESCALE(x,n) (((x) + (SCALEDONE << ((n)-1))) >> (n)) +#define DESCALE_ZEROSHIFT(x,n) (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n)) + +#define MULTIPLY(var, cnst) ((var) * (cnst)) + +#define CLAMP(i) 
((static_cast(i) > 255) ? (((~i) >> 31) & 0xFF) : (i)) + +// Compiler creates a fast path 1D IDCT for X non-zero columns +template +struct Row +{ + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + // ACCESS_COL() will be optimized at compile time to either an array access, or 0. + #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0) + + const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = (ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS; + const int tmp1 = (ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS; + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, - FIX_0_899976223); + const int az2 = MULTIPLY(bz2, - FIX_2_562915447); + const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4; + + pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS); + pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS); + pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS); + pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS); + pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS); + pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS); + pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS); + pTemp[4] = 
DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS); + } +}; + +template <> +struct Row<0> +{ + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { +#ifdef _MSC_VER + pTemp; pSrc; +#endif + } +}; + +template <> +struct Row<1> +{ + static void idct(int* pTemp, const jpgd_block_t* pSrc) + { + const int dcval = (pSrc[0] << PASS1_BITS); + + pTemp[0] = dcval; + pTemp[1] = dcval; + pTemp[2] = dcval; + pTemp[3] = dcval; + pTemp[4] = dcval; + pTemp[5] = dcval; + pTemp[6] = dcval; + pTemp[7] = dcval; + } +}; + +// Compiler creates a fast path 1D IDCT for X non-zero rows +template +struct Col +{ + static void idct(uint8* pDst_ptr, const int* pTemp) + { + // ACCESS_ROW() will be optimized at compile time to either an array access, or 0. + #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0) + + const int z2 = ACCESS_ROW(2); + const int z3 = ACCESS_ROW(6); + + const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100); + const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065); + const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865); + + const int tmp0 = (ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS; + const int tmp1 = (ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS; + + const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2; + + const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1); + + const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3; + const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602); + + const int az1 = MULTIPLY(bz1, - FIX_0_899976223); + const int az2 = MULTIPLY(bz2, - FIX_2_562915447); + const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5; + const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5; + + const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3; + const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4; + const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3; + const int btmp3 = MULTIPLY(atmp3, 
FIX_1_501321110) + az1 + az4; + + int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*0] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*7] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*1] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*6] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*2] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*5] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*3] = (uint8)CLAMP(i); + + i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3); + pDst_ptr[8*4] = (uint8)CLAMP(i); + } +}; + +template <> +struct Col<1> +{ + static void idct(uint8* pDst_ptr, const int* pTemp) + { + int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3); + const uint8 dcval_clamped = (uint8)CLAMP(dcval); + pDst_ptr[0*8] = dcval_clamped; + pDst_ptr[1*8] = dcval_clamped; + pDst_ptr[2*8] = dcval_clamped; + pDst_ptr[3*8] = dcval_clamped; + pDst_ptr[4*8] = dcval_clamped; + pDst_ptr[5*8] = dcval_clamped; + pDst_ptr[6*8] = dcval_clamped; + pDst_ptr[7*8] = dcval_clamped; + } +}; + +static const uint8 s_idct_row_table[] = +{ + 1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0, + 4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0, + 6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0, + 6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0, + 8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 
8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2, + 8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2, + 8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4, + 8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8, +}; + +static const uint8 s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 }; + +void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag) +{ + JPGD_ASSERT(block_max_zag >= 1); + JPGD_ASSERT(block_max_zag <= 64); + + if (block_max_zag <= 1) + { + int k = ((pSrc_ptr[0] + 4) >> 3) + 128; + k = CLAMP(k); + k = k | (k<<8); + k = k | (k<<16); + + for (int i = 8; i > 0; i--) + { + *(int*)&pDst_ptr[0] = k; + *(int*)&pDst_ptr[4] = k; + pDst_ptr += 8; + } + return; + } + + int temp[64]; + + const jpgd_block_t* pSrc = pSrc_ptr; + int* pTemp = temp; + + const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8]; + int i; + for (i = 8; i > 0; i--, pRow_tab++) + { + switch (*pRow_tab) + { + case 0: Row<0>::idct(pTemp, pSrc); break; + case 1: Row<1>::idct(pTemp, pSrc); break; + case 2: Row<2>::idct(pTemp, pSrc); break; + case 3: Row<3>::idct(pTemp, pSrc); break; + case 4: Row<4>::idct(pTemp, pSrc); break; + case 5: Row<5>::idct(pTemp, pSrc); break; + case 6: Row<6>::idct(pTemp, pSrc); break; + case 7: Row<7>::idct(pTemp, pSrc); break; + case 8: Row<8>::idct(pTemp, pSrc); break; + } + + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + + const int nonzero_rows = s_idct_col_table[block_max_zag - 1]; + for (i = 8; i > 0; i--) + { + switch (nonzero_rows) + { + case 1: Col<1>::idct(pDst_ptr, pTemp); break; + case 2: 
Col<2>::idct(pDst_ptr, pTemp); break; + case 3: Col<3>::idct(pDst_ptr, pTemp); break; + case 4: Col<4>::idct(pDst_ptr, pTemp); break; + case 5: Col<5>::idct(pDst_ptr, pTemp); break; + case 6: Col<6>::idct(pDst_ptr, pTemp); break; + case 7: Col<7>::idct(pDst_ptr, pTemp); break; + case 8: Col<8>::idct(pDst_ptr, pTemp); break; + } + + pTemp++; + pDst_ptr++; + } +} + +void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr) +{ + int temp[64]; + int* pTemp = temp; + const jpgd_block_t* pSrc = pSrc_ptr; + + for (int i = 4; i > 0; i--) + { + Row<4>::idct(pTemp, pSrc); + pSrc += 8; + pTemp += 8; + } + + pTemp = temp; + for (int i = 8; i > 0; i--) + { + Col<4>::idct(pDst_ptr, pTemp); + pTemp++; + pDst_ptr++; + } +} + +// Retrieve one character from the input stream. +inline uint jpeg_decoder::get_char() +{ + // Any bytes remaining in buffer? + if (!m_in_buf_left) + { + // Try to get more bytes. + prep_in_buffer(); + // Still nothing to get? + if (!m_in_buf_left) + { + // Pad the end of the stream with 0xFF 0xD9 (EOI marker) + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; +} + +// Same as previous method, except can indicate if the character is a pad character or not. +inline uint jpeg_decoder::get_char(bool *pPadding_flag) +{ + if (!m_in_buf_left) + { + prep_in_buffer(); + if (!m_in_buf_left) + { + *pPadding_flag = true; + int t = m_tem_flag; + m_tem_flag ^= 1; + if (t) + return 0xD9; + else + return 0xFF; + } + } + + *pPadding_flag = false; + + uint c = *m_pIn_buf_ofs++; + m_in_buf_left--; + + return c; +} + +// Inserts a previously retrieved character back into the input buffer. +inline void jpeg_decoder::stuff_char(uint8 q) +{ + *(--m_pIn_buf_ofs) = q; + m_in_buf_left++; +} + +// Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered. 
+inline uint8 jpeg_decoder::get_octet() +{ + bool padding_flag; + int c = get_char(&padding_flag); + + if (c == 0xFF) + { + if (padding_flag) + return 0xFF; + + c = get_char(&padding_flag); + if (padding_flag) + { + stuff_char(0xFF); + return 0xFF; + } + + if (c == 0x00) + return 0xFF; + else + { + stuff_char(static_cast(c)); + stuff_char(0xFF); + return 0xFF; + } + } + + return static_cast(c); +} + +// Retrieves a variable number of bits from the input stream. Does not recognize markers. +inline uint jpeg_decoder::get_bits(int num_bits) +{ + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + uint c1 = get_char(); + uint c2 = get_char(); + m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2; + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + JPGD_ASSERT(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; +} + +// Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered. +inline uint jpeg_decoder::get_bits_no_markers(int num_bits) +{ + if (!num_bits) + return 0; + + uint i = m_bit_buf >> (32 - num_bits); + + if ((m_bits_left -= num_bits) <= 0) + { + m_bit_buf <<= (num_bits += m_bits_left); + + if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) + { + uint c1 = get_octet(); + uint c2 = get_octet(); + m_bit_buf |= (c1 << 8) | c2; + } + else + { + m_bit_buf |= ((uint)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1]; + m_in_buf_left -= 2; + m_pIn_buf_ofs += 2; + } + + m_bit_buf <<= -m_bits_left; + + m_bits_left += 16; + + JPGD_ASSERT(m_bits_left >= 0); + } + else + m_bit_buf <<= num_bits; + + return i; +} + +// Decodes a Huffman encoded symbol. +inline int jpeg_decoder::huff_decode(huff_tables *pH) +{ + int symbol; + + // Check first 8-bits: do we have a complete symbol? 
+ if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) + { + // Decode more bits, use a tree traversal to find symbol. + int ofs = 23; + do + { + symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + } + else + get_bits_no_markers(pH->code_size[symbol]); + + return symbol; +} + +// Decodes a Huffman encoded symbol. +inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits) +{ + int symbol; + + // Check first 8-bits: do we have a complete symbol? + if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) + { + // Use a tree traversal to find symbol. + int ofs = 23; + do + { + symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))]; + ofs--; + } while (symbol < 0); + + get_bits_no_markers(8 + (23 - ofs)); + + extra_bits = get_bits_no_markers(symbol & 0xF); + } + else + { + JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0)); + + if (symbol & 0x8000) + { + get_bits_no_markers((symbol >> 8) & 31); + extra_bits = symbol >> 16; + } + else + { + int code_size = (symbol >> 8) & 31; + int num_extra_bits = symbol & 0xF; + int bits = code_size + num_extra_bits; + if (bits <= (m_bits_left + 16)) + extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1); + else + { + get_bits_no_markers(code_size); + extra_bits = get_bits_no_markers(num_extra_bits); + } + } + + symbol &= 0xFF; + } + + return symbol; +} + +// Tables and macro used to fully decode the DPCM differences. 
+// s_extend_test[s] is 2^(s-1): values below it are negative in the s-bit encoding.
+static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
+// s_extend_offset[s] is 1 - 2^s. Written without left-shifting a negative value,
+// which is not well defined before C++20.
+static const int s_extend_offset[16] = { 0, 1 - (1<<1), 1 - (1<<2), 1 - (1<<3), 1 - (1<<4), 1 - (1<<5), 1 - (1<<6), 1 - (1<<7), 1 - (1<<8), 1 - (1<<9), 1 - (1<<10), 1 - (1<<11), 1 - (1<<12), 1 - (1<<13), 1 - (1<<14), 1 - (1<<15) };
+static const int s_extend_mask[] = { 0, (1<<0), (1<<1), (1<<2), (1<<3), (1<<4), (1<<5), (1<<6), (1<<7), (1<<8), (1<<9), (1<<10), (1<<11), (1<<12), (1<<13), (1<<14), (1<<15), (1<<16) };
+// The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
+// x: raw extra bits, s: bit count. Yields the signed value of x.
+#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[(s) & 15]) ? ((x) + s_extend_offset[(s) & 15]) : (x))
+
+// Clamps a value between 0-255.
+inline uint8 jpeg_decoder::clamp(int i)
+{
+  // The unsigned compare catches both i < 0 and i > 255 in one test.
+  // (~i) >> 31 is all ones for positive i (-> 0xFF) and zero for negative i (-> 0).
+  if (static_cast<uint>(i) > 255)
+    i = (((~i) >> 31) & 0xFF);
+
+  return static_cast<uint8>(i);
+}
+
+namespace DCT_Upsample
+{
+  // Minimal fixed-size 4x4 integer matrix used by the upsampling IDCT below.
+  struct Matrix44
+  {
+    typedef int Element_Type;
+    enum { NUM_ROWS = 4, NUM_COLS = 4 };
+
+    Element_Type v[NUM_ROWS][NUM_COLS];
+
+    inline int rows() const { return NUM_ROWS; }
+    inline int cols() const { return NUM_COLS; }
+
+    inline const Element_Type & at(int r, int c) const { return v[r][c]; }
+    inline Element_Type & at(int r, int c) { return v[r][c]; }
+
+    // Elements are intentionally left uninitialized.
+    inline Matrix44() { }
+
+    // Element-wise in-place addition.
+    inline Matrix44& operator += (const Matrix44& a)
+    {
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        at(r, 0) += a.at(r, 0);
+        at(r, 1) += a.at(r, 1);
+        at(r, 2) += a.at(r, 2);
+        at(r, 3) += a.at(r, 3);
+      }
+      return *this;
+    }
+
+    // Element-wise in-place subtraction.
+    inline Matrix44& operator -= (const Matrix44& a)
+    {
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        at(r, 0) -= a.at(r, 0);
+        at(r, 1) -= a.at(r, 1);
+        at(r, 2) -= a.at(r, 2);
+        at(r, 3) -= a.at(r, 3);
+      }
+      return *this;
+    }
+
+    // Element-wise sum of two matrices.
+    friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
+    {
+      Matrix44 ret;
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
+        ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
+        ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
+        ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
+      }
+      return ret;
+    }
+
+    // Element-wise difference of two matrices.
+    friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
+    {
+      Matrix44 ret;
+      for (int r = 0; r < NUM_ROWS; r++)
+      {
+        ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
+        ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
+        ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
+        ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
+      }
+      return ret;
+    }
+
+    // Writes a + b transposed into the top-left 4x4 corner of an 8-wide block:
+    // element (r, c) lands at pDst[c*8 + r].
+    static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
+    {
+      for (int r = 0; r < 4; r++)
+      {
+        pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) + b.at(r, 0));
+        pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) + b.at(r, 1));
+        pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) + b.at(r, 2));
+        pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) + b.at(r, 3));
+      }
+    }
+
+    // Same as add_and_store(), but stores a - b.
+    static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
+    {
+      for (int r = 0; r < 4; r++)
+      {
+        pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) - b.at(r, 0));
+        pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) - b.at(r, 1));
+        pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) - b.at(r, 2));
+        pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) - b.at(r, 3));
+      }
+    }
+  };
+
+  // Fixed-point configuration: constants are scaled by 2^FRACT_BITS.
+  const int FRACT_BITS = 10;
+  const int SCALE = 1 << FRACT_BITS;
+
+  typedef int Temp_Type;
+  // D(): rounds a fixed-point product back to an integer.
+  // F(): converts a floating-point constant to fixed point.
+  #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
+  #define F(i) ((int)((i) * SCALE + .5f))
+
+  // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
+  #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
+
+  // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
+  // Computes the P and Q 4x4 matrices from the 8x8 coefficient block at pSrc.
+  // Coefficients outside NUM_ROWS x NUM_COLS are treated as zero by AT().
+  template<int NUM_ROWS, int NUM_COLS>
+  struct P_Q
+  {
+    static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
+    {
+      // 4x8 = 4x8 times 8x8, matrix 0 is constant
+      const Temp_Type X000 = AT(0, 0);
+      const Temp_Type X001 = AT(0, 1);
+      const Temp_Type X002 = AT(0, 2);
+      const Temp_Type X003 = AT(0, 3);
+      const Temp_Type X004 = AT(0, 4);
+      const Temp_Type X005 = AT(0, 5);
+      const Temp_Type X006 = AT(0, 6);
+      const Temp_Type X007 = AT(0, 7);
+      const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
+      const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
+      const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
+      const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
+      const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
+      const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
+      const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
+      const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
+      const Temp_Type X020 = AT(4, 0);
+      const Temp_Type X021 = AT(4, 1);
+      const Temp_Type X022 = AT(4, 2);
+      const Temp_Type X023 = AT(4, 3);
+      const Temp_Type X024 = AT(4, 4);
+      const Temp_Type X025 = AT(4, 5);
+      const Temp_Type X026 = AT(4, 6);
+      const Temp_Type X027 = AT(4, 7);
+      const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
+      const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
+      const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
+      const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
+      const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
+      const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
+      const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
+      const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));
+
+      // 4x4 = 4x8 times 8x4, matrix 1 is constant
+      P.at(0, 0) = X000;
+      P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
+      P.at(0, 2) = X004;
+      P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
+      P.at(1, 0) = X010;
+      P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
+      P.at(1, 2) = X014;
+      P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
+      P.at(2, 0) = X020;
+      P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
+      P.at(2, 2) = X024;
+      P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
+      P.at(3, 0) = X030;
+      P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
+      P.at(3, 2) = X034;
+      P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
+      // 40 muls 24 adds
+
+      // 4x4 = 4x8 times 8x4, matrix 1 is constant
+      Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
+      Q.at(0, 1) = X002;
+      Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
+      Q.at(0, 3) = X006;
+      Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
+      Q.at(1, 1) = X012;
+      Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
+      Q.at(1, 3) = X016;
+      Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
+      Q.at(2, 1) = X022;
+      Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
+      Q.at(2, 3) = X026;
+      Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
+      Q.at(3, 1) = X032;
+      Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
+      Q.at(3, 3) = X036;
+      // 40 muls 24 adds
+    }
+  };
+
+  // Computes the R and S 4x4 matrices from the 8x8 coefficient block at pSrc.
+  // NUM_ROWS/NUM_COLS have the same meaning as for P_Q.
+  template<int NUM_ROWS, int NUM_COLS>
+  struct R_S
+  {
+    static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
+    {
+      // 4x8 = 4x8 times 8x8, matrix 0 is constant
+      const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
+      const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
+      const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
+      const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
+      const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
+      const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
+      const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
+      const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
+      const Temp_Type X110 = AT(2, 0);
+      const Temp_Type X111 = AT(2, 1);
+      const Temp_Type X112 = AT(2, 2);
+      const Temp_Type X113 = AT(2, 3);
+      const Temp_Type X114 = AT(2, 4);
+      const Temp_Type X115 = AT(2, 5);
+      const Temp_Type X116 = AT(2, 6);
+      const Temp_Type X117 = AT(2, 7);
+      const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
+      const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
+      const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
+      const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
+      const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
+      const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
+      const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
+      const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
+      const Temp_Type X130 = AT(6, 0);
+      const Temp_Type X131 = AT(6, 1);
+      const Temp_Type X132 = AT(6, 2);
+      const Temp_Type X133 = AT(6, 3);
+      const Temp_Type X134 = AT(6, 4);
+      const Temp_Type X135 = AT(6, 5);
+      const Temp_Type X136 = AT(6, 6);
+      const Temp_Type X137 = AT(6, 7);
+      // 80 muls 48 adds
+
+      // 4x4 = 4x8 times 8x4, matrix 1 is constant
+      R.at(0, 0) = X100;
+      R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
+      R.at(0, 2) = X104;
+      R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
+      R.at(1, 0) = X110;
+      R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
+      R.at(1, 2) = X114;
+      R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
+      R.at(2, 0) = X120;
+      R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
+      R.at(2, 2) = X124;
+      R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
+      R.at(3, 0) = X130;
+      R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
+      R.at(3, 2) = X134;
+      R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
+      // 40 muls 24 adds
+      // 4x4 = 4x8 times 8x4, matrix 1 is constant
+      S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
+      S.at(0, 1) = X102;
+      S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
+      S.at(0, 3) = X106;
+      S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
+      S.at(1, 1) = X112;
+      S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
+      S.at(1, 3) = X116;
+      S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
+      S.at(2, 1) = X122;
+      S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
+      S.at(2, 3) = X126;
+      S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
+      S.at(3, 1) = X132;
+      S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
+      S.at(3, 3) = X136;
+      // 40 muls 24 adds
+    }
+  };
+} // end namespace DCT_Upsample
+
+// Unconditionally frees all allocated m_blocks.
+// Also drops the stream pointer, so the decoder cannot read any further input.
+void jpeg_decoder::free_all_blocks()
+{
+  m_pStream = NULL;
+  for (mem_block *b = m_pMem_blocks; b; )
+  {
+    // Grab the successor before the node is released.
+    mem_block *n = b->m_pNext;
+    jpgd_free(b);
+    b = n;
+  }
+  m_pMem_blocks = NULL;
+}
+
+// This method handles all errors. It will never return.
+// It could easily be changed to use C++ exceptions.
+// Records the status, frees every block and longjmp()s to m_jmp_state.
+JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
+{
+  m_error_code = status;
+  free_all_blocks();
+  longjmp(m_jmp_state, status);
+}
+
+// Allocates nSize bytes (rounded up to a multiple of 4, minimum 4) from the
+// decoder's block list, optionally zero-filled. Memory is released in bulk by
+// free_all_blocks(). Calls stop_decoding(JPGD_NOTENOUGHMEM) if the system
+// allocation fails, so it never returns NULL.
+void *jpeg_decoder::alloc(size_t nSize, bool zero)
+{
+  nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
+  char *rv = NULL;
+  // First fit: reuse any existing block with enough free space at its tail.
+  for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext)
+  {
+    if ((b->m_used_count + nSize) <= b->m_size)
+    {
+      rv = b->m_data + b->m_used_count;
+      b->m_used_count += nSize;
+      break;
+    }
+  }
+  if (!rv)
+  {
+    // No room: start a new block of at least 32768 - 256 bytes, or the request
+    // rounded up to 2 KiB. Kept as size_t so large requests are not truncated.
+    const size_t capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
+    mem_block *b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
+    if (!b) { stop_decoding(JPGD_NOTENOUGHMEM); }
+    b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
+    b->m_used_count = nSize;
+    b->m_size = capacity;
+    rv = b->m_data;
+  }
+  if (zero) memset(rv, 0, nSize);
+  return rv;
+}
+
+// Fills n 16-bit words at p with c, low byte first, regardless of host byte order.
+void jpeg_decoder::word_clear(void *p, uint16 c, uint n)
+{
+  uint8 *pD = (uint8*)p;
+  const uint8 l = c & 0xFF, h = (c >> 8) & 0xFF;
+  while (n)
+  {
+    pD[0] = l; pD[1] = h; pD += 2;
+    n--;
+  }
+}
+
+// Refill the input buffer.
+// This method will sit in a loop until (A) the buffer is full or (B)
+// the stream's read() method reports an end of file condition.
+void jpeg_decoder::prep_in_buffer()
+{
+  // Start from an empty buffer; nothing more to read once EOF has been seen.
+  m_in_buf_left = 0;
+  m_pIn_buf_ofs = m_in_buf;
+
+  if (m_eof_flag)
+    return;
+
+  do
+  {
+    // read() may return fewer bytes than requested, so keep appending.
+    int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
+    if (bytes_read == -1)
+      stop_decoding(JPGD_STREAM_READ);
+
+    m_in_buf_left += bytes_read;
+  } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
+
+  m_total_bytes_read += m_in_buf_left;
+
+  // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
+  // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
+  // Writes 64 words (128 bytes) of FF D9 directly after the valid data.
+  // NOTE(review): relies on at least 128 bytes of padding following m_in_buf - confirm in the header.
+  word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
+}
+
+// Read a Huffman code table.
+// Parses every table in a DHT segment and stores the code-length counts and
+// symbol values in m_huff_num[] / m_huff_val[]. Malformed segments abort via
+// stop_decoding().
+void jpeg_decoder::read_dht_marker()
+{
+  int i, index, count;
+  uint8 huff_num[17];
+  uint8 huff_val[256];
+
+  // Segment length includes its own two bytes.
+  uint num_left = get_bits(16);
+
+  if (num_left < 2)
+    stop_decoding(JPGD_BAD_DHT_MARKER);
+
+  num_left -= 2;
+
+  while (num_left)
+  {
+    // High nibble: table class, low nibble: table id.
+    index = get_bits(8);
+
+    huff_num[0] = 0;
+
+    count = 0;
+
+    // Number of codes for each code length 1..16.
+    for (i = 1; i <= 16; i++)
+    {
+      huff_num[i] = static_cast<uint8>(get_bits(8));
+      count += huff_num[i];
+    }
+
+    if (count > 255)
+      stop_decoding(JPGD_BAD_DHT_COUNTS);
+
+    for (i = 0; i < count; i++)
+      huff_val[i] = static_cast<uint8>(get_bits(8));
+
+    // Bytes consumed by this table: class/id byte + 16 counts + the values.
+    i = 1 + 16 + count;
+
+    if (num_left < (uint)i)
+      stop_decoding(JPGD_BAD_DHT_MARKER);
+
+    num_left -= i;
+
+    // The table class must be 0 (DC) or 1 (AC). The previous test
+    // ((index & 0x10) > 0x10) could never be true.
+    const int table_class = index >> 4;
+    if (table_class > 1)
+      stop_decoding(JPGD_BAD_DHT_INDEX);
+
+    // Remember the class now: after the remap below bit 4 is no longer set,
+    // so testing the remapped index always produced "not AC".
+    const bool is_ac = (table_class == 1);
+
+    // DC tables occupy the lower half of the table arrays, AC tables the upper half.
+    index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
+
+    if (index >= JPGD_MAX_HUFF_TABLES)
+      stop_decoding(JPGD_BAD_DHT_INDEX);
+
+    if (!m_huff_num[index])
+      m_huff_num[index] = (uint8 *)alloc(17);
+
+    if (!m_huff_val[index])
+      m_huff_val[index] = (uint8 *)alloc(256);
+
+    m_huff_ac[index] = is_ac;
+    memcpy(m_huff_num[index], huff_num, 17);
+    memcpy(m_huff_val[index], huff_val, 256);
+  }
+}
+
+// Read a quantization table.
+void jpeg_decoder::read_dqt_marker()
+{
+  // Parses every table in a DQT segment into m_quant[]. Entries are stored in
+  // the order they appear in the stream (zig-zag order).
+  int n, i, prec;
+  uint num_left;
+  uint temp;
+
+  // Segment length includes its own two bytes.
+  num_left = get_bits(16);
+
+  if (num_left < 2)
+    stop_decoding(JPGD_BAD_DQT_MARKER);
+
+  num_left -= 2;
+
+  while (num_left)
+  {
+    // High nibble: precision (non-zero means 16-bit entries), low nibble: table id.
+    n = get_bits(8);
+    prec = n >> 4;
+    n &= 0x0F;
+
+    if (n >= JPGD_MAX_QUANT_TABLES)
+      stop_decoding(JPGD_BAD_DQT_TABLE);
+
+    if (!m_quant[n])
+      m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
+
+    // read quantization entries, in zag order
+    for (i = 0; i < 64; i++)
+    {
+      temp = get_bits(8);
+
+      if (prec)
+        temp = (temp << 8) + get_bits(8);
+
+      m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
+    }
+
+    // Bytes consumed by this table: id byte + 64 (or 128) bytes of entries.
+    i = 64 + 1;
+
+    if (prec)
+      i += 64;
+
+    if (num_left < (uint)i)
+      stop_decoding(JPGD_BAD_DQT_LENGTH);
+
+    num_left -= i;
+  }
+}
+
+// Read the start of frame (SOF) marker.
+// Fills in the image dimensions and the per-component id, sampling factors and
+// quantization table selector. Invalid headers abort via stop_decoding().
+void jpeg_decoder::read_sof_marker()
+{
+  int i;
+  uint num_left;
+
+  num_left = get_bits(16);
+
+  if (get_bits(8) != 8)   /* precision: sorry, only 8-bit precision is supported right now */
+    stop_decoding(JPGD_BAD_PRECISION);
+
+  m_image_y_size = get_bits(16);
+
+  if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT))
+    stop_decoding(JPGD_BAD_HEIGHT);
+
+  m_image_x_size = get_bits(16);
+
+  if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH))
+    stop_decoding(JPGD_BAD_WIDTH);
+
+  m_comps_in_frame = get_bits(8);
+
+  if (m_comps_in_frame > JPGD_MAX_COMPONENTS)
+    stop_decoding(JPGD_TOO_MANY_COMPONENTS);
+
+  // Header is 8 bytes plus 3 bytes per component.
+  if (num_left != (uint)(m_comps_in_frame * 3 + 8))
+    stop_decoding(JPGD_BAD_SOF_LENGTH);
+
+  for (i = 0; i < m_comps_in_frame; i++)
+  {
+    m_comp_ident[i] = get_bits(8);
+    m_comp_h_samp[i] = get_bits(4);
+    m_comp_v_samp[i] = get_bits(4);
+    m_comp_quant[i] = get_bits(8);
+
+    // The selector is later used as an index into m_quant[]; a value from an
+    // untrusted file must not be allowed to run past the end of that array.
+    if (m_comp_quant[i] >= JPGD_MAX_QUANT_TABLES)
+      stop_decoding(JPGD_DECODE_ERROR);
+  }
+}
+
+// Used to skip unrecognized markers.
+void jpeg_decoder::skip_variable_marker()
+{
+  // Reads the 16-bit segment length, then discards the payload byte by byte.
+  uint num_left;
+
+  num_left = get_bits(16);
+
+  // The length field counts itself, so anything below 2 is malformed.
+  if (num_left < 2)
+    stop_decoding(JPGD_BAD_VARIABLE_MARKER);
+
+  num_left -= 2;
+
+  while (num_left)
+  {
+    get_bits(8);
+    num_left--;
+  }
+}
+
+// Read a define restart interval (DRI) marker.
+// The segment must be exactly 4 bytes long; stores the interval in m_restart_interval.
+void jpeg_decoder::read_dri_marker()
+{
+  if (get_bits(16) != 4)
+    stop_decoding(JPGD_BAD_DRI_LENGTH);
+
+  m_restart_interval = get_bits(16);
+}
+
+// Read a start of scan (SOS) marker.
+// Records which frame components take part in the scan, their DC/AC Huffman
+// table selectors and the spectral selection / successive approximation fields.
+void jpeg_decoder::read_sos_marker()
+{
+  uint num_left;
+  int i, ci, n, c, cc;
+
+  num_left = get_bits(16);
+
+  n = get_bits(8);
+
+  m_comps_in_scan = n;
+
+  // Length field (2) + component count (1).
+  num_left -= 3;
+
+  // What remains must be 2 bytes per component plus the 3 trailing bytes.
+  if ( (num_left != (uint)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) )
+    stop_decoding(JPGD_BAD_SOS_LENGTH);
+
+  for (i = 0; i < n; i++)
+  {
+    cc = get_bits(8); // component id
+    c = get_bits(8);  // high nibble: DC table, low nibble: AC table
+    num_left -= 2;
+
+    // Map the component id onto its index in the frame header.
+    for (ci = 0; ci < m_comps_in_frame; ci++)
+      if (cc == m_comp_ident[ci])
+        break;
+
+    if (ci >= m_comps_in_frame)
+      stop_decoding(JPGD_BAD_SOS_COMP_ID);
+
+    m_comp_list[i] = ci;
+    m_comp_dc_tab[ci] = (c >> 4) & 15;
+    // AC tables live in the upper half of the Huffman table arrays.
+    m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
+
+    // Both selectors are later used as indices into the Huffman table arrays;
+    // reject values from the file that would run past the end of them.
+    if (m_comp_dc_tab[ci] >= JPGD_MAX_HUFF_TABLES)
+      stop_decoding(JPGD_DECODE_ERROR);
+
+    if (m_comp_ac_tab[ci] >= JPGD_MAX_HUFF_TABLES)
+      stop_decoding(JPGD_DECODE_ERROR);
+  }
+
+  m_spectral_start = get_bits(8);
+  m_spectral_end = get_bits(8);
+  m_successive_high = get_bits(4);
+  m_successive_low = get_bits(4);
+
+  // Non-progressive scans always cover the full coefficient range.
+  if (!m_progressive_flag)
+  {
+    m_spectral_start = 0;
+    m_spectral_end = 63;
+  }
+
+  num_left -= 3;
+
+  while (num_left) /* read past whatever is num_left */
+  {
+    get_bits(8);
+    num_left--;
+  }
+}
+
+// Finds the next marker.
+// Skips forward to the next 0xFF, swallows any run of 0xFF fill bytes and
+// returns the first non-0xFF byte that follows. A 0x00 after 0xFF is a stuffed
+// data byte rather than a marker, so scanning resumes in that case.
+int jpeg_decoder::next_marker()
+{
+  uint c;
+
+  do
+  {
+    // Any bytes skipped here are extra data before the marker (not good),
+    // but they are tolerated.
+    do
+    {
+      c = get_bits(8);
+    } while (c != 0xFF);
+
+    do
+    {
+      c = get_bits(8);
+    } while (c == 0xFF);
+
+  } while (c == 0);
+
+  return c;
+}
+
+// Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
+// encountered.
+int jpeg_decoder::process_markers() +{ + int c; + + for ( ; ; ) + { + c = next_marker(); + + switch (c) + { + case M_SOF0: + case M_SOF1: + case M_SOF2: + case M_SOF3: + case M_SOF5: + case M_SOF6: + case M_SOF7: +// case M_JPG: + case M_SOF9: + case M_SOF10: + case M_SOF11: + case M_SOF13: + case M_SOF14: + case M_SOF15: + case M_SOI: + case M_EOI: + case M_SOS: + { + return c; + } + case M_DHT: + { + read_dht_marker(); + break; + } + // No arithmitic support - dumb patents! + case M_DAC: + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + case M_DQT: + { + read_dqt_marker(); + break; + } + case M_DRI: + { + read_dri_marker(); + break; + } + //case M_APP0: /* no need to read the JFIF marker */ + + case M_JPG: + case M_RST0: /* no parameters */ + case M_RST1: + case M_RST2: + case M_RST3: + case M_RST4: + case M_RST5: + case M_RST6: + case M_RST7: + case M_TEM: + { + stop_decoding(JPGD_UNEXPECTED_MARKER); + break; + } + default: /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */ + { + skip_variable_marker(); + break; + } + } + } +} + +// Finds the start of image (SOI) marker. +// This code is rather defensive: it only checks the first 512 bytes to avoid +// false positives. +void jpeg_decoder::locate_soi_marker() +{ + uint lastchar, thischar; + uint bytesleft; + + lastchar = get_bits(8); + + thischar = get_bits(8); + + /* ok if it's a normal JPEG file without a special header */ + + if ((lastchar == 0xFF) && (thischar == M_SOI)) + return; + + bytesleft = 4096; //512; + + for ( ; ; ) + { + if (--bytesleft == 0) + stop_decoding(JPGD_NOT_JPEG); + + lastchar = thischar; + + thischar = get_bits(8); + + if (lastchar == 0xFF) + { + if (thischar == M_SOI) + break; + else if (thischar == M_EOI) // get_bits will keep returning M_EOI if we read past the end + stop_decoding(JPGD_NOT_JPEG); + } + } + + // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad. 
+ thischar = (m_bit_buf >> 24) & 0xFF; + + if (thischar != 0xFF) + stop_decoding(JPGD_NOT_JPEG); +} + +// Find a start of frame (SOF) marker. +void jpeg_decoder::locate_sof_marker() +{ + locate_soi_marker(); + + int c = process_markers(); + + switch (c) + { + case M_SOF2: + m_progressive_flag = JPGD_TRUE; + case M_SOF0: /* baseline DCT */ + case M_SOF1: /* extended sequential DCT */ + { + read_sof_marker(); + break; + } + case M_SOF9: /* Arithmitic coding */ + { + stop_decoding(JPGD_NO_ARITHMITIC_SUPPORT); + break; + } + default: + { + stop_decoding(JPGD_UNSUPPORTED_MARKER); + break; + } + } +} + +// Find a start of scan (SOS) marker. +int jpeg_decoder::locate_sos_marker() +{ + int c; + + c = process_markers(); + + if (c == M_EOI) + return JPGD_FALSE; + else if (c != M_SOS) + stop_decoding(JPGD_UNEXPECTED_MARKER); + + read_sos_marker(); + + return JPGD_TRUE; +} + +// Reset everything to default/uninitialized state. +void jpeg_decoder::init(jpeg_decoder_stream *pStream) +{ + m_pMem_blocks = NULL; + m_error_code = JPGD_SUCCESS; + m_ready_flag = false; + m_image_x_size = m_image_y_size = 0; + m_pStream = pStream; + m_progressive_flag = JPGD_FALSE; + + memset(m_huff_ac, 0, sizeof(m_huff_ac)); + memset(m_huff_num, 0, sizeof(m_huff_num)); + memset(m_huff_val, 0, sizeof(m_huff_val)); + memset(m_quant, 0, sizeof(m_quant)); + + m_scan_type = 0; + m_comps_in_frame = 0; + + memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp)); + memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp)); + memset(m_comp_quant, 0, sizeof(m_comp_quant)); + memset(m_comp_ident, 0, sizeof(m_comp_ident)); + memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks)); + memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks)); + + m_comps_in_scan = 0; + memset(m_comp_list, 0, sizeof(m_comp_list)); + memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab)); + memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab)); + + m_spectral_start = 0; + m_spectral_end = 0; + m_successive_low = 0; + m_successive_high = 0; + m_max_mcu_x_size = 0; + 
m_max_mcu_y_size = 0; + m_blocks_per_mcu = 0; + m_max_blocks_per_row = 0; + m_mcus_per_row = 0; + m_mcus_per_col = 0; + m_expanded_blocks_per_component = 0; + m_expanded_blocks_per_mcu = 0; + m_expanded_blocks_per_row = 0; + m_freq_domain_chroma_upsample = false; + + memset(m_mcu_org, 0, sizeof(m_mcu_org)); + + m_total_lines_left = 0; + m_mcu_lines_left = 0; + m_real_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_scan_line = 0; + m_dest_bytes_per_pixel = 0; + + memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs)); + + memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs)); + memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs)); + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_eob_run = 0; + + memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu)); + + m_pIn_buf_ofs = m_in_buf; + m_in_buf_left = 0; + m_eof_flag = false; + m_tem_flag = 0; + + memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start)); + memset(m_in_buf, 0, sizeof(m_in_buf)); + memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end)); + + m_restart_interval = 0; + m_restarts_left = 0; + m_next_restart_num = 0; + + m_max_mcus_per_row = 0; + m_max_blocks_per_mcu = 0; + m_max_mcus_per_col = 0; + + memset(m_last_dc_val, 0, sizeof(m_last_dc_val)); + m_pMCU_coefficients = NULL; + m_pSample_buf = NULL; + + m_total_bytes_read = 0; + + m_pScan_line_0 = NULL; + m_pScan_line_1 = NULL; + + // Ready the input buffer. + prep_in_buffer(); + + // Prime the bit buffer. + m_bits_left = 16; + m_bit_buf = 0; + + get_bits(16); + get_bits(16); + + for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) + m_mcu_block_max_zag[i] = 64; +} + +#define SCALEBITS 16 +#define ONE_HALF ((int) 1 << (SCALEBITS-1)) +#define FIX(x) ((int) ((x) * (1L<> SCALEBITS; + m_cbb[i] = ( FIX(1.77200f) * k + ONE_HALF) >> SCALEBITS; + m_crg[i] = (-FIX(0.71414f)) * k; + m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF; + } +} + +// This method throws back into the stream any bytes that where read +// into the bit buffer during initial marker scanning. 
+void jpeg_decoder::fix_in_buffer()
+{
+  // In case any 0xFF's were pulled into the buffer during marker scanning.
+  JPGD_ASSERT((m_bits_left & 7) == 0);
+
+  // Push the buffered bytes back, lowest byte first, then refill the bit
+  // buffer through the marker-aware reader.
+  if (m_bits_left == 16)
+    stuff_char( (uint8)(m_bit_buf & 0xFF));
+
+  if (m_bits_left >= 8)
+    stuff_char( (uint8)((m_bit_buf >> 8) & 0xFF));
+
+  stuff_char((uint8)((m_bit_buf >> 16) & 0xFF));
+  stuff_char((uint8)((m_bit_buf >> 24) & 0xFF));
+
+  m_bits_left = 16;
+  get_bits_no_markers(16);
+  get_bits_no_markers(16);
+}
+
+// Runs the IDCT over every block of the current MCU and writes the samples
+// into m_pSample_buf at the slot for mcu_row (64 bytes per block).
+void jpeg_decoder::transform_mcu(int mcu_row)
+{
+  jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
+  uint8* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
+
+  for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+  {
+    idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
+    pSrc_ptr += 64;
+    pDst_ptr += 64;
+  }
+}
+
+// Indexed by (highest non-zero zig-zag position). Each entry is rows*16 + cols,
+// matching the case labels in transform_mcu_expand().
+static const uint8 s_max_rc[64] =
+{
+  17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
+  102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
+  136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
+  136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
+};
+
+// Like transform_mcu(), but the two chroma blocks are upsampled in the
+// frequency domain: each one is expanded into four 8x8 output blocks.
+void jpeg_decoder::transform_mcu_expand(int mcu_row)
+{
+  jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
+  uint8* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
+
+  // Y IDCT
+  int mcu_block;
+  for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++)
+  {
+    idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
+    pSrc_ptr += 64;
+    pDst_ptr += 64;
+  }
+
+  // Chroma IDCT, with upsampling
+  jpgd_block_t temp_block[64];
+
+  for (int i = 0; i < 2; i++)
+  {
+    DCT_Upsample::Matrix44 P, Q, R, S;
+
+    JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
+    JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
+
+    int max_zag = m_mcu_block_max_zag[mcu_block++] - 1;
+    if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
+    // Pick the specialization that matches the non-zero extent of the block.
+    switch (s_max_rc[max_zag])
+    {
+    case 1*16+1:
+      DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
+      break;
+    case 1*16+2:
+      DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
+      break;
+    case 2*16+2:
+      DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
+      break;
+    case 3*16+2:
+      DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
+      break;
+    case 3*16+3:
+      DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
+      break;
+    case 3*16+4:
+      DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
+      break;
+    case 4*16+4:
+      DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
+      break;
+    case 5*16+4:
+      DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
+      break;
+    case 5*16+5:
+      DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
+      break;
+    case 5*16+6:
+      DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
+      break;
+    case 6*16+6:
+      DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
+      break;
+    case 7*16+6:
+      DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
+      break;
+    case 7*16+7:
+      DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
+      break;
+    case 7*16+8:
+      DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
+      break;
+    case 8*16+8:
+      DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
+      DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
+      break;
+    default:
+      // Every value stored in s_max_rc has a case above.
+      JPGD_ASSERT(false);
+    }
+
+    // a = P + Q, b = P - Q, c = R + S, d = R - S (b and d reuse P and R in place).
+    DCT_Upsample::Matrix44 a(P + Q); P -= Q;
+    DCT_Upsample::Matrix44& b = P;
+    DCT_Upsample::Matrix44 c(R + S); R -= S;
+    DCT_Upsample::Matrix44& d = R;
+
+    // The four output blocks are a+c, a-c, b+d and b-d, in that order.
+    DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
+    idct_4x4(temp_block, pDst_ptr);
+    pDst_ptr += 64;
+
+    DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
+    idct_4x4(temp_block, pDst_ptr);
+    pDst_ptr += 64;
+
+    DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
+    idct_4x4(temp_block, pDst_ptr);
+    pDst_ptr += 64;
+
+    DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
+    idct_4x4(temp_block, pDst_ptr);
+    pDst_ptr += 64;
+
+    pSrc_ptr += 64;
+  }
+}
+
+// Loads and dequantizes the next row of (already decoded) coefficients.
+// Progressive images only.
+void jpeg_decoder::load_next_row()
+{
+  int i;
+  jpgd_block_t *p;
+  jpgd_quant_t *q;
+  int mcu_row, mcu_block;
+  int component_num, component_id;
+  int block_x_mcu[JPGD_MAX_COMPONENTS];
+
+  memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
+
+  for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+  {
+    int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+    for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+    {
+      component_id = m_mcu_org[mcu_block];
+      q = m_quant[m_comp_quant[component_id]];
+
+      p = m_pMCU_coefficients + 64 * mcu_block;
+
+      // DC and AC coefficients are kept in separate buffers: take element 0
+      // from the DC buffer and elements 1..63 from the AC buffer.
+      jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+      jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+      p[0] = pDC[0];
+      memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
+
+      // Find the last non-zero coefficient in zig-zag order.
+      for (i = 63; i > 0; i--)
+        if (p[g_ZAG[i]])
+          break;
+
+      m_mcu_block_max_zag[mcu_block] = i + 1;
+
+      // Dequantize everything up to and including that position.
+      for ( ; i >= 0; i--)
+        if (p[g_ZAG[i]])
+          p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
+
+      // Advance to the next block of this component within the MCU.
+      if (m_comps_in_scan == 1)
+        block_x_mcu[component_id]++;
+      else
+      {
+        if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
+        {
+          block_x_mcu_ofs = 0;
+
+          if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
+          {
+            block_y_mcu_ofs = 0;
+
+            block_x_mcu[component_id] += m_comp_h_samp[component_id];
+          }
+        }
+      }
+    }
+
+    if (m_freq_domain_chroma_upsample)
+      transform_mcu_expand(mcu_row);
+    else
+      transform_mcu(mcu_row);
+  }
+
+  // Move every component in the scan down to its next block row.
+  if (m_comps_in_scan == 1)
+    m_block_y_mcu[m_comp_list[0]]++;
+  else
+  {
+    for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+    {
+      component_id = m_comp_list[component_num];
+
+      m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
+    }
+  }
+}
+
+// Restart interval processing.
+// Locates the expected RSTn marker, resets the DC predictors and the EOB run,
+// and re-primes the bit buffer. Aborts with JPGD_BAD_RESTART_MARKER otherwise.
+void jpeg_decoder::process_restart()
+{
+  int i;
+  int c = 0;
+
+  // Align to a byte boundary
+  // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
+  //get_bits_no_markers(m_bits_left & 7);
+
+  // Let's scan a little bit to find the marker, but not _too_ far.
+  // 1536 is a "fudge factor" that determines how much to scan.
+  for (i = 1536; i > 0; i--)
+    if (get_char() == 0xFF)
+      break;
+
+  if (i == 0)
+    stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+  // Skip any further 0xFF fill bytes, using what is left of the same budget.
+  for ( ; i > 0; i--)
+    if ((c = get_char()) != 0xFF)
+      break;
+
+  if (i == 0)
+    stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+  // Is it the expected marker? If not, something bad happened.
+  if (c != (m_next_restart_num + M_RST0))
+    stop_decoding(JPGD_BAD_RESTART_MARKER);
+
+  // Reset each component's DC prediction values.
+  memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
+
+  m_eob_run = 0;
+
+  m_restarts_left = m_restart_interval;
+
+  // Restart markers cycle through RST0..RST7.
+  m_next_restart_num = (m_next_restart_num + 1) & 7;
+
+  // Get the bit buffer going again...
+
+  m_bits_left = 16;
+  get_bits_no_markers(16);
+  get_bits_no_markers(16);
+}
+
+// Multiplies a decoded AC coefficient by its quantization step.
+static inline int dequantize_ac(int c, int q) { c *= q; return c; }
+
+// Decodes and dequantizes the next row of coefficients.
+// Sequential (baseline) images only: Huffman-decodes one MCU row directly
+// from the bit stream, dequantizing each coefficient as it is decoded, and
+// passes every finished MCU to the IDCT stage.
+void jpeg_decoder::decode_next_row()
+{
+  for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+  {
+    if ((m_restart_interval) && (m_restarts_left == 0))
+      process_restart();
+
+    jpgd_block_t* p = m_pMCU_coefficients;
+    for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
+    {
+      int component_id = m_mcu_org[mcu_block];
+      jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
+
+      // DC coefficient: decoded as a difference from the component's previous DC value.
+      int r, s;
+      s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
+      s = JPGD_HUFF_EXTEND(r, s);
+
+      m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
+
+      p[0] = static_cast<jpgd_block_t>(s * q[0]);
+
+      // Number of zig-zag positions written the last time this MCU slot was
+      // decoded; skipped positions below it must be cleared explicitly.
+      int prev_num_set = m_mcu_block_max_zag[mcu_block];
+
+      huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
+
+      int k;
+      for (k = 1; k < 64; k++)
+      {
+        int extra_bits;
+        s = huff_decode(pH, extra_bits);
+
+        // High nibble: run of zeros; low nibble: size of the coefficient.
+        r = s >> 4;
+        s &= 15;
+
+        if (s)
+        {
+          if (r)
+          {
+            if ((k + r) > 63)
+              stop_decoding(JPGD_DECODE_ERROR);
+
+            if (k < prev_num_set)
+            {
+              int n = JPGD_MIN(r, prev_num_set - k);
+              int kt = k;
+              while (n--)
+                p[g_ZAG[kt++]] = 0;
+            }
+
+            k += r;
+          }
+
+          s = JPGD_HUFF_EXTEND(extra_bits, s);
+
+          JPGD_ASSERT(k < 64);
+
+          p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
+        }
+        else
+        {
+          if (r == 15)
+          {
+            // ZRL: a run of 16 zero coefficients.
+            if ((k + 16) > 64)
+              stop_decoding(JPGD_DECODE_ERROR);
+
+            if (k < prev_num_set)
+            {
+              int n = JPGD_MIN(16, prev_num_set - k);
+              int kt = k;
+              while (n--)
+              {
+                JPGD_ASSERT(kt <= 63);
+                p[g_ZAG[kt++]] = 0;
+              }
+            }
+
+            k += 16 - 1; // - 1 because the loop counter is k
+            JPGD_ASSERT(p[g_ZAG[k]] == 0);
+          }
+          else
+            break; // EOB: the rest of the block is zero.
+        }
+      }
+
+      // Clear any stale coefficients left beyond the last one decoded.
+      if (k < prev_num_set)
+      {
+        int kt = k;
+        while (kt < prev_num_set)
+          p[g_ZAG[kt++]] = 0;
+      }
+
+      m_mcu_block_max_zag[mcu_block] = k;
+    }
+
+    if (m_freq_domain_chroma_upsample)
+      transform_mcu_expand(mcu_row);
+    else
+      transform_mcu(mcu_row);
+
+    m_restarts_left--;
+  }
+}
+
+// YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
+void jpeg_decoder::H1V1Convert()
+{
+  int
row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d = m_pScan_line_0; + uint8 *s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int y = s[j]; + int cb = s[64+j]; + int cr = s[128+j]; + + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + + d += 4; + } + + s += 64*3; + } +} + +// YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB +void jpeg_decoder::H2V1Convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *y = m_pSample_buf + row * 8; + uint8 *c = m_pSample_buf + 2*64 + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 4; j++) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j<<1]; + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+bc); + d0[3] = 255; + + yy = y[(j<<1)+1]; + d0[4] = clamp(yy+rc); + d0[5] = clamp(yy+gc); + d0[6] = clamp(yy+bc); + d0[7] = 255; + + d0 += 8; + + c++; + } + y += 64; + } + + y += 64*4 - 64*2; + c += 64*4 - 8; + } +} + +// YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB +void jpeg_decoder::H1V2Convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *d1 = m_pScan_line_1; + uint8 *y; + uint8 *c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64*1 + (row & 7) * 8; + + c = m_pSample_buf + 64*2 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int j = 0; j < 8; j++) + { + int cb = c[0+j]; + int cr = c[64+j]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+bc); + d0[3] = 255; + + yy = y[8+j]; + d1[0] = clamp(yy+rc); + d1[1] = clamp(yy+gc); + d1[2] = 
clamp(yy+bc); + d1[3] = 255; + + d0 += 4; + d1 += 4; + } + + y += 64*4; + c += 64*4; + } +} + +// YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB +void jpeg_decoder::H2V2Convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d0 = m_pScan_line_0; + uint8 *d1 = m_pScan_line_1; + uint8 *y; + uint8 *c; + + if (row < 8) + y = m_pSample_buf + row * 8; + else + y = m_pSample_buf + 64*2 + (row & 7) * 8; + + c = m_pSample_buf + 64*4 + (row >> 1) * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int l = 0; l < 2; l++) + { + for (int j = 0; j < 8; j += 2) + { + int cb = c[0]; + int cr = c[64]; + + int rc = m_crr[cr]; + int gc = ((m_crg[cr] + m_cbg[cb]) >> 16); + int bc = m_cbb[cb]; + + int yy = y[j]; + d0[0] = clamp(yy+rc); + d0[1] = clamp(yy+gc); + d0[2] = clamp(yy+bc); + d0[3] = 255; + + yy = y[j+1]; + d0[4] = clamp(yy+rc); + d0[5] = clamp(yy+gc); + d0[6] = clamp(yy+bc); + d0[7] = 255; + + yy = y[j+8]; + d1[0] = clamp(yy+rc); + d1[1] = clamp(yy+gc); + d1[2] = clamp(yy+bc); + d1[3] = 255; + + yy = y[j+8+1]; + d1[4] = clamp(yy+rc); + d1[5] = clamp(yy+gc); + d1[6] = clamp(yy+bc); + d1[7] = 255; + + d0 += 8; + d1 += 8; + + c++; + } + y += 64; + } + + y += 64*6 - 64*2; + c += 64*6 - 8; + } +} + +// Y (1 block per MCU) to 8-bit grayscale +void jpeg_decoder::gray_convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + uint8 *d = m_pScan_line_0; + uint8 *s = m_pSample_buf + row * 8; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + *(uint *)d = *(uint *)s; + *(uint *)(&d[4]) = *(uint *)(&s[4]); + + s += 64; + d += 8; + } +} + +void jpeg_decoder::expanded_convert() +{ + int row = m_max_mcu_y_size - m_mcu_lines_left; + + uint8* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8; + + uint8* d = m_pScan_line_0; + + for (int i = m_max_mcus_per_row; i > 0; i--) + { + for (int k = 0; k < m_max_mcu_x_size; k += 8) + { + const int Y_ofs = k * 8; + const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component; + const int 
Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2; + for (int j = 0; j < 8; j++) + { + int y = Py[Y_ofs + j]; + int cb = Py[Cb_ofs + j]; + int cr = Py[Cr_ofs + j]; + + d[0] = clamp(y + m_crr[cr]); + d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16)); + d[2] = clamp(y + m_cbb[cb]); + d[3] = 255; + + d += 4; + } + } + + Py += 64 * m_expanded_blocks_per_mcu; + } +} + +// Find end of image (EOI) marker, so we can return to the user the exact size of the input stream. +void jpeg_decoder::find_eoi() +{ + if (!m_progressive_flag) + { + // Attempt to read the EOI marker. + //get_bits_no_markers(m_bits_left & 7); + + // Prime the bit buffer + m_bits_left = 16; + get_bits(16); + get_bits(16); + + // The next marker _should_ be EOI + process_markers(); + } + + m_total_bytes_read -= m_in_buf_left; +} + +int jpeg_decoder::decode(const void** pScan_line, uint* pScan_line_len) +{ + if ((m_error_code) || (!m_ready_flag)) + return JPGD_FAILED; + + if (m_total_lines_left == 0) + return JPGD_DONE; + + if (m_mcu_lines_left == 0) + { + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + if (m_progressive_flag) + load_next_row(); + else + decode_next_row(); + + // Find the EOI marker if that was the last row. 
+ if (m_total_lines_left <= m_max_mcu_y_size) + find_eoi(); + + m_mcu_lines_left = m_max_mcu_y_size; + } + + if (m_freq_domain_chroma_upsample) + { + expanded_convert(); + *pScan_line = m_pScan_line_0; + } + else + { + switch (m_scan_type) + { + case JPGD_YH2V2: + { + if ((m_mcu_lines_left & 1) == 0) + { + H2V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + + break; + } + case JPGD_YH2V1: + { + H2V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_YH1V2: + { + if ((m_mcu_lines_left & 1) == 0) + { + H1V2Convert(); + *pScan_line = m_pScan_line_0; + } + else + *pScan_line = m_pScan_line_1; + + break; + } + case JPGD_YH1V1: + { + H1V1Convert(); + *pScan_line = m_pScan_line_0; + break; + } + case JPGD_GRAYSCALE: + { + gray_convert(); + *pScan_line = m_pScan_line_0; + + break; + } + } + } + + *pScan_line_len = m_real_dest_bytes_per_scan_line; + + m_mcu_lines_left--; + m_total_lines_left--; + + return JPGD_SUCCESS; +} + +// Creates the tables needed for efficient Huffman decoding. 
+// Builds the decoding tables in *pH for Huffman table number `index`.
+//   look_up   - indexed by the next 8 bits of the stream: the symbol for codes
+//               of 8 bits or less, or a negative entry into `tree` for longer codes.
+//   look_up2  - same index: symbol | (bits to consume << 8), plus flag 0x8000 and
+//               the pre-extracted extra bits (<< 16) when code + extra bits fit in 8 bits.
+//   tree      - binary tree walked one bit at a time for codes longer than 8 bits.
+//   code_size - code length of each symbol.
+// Calls stop_decoding(JPGD_DECODE_ERROR) if the table would index outside `tree`.
+void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
+{
+  int p, i, l, si;
+  uint8 huffsize[257];
+  uint huffcode[257];
+  uint code;
+  uint subtree;
+  int code_size;
+  int lastp;
+  int nextfreeentry;
+  int currententry;
+
+  // Number of entries in pH->tree, used to reject malformed tables below.
+  const uint tree_len = static_cast<uint>(sizeof(pH->tree) / sizeof(pH->tree[0]));
+
+  pH->ac_table = m_huff_ac[index] != 0;
+
+  // Expand the per-length code counts into a list of code sizes.
+  p = 0;
+
+  for (l = 1; l <= 16; l++)
+  {
+    for (i = 1; i <= m_huff_num[index][l]; i++)
+      huffsize[p++] = static_cast<uint8>(l);
+  }
+
+  huffsize[p] = 0;
+
+  lastp = p;
+
+  // Assign consecutive code values within each code length.
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+
+  while (huffsize[p])
+  {
+    while (huffsize[p] == si)
+    {
+      huffcode[p++] = code;
+      code++;
+    }
+
+    code <<= 1;
+    si++;
+  }
+
+  memset(pH->look_up, 0, sizeof(pH->look_up));
+  memset(pH->look_up2, 0, sizeof(pH->look_up2));
+  memset(pH->tree, 0, sizeof(pH->tree));
+  memset(pH->code_size, 0, sizeof(pH->code_size));
+
+  nextfreeentry = -1;
+
+  p = 0;
+
+  while (p < lastp)
+  {
+    i = m_huff_val[index][p];
+    code = huffcode[p];
+    code_size = huffsize[p];
+
+    pH->code_size[i] = static_cast<uint8>(code_size);
+
+    if (code_size <= 8)
+    {
+      // Short code: fill every 8-bit pattern that starts with this code.
+      code <<= (8 - code_size);
+
+      for (l = 1 << (8 - code_size); l > 0; l--)
+      {
+        JPGD_ASSERT(i < 256);
+
+        pH->look_up[code] = i;
+
+        bool has_extrabits = false;
+        int extra_bits = 0;
+        int num_extra_bits = i & 15;
+
+        int bits_to_fetch = code_size;
+        if (num_extra_bits)
+        {
+          int total_codesize = code_size + num_extra_bits;
+          if (total_codesize <= 8)
+          {
+            has_extrabits = true;
+            extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
+            JPGD_ASSERT(extra_bits <= 0x7FFF);
+            bits_to_fetch += num_extra_bits;
+          }
+        }
+
+        if (!has_extrabits)
+          pH->look_up2[code] = i | (bits_to_fetch << 8);
+        else
+          pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
+
+        code++;
+      }
+    }
+    else
+    {
+      // Long code: the first 8 bits select a subtree, the rest walk it bit by bit.
+      subtree = (code >> (code_size - 8)) & 0xFF;
+
+      currententry = pH->look_up[subtree];
+
+      if (currententry == 0)
+      {
+        pH->look_up[subtree] = currententry = nextfreeentry;
+        pH->look_up2[subtree] = currententry = nextfreeentry;
+
+        nextfreeentry -= 2;
+      }
+
+      code <<= (16 - (code_size - 8));
+
+      for (l = code_size; l > 9; l--)
+      {
+        if ((code & 0x8000) == 0)
+          currententry--;
+
+        // A malformed table can yield a positive or too-negative entry; the
+        // unsigned comparison rejects both before `tree` is touched.
+        const uint node = static_cast<uint>(-currententry - 1);
+        if (node >= tree_len)
+          stop_decoding(JPGD_DECODE_ERROR);
+
+        if (pH->tree[node] == 0)
+        {
+          pH->tree[node] = nextfreeentry;
+
+          currententry = nextfreeentry;
+
+          nextfreeentry -= 2;
+        }
+        else
+          currententry = pH->tree[node];
+
+        code <<= 1;
+      }
+
+      if ((code & 0x8000) == 0)
+        currententry--;
+
+      const uint leaf = static_cast<uint>(-currententry - 1);
+      if (leaf >= tree_len)
+        stop_decoding(JPGD_DECODE_ERROR);
+
+      pH->tree[leaf] = i;
+    }
+
+    p++;
+  }
+}
+
+// Verifies the quantization tables needed for this scan are available.
+// Calls stop_decoding(JPGD_UNDEFINED_QUANT_TABLE) if one is missing.
+void jpeg_decoder::check_quant_tables()
+{
+  for (int i = 0; i < m_comps_in_scan; i++)
+    if (m_quant[m_comp_quant[m_comp_list[i]]] == NULL)
+      stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
+}
+
+// Verifies that all the Huffman tables needed for this scan are available,
+// then (re)builds the decoding tables for every Huffman table defined so far.
+// Calls stop_decoding(JPGD_UNDEFINED_HUFF_TABLE) if one is missing.
+void jpeg_decoder::check_huff_tables()
+{
+  for (int i = 0; i < m_comps_in_scan; i++)
+  {
+    // A DC table is only needed when the scan includes coefficient 0.
+    if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == NULL))
+      stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
+
+    // An AC table is only needed when the scan includes coefficients above 0.
+    if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == NULL))
+      stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
+  }
+
+  for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++)
+    if (m_huff_num[i])
+    {
+      if (!m_pHuff_tabs[i])
+        m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
+
+      make_huff_table(i, m_pHuff_tabs[i]);
+    }
+}
+
+// Determines the component order inside each MCU.
+// Also calcs how many MCU's are on each row, etc.
+void jpeg_decoder::calc_mcu_block_order()
+{
+  // Largest horizontal/vertical sampling factor over all frame components.
+  int h_max = 0, v_max = 0;
+  for (int c = 0; c < m_comps_in_frame; c++)
+  {
+    h_max = (m_comp_h_samp[c] > h_max) ? m_comp_h_samp[c] : h_max;
+    v_max = (m_comp_v_samp[c] > v_max) ? m_comp_v_samp[c] : v_max;
+  }
+
+  // Size of each component in 8x8 blocks, rounding up at both steps.
+  for (int c = 0; c < m_comps_in_frame; c++)
+  {
+    const int samples_x = ((m_image_x_size * m_comp_h_samp[c]) + (h_max - 1)) / h_max;
+    const int samples_y = ((m_image_y_size * m_comp_v_samp[c]) + (v_max - 1)) / v_max;
+
+    m_comp_h_blocks[c] = (samples_x + 7) / 8;
+    m_comp_v_blocks[c] = (samples_y + 7) / 8;
+  }
+
+  // Single-component scan: every block is its own MCU.
+  if (m_comps_in_scan == 1)
+  {
+    const int only_id = m_comp_list[0];
+
+    m_mcus_per_row = m_comp_h_blocks[only_id];
+    m_mcus_per_col = m_comp_v_blocks[only_id];
+
+    m_mcu_org[0] = only_id;
+    m_blocks_per_mcu = 1;
+    return;
+  }
+
+  // Interleaved scan: an MCU spans h_max x v_max blocks of the image.
+  m_mcus_per_row = (((m_image_x_size + 7) / 8) + (h_max - 1)) / h_max;
+  m_mcus_per_col = (((m_image_y_size + 7) / 8) + (v_max - 1)) / v_max;
+
+  // Each component contributes h_samp * v_samp consecutive blocks to the MCU.
+  m_blocks_per_mcu = 0;
+  for (int n = 0; n < m_comps_in_scan; n++)
+  {
+    const int id = m_comp_list[n];
+
+    for (int left = m_comp_h_samp[id] * m_comp_v_samp[id]; left != 0; left--)
+      m_mcu_org[m_blocks_per_mcu++] = id;
+  }
+}
+
+// Starts a new scan.
+// Returns JPGD_FALSE when no further SOS marker is found, JPGD_TRUE once the
+// scan's tables are validated and the per-scan decoder state has been reset.
+int jpeg_decoder::init_scan()
+{
+  if (!locate_sos_marker())
+    return JPGD_FALSE;
+
+  calc_mcu_block_order();
+  check_huff_tables();
+  check_quant_tables();
+
+  // Fresh entropy-decoder state for the new scan.
+  memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint));
+  m_eob_run = 0;
+
+  if (m_restart_interval)
+  {
+    m_restarts_left = m_restart_interval;
+    m_next_restart_num = 0;
+  }
+
+  fix_in_buffer();
+
+  return JPGD_TRUE;
+}
+
+// Starts a frame.
Determines if the number of components or sampling factors +// are supported. +void jpeg_decoder::init_frame() +{ + int i; + + if (m_comps_in_frame == 1) + { + if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + m_scan_type = JPGD_GRAYSCALE; + m_max_blocks_per_mcu = 1; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if (m_comps_in_frame == 3) + { + if ( ((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || + ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)) ) + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + + if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH1V1; + + m_max_blocks_per_mcu = 3; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) + { + m_scan_type = JPGD_YH2V1; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 8; + } + else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH1V2; + m_max_blocks_per_mcu = 4; + m_max_mcu_x_size = 8; + m_max_mcu_y_size = 16; + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) + { + m_scan_type = JPGD_YH2V2; + m_max_blocks_per_mcu = 6; + m_max_mcu_x_size = 16; + m_max_mcu_y_size = 16; + } + else + stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS); + } + else + stop_decoding(JPGD_UNSUPPORTED_COLORSPACE); + + m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size; + m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size; + + // These values are for the *destination* pixels: after conversion. + if (m_scan_type == JPGD_GRAYSCALE) + m_dest_bytes_per_pixel = 1; + else + m_dest_bytes_per_pixel = 4; + + m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel; + + m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel); + + // Initialize two scan line buffers. 
+ m_pScan_line_0 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true); + if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) + m_pScan_line_1 = (uint8 *)alloc(m_dest_bytes_per_scan_line, true); + + m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu; + + // Should never happen + if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) + stop_decoding(JPGD_ASSERTION_ERROR); + + // Allocate the coefficient buffer, enough for one MCU + m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t)); + + for (i = 0; i < m_max_blocks_per_mcu; i++) + m_mcu_block_max_zag[i] = 64; + + m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0]; + m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame; + m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu; + // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen). + m_freq_domain_chroma_upsample = false; +#if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING + m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3); +#endif + + if (m_freq_domain_chroma_upsample) + m_pSample_buf = (uint8 *)alloc(m_expanded_blocks_per_row * 64); + else + m_pSample_buf = (uint8 *)alloc(m_max_blocks_per_row * 64); + + m_total_lines_left = m_image_y_size; + + m_mcu_lines_left = 0; + + create_look_ups(); +} + +// The coeff_buf series of methods originally stored the coefficients +// into a "virtual" file which was located in EMS, XMS, or a disk file. A cache +// was used to make this process more efficient. Now, we can store the entire +// thing in RAM. 
+// Allocates a zero-filled coefficient buffer holding block_num_x * block_num_y
+// blocks of block_len_x * block_len_y coefficients each.
+jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
+{
+  coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
+
+  cb->block_num_x = block_num_x;
+  cb->block_num_y = block_num_y;
+  cb->block_len_x = block_len_x;
+  cb->block_len_y = block_len_y;
+  cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
+  cb->pData = (uint8 *)alloc(cb->block_size * block_num_x * block_num_y, true);
+  return cb;
+}
+
+// Returns a pointer to the first coefficient of block (block_x, block_y).
+// Blocks are stored row by row.
+inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
+{
+  JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
+  return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
+}
+
+// The following methods decode the various types of m_blocks encountered
+// in progressively encoded images.
+
+// DC coefficient, first pass: decodes the DC difference, adds it to the
+// component's running prediction and stores it scaled by 2^m_successive_low.
+void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+{
+  int s, r;
+  jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+  if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
+  {
+    r = pD->get_bits_no_markers(s);
+    s = JPGD_HUFF_EXTEND(r, s);
+  }
+
+  pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
+
+  // Shift as unsigned: s may be negative, and left-shifting a negative signed
+  // value is undefined behaviour before C++20.
+  p[0] = static_cast<jpgd_block_t>(static_cast<uint>(s) << pD->m_successive_low);
+}
+
+// DC coefficient, refinement pass: one bit per block, OR'ed in at bit m_successive_low.
+void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+{
+  if (pD->get_bits_no_markers(1))
+  {
+    jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+
+    p[0] |= (1 << pD->m_successive_low);
+  }
+}
+
+// AC coefficients, first pass over the band [m_spectral_start, m_spectral_end].
+// Honours end-of-band runs that span several blocks via m_eob_run.
+void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+{
+  int k, s, r;
+
+  // Still inside an end-of-band run: this block contributes nothing.
+  if (pD->m_eob_run)
+  {
+    pD->m_eob_run--;
+    return;
+  }
+
+  jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+  for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
+  {
+    s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
+
+    r = s >> 4;
+    s &= 15;
+
+    if (s)
+    {
+      if ((k += r) > 63)
+        pD->stop_decoding(JPGD_DECODE_ERROR);
+
+      r = pD->get_bits_no_markers(s);
+      s = JPGD_HUFF_EXTEND(r, s);
+
+      // Unsigned shift for the same reason as in decode_block_dc_first.
+      p[g_ZAG[k]] = static_cast<jpgd_block_t>(static_cast<uint>(s) << pD->m_successive_low);
+    }
+    else
+    {
+      if (r == 15)
+      {
+        if ((k += 15) > 63)
+          pD->stop_decoding(JPGD_DECODE_ERROR);
+      }
+      else
+      {
+        // EOBn: start an end-of-band run of (1 << r) + extra bits blocks,
+        // of which this block is the first.
+        pD->m_eob_run = 1 << r;
+
+        if (r)
+          pD->m_eob_run += pD->get_bits_no_markers(r);
+
+        pD->m_eob_run--;
+
+        break;
+      }
+    }
+  }
+}
+
+// AC coefficients, refinement pass: adds one more bit of precision to the
+// coefficients already non-zero and introduces new +/- (1 << m_successive_low)
+// coefficients.
+void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
+{
+  int s, k, r;
+  int p1 = 1 << pD->m_successive_low;
+  // Same value as (-1) << m_successive_low, without shifting a negative
+  // number (undefined behaviour before C++20).
+  int m1 = -p1;
+  jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+
+  JPGD_ASSERT(pD->m_spectral_end <= 63);
+
+  k = pD->m_spectral_start;
+
+  if (pD->m_eob_run == 0)
+  {
+    for ( ; k <= pD->m_spectral_end; k++)
+    {
+      s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
+
+      r = s >> 4;
+      s &= 15;
+
+      if (s)
+      {
+        // A newly non-zero coefficient always has magnitude 1 in this pass.
+        if (s != 1)
+          pD->stop_decoding(JPGD_DECODE_ERROR);
+
+        if (pD->get_bits_no_markers(1))
+          s = p1;
+        else
+          s = m1;
+      }
+      else
+      {
+        if (r != 15)
+        {
+          pD->m_eob_run = 1 << r;
+
+          if (r)
+            pD->m_eob_run += pD->get_bits_no_markers(r);
+
+          break;
+        }
+      }
+
+      // Skip r zero-history coefficients, refining every already non-zero
+      // coefficient passed on the way.
+      do
+      {
+        jpgd_block_t *this_coef = p + g_ZAG[k & 63];
+
+        if (*this_coef != 0)
+        {
+          if (pD->get_bits_no_markers(1))
+          {
+            if ((*this_coef & p1) == 0)
+            {
+              if (*this_coef >= 0)
+                *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
+              else
+                *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
+            }
+          }
+        }
+        else
+        {
+          if (--r < 0)
+            break;
+        }
+
+        k++;
+
+      } while (k <= pD->m_spectral_end);
+
+      if ((s) && (k < 64))
+      {
+        p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
+      }
+    }
+  }
+
+  // Inside an end-of-band run: only refine coefficients that are already non-zero.
+  if (pD->m_eob_run > 0)
+  {
+    for ( ; k <= pD->m_spectral_end; k++)
+    {
+      jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
+
+      if (*this_coef != 0)
+      {
+        if (pD->get_bits_no_markers(1))
+        {
+          if ((*this_coef & p1) == 0)
+          {
+            if (*this_coef >= 0)
+              *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
+            else
+              *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
+          }
+        }
+      }
+    }
+
+    pD->m_eob_run--;
+  }
+}
+
+// Decode a scan in a progressively encoded image.
+// Visits every block of every MCU in scan order and calls decode_block_func on
+// it, processing restart markers where the restart interval requires it.
+void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
+{
+  int mcu_row, mcu_col, mcu_block;
+  // NOTE: this local m_block_y_mcu deliberately shadows the member of the same
+  // name; the member is not modified by this function.
+  int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
+
+  memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
+
+  for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++)
+  {
+    int component_num, component_id;
+
+    memset(block_x_mcu, 0, sizeof(block_x_mcu));
+
+    for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
+    {
+      int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
+
+      if ((m_restart_interval) && (m_restarts_left == 0))
+        process_restart();
+
+      for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
+      {
+        component_id = m_mcu_org[mcu_block];
+
+        decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+
+        if (m_comps_in_scan == 1)
+          block_x_mcu[component_id]++;
+        else
+        {
+          if (++block_x_mcu_ofs == m_comp_h_samp[component_id])
+          {
+            block_x_mcu_ofs = 0;
+
+            if (++block_y_mcu_ofs == m_comp_v_samp[component_id])
+            {
+              block_y_mcu_ofs = 0;
+              block_x_mcu[component_id] += m_comp_h_samp[component_id];
+            }
+          }
+        }
+      }
+
+      m_restarts_left--;
+    }
+
+    if (m_comps_in_scan == 1)
+      m_block_y_mcu[m_comp_list[0]]++;
+    else
+    {
+      for (component_num = 0; component_num < m_comps_in_scan; component_num++)
+      {
+        component_id = m_comp_list[component_num];
+        m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
+      }
+    }
+  }
+}
+
+// Decode a progressively encoded image.
+void jpeg_decoder::init_progressive()
+{
+  int i;
+
+  if (m_comps_in_frame == 4)
+    stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
+
+  // Allocate the coefficient buffers.
+ for (i = 0; i < m_comps_in_frame; i++) + { + m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1); + m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8); + } + + for ( ; ; ) + { + int dc_only_scan, refinement_scan; + pDecode_block_func decode_block_func; + + if (!init_scan()) + break; + + dc_only_scan = (m_spectral_start == 0); + refinement_scan = (m_successive_high != 0); + + if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if (dc_only_scan) + { + if (m_spectral_end) + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + } + else if (m_comps_in_scan != 1) /* AC scans can only contain one component */ + stop_decoding(JPGD_BAD_SOS_SPECTRAL); + + if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) + stop_decoding(JPGD_BAD_SOS_SUCCESSIVE); + + if (dc_only_scan) + { + if (refinement_scan) + decode_block_func = decode_block_dc_refine; + else + decode_block_func = decode_block_dc_first; + } + else + { + if (refinement_scan) + decode_block_func = decode_block_ac_refine; + else + decode_block_func = decode_block_ac_first; + } + + decode_scan(decode_block_func); + + m_bits_left = 16; + get_bits(16); + get_bits(16); + } + + m_comps_in_scan = m_comps_in_frame; + + for (i = 0; i < m_comps_in_frame; i++) + m_comp_list[i] = i; + + calc_mcu_block_order(); +} + +void jpeg_decoder::init_sequential() +{ + if (!init_scan()) + stop_decoding(JPGD_UNEXPECTED_MARKER); +} + +void jpeg_decoder::decode_start() +{ + init_frame(); + + if (m_progressive_flag) + init_progressive(); + else + init_sequential(); +} + +void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream) +{ + init(pStream); + locate_sof_marker(); +} + +jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream) +{ + if (setjmp(m_jmp_state)) + return; + decode_init(pStream); +} + +int jpeg_decoder::begin_decoding() +{ + if 
(m_ready_flag) + return JPGD_SUCCESS; + + if (m_error_code) + return JPGD_FAILED; + + if (setjmp(m_jmp_state)) + return JPGD_FAILED; + + decode_start(); + + m_ready_flag = true; + + return JPGD_SUCCESS; +} + +jpeg_decoder::~jpeg_decoder() +{ + free_all_blocks(); +} + +jpeg_decoder_file_stream::jpeg_decoder_file_stream() +{ + m_pFile = NULL; + m_eof_flag = false; + m_error_flag = false; +} + +void jpeg_decoder_file_stream::close() +{ + if (m_pFile) + { + fclose(m_pFile); + m_pFile = NULL; + } + + m_eof_flag = false; + m_error_flag = false; +} + +jpeg_decoder_file_stream::~jpeg_decoder_file_stream() +{ + close(); +} + +bool jpeg_decoder_file_stream::open(const char *Pfilename) +{ + close(); + + m_eof_flag = false; + m_error_flag = false; + +#if defined(_MSC_VER) + m_pFile = NULL; + fopen_s(&m_pFile, Pfilename, "rb"); +#else + m_pFile = fopen(Pfilename, "rb"); +#endif + return m_pFile != NULL; +} + +int jpeg_decoder_file_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) +{ + if (!m_pFile) + return -1; + + if (m_eof_flag) + { + *pEOF_flag = true; + return 0; + } + + if (m_error_flag) + return -1; + + int bytes_read = static_cast(fread(pBuf, 1, max_bytes_to_read, m_pFile)); + if (bytes_read < max_bytes_to_read) + { + if (ferror(m_pFile)) + { + m_error_flag = true; + return -1; + } + + m_eof_flag = true; + *pEOF_flag = true; + } + + return bytes_read; +} + +bool jpeg_decoder_mem_stream::open(const uint8 *pSrc_data, uint size) +{ + close(); + m_pSrc_data = pSrc_data; + m_ofs = 0; + m_size = size; + return true; +} + +int jpeg_decoder_mem_stream::read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) +{ + *pEOF_flag = false; + + if (!m_pSrc_data) + return -1; + + uint bytes_remaining = m_size - m_ofs; + if ((uint)max_bytes_to_read > bytes_remaining) + { + max_bytes_to_read = bytes_remaining; + *pEOF_flag = true; + } + + memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read); + m_ofs += max_bytes_to_read; + + return max_bytes_to_read; +} + +unsigned char 
*decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps) +{ + if (!actual_comps) + return NULL; + *actual_comps = 0; + + if ((!pStream) || (!width) || (!height) || (!req_comps)) + return NULL; + + if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) + return NULL; + + jpeg_decoder decoder(pStream); + if (decoder.get_error_code() != JPGD_SUCCESS) + return NULL; + + const int image_width = decoder.get_width(), image_height = decoder.get_height(); + *width = image_width; + *height = image_height; + *actual_comps = decoder.get_num_components(); + + if (decoder.begin_decoding() != JPGD_SUCCESS) + return NULL; + + const int dst_bpl = image_width * req_comps; + + uint8 *pImage_data = (uint8*)jpgd_malloc(dst_bpl * image_height); + if (!pImage_data) + return NULL; + + for (int y = 0; y < image_height; y++) + { + const uint8* pScan_line; + uint scan_line_len; + if (decoder.decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) + { + jpgd_free(pImage_data); + return NULL; + } + + uint8 *pDst = pImage_data + y * dst_bpl; + + if (((req_comps == 1) && (decoder.get_num_components() == 1)) || ((req_comps == 4) && (decoder.get_num_components() == 3))) + memcpy(pDst, pScan_line, dst_bpl); + else if (decoder.get_num_components() == 1) + { + if (req_comps == 3) + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst += 3; + } + } + else + { + for (int x = 0; x < image_width; x++) + { + uint8 luma = pScan_line[x]; + pDst[0] = luma; + pDst[1] = luma; + pDst[2] = luma; + pDst[3] = 255; + pDst += 4; + } + } + } + else if (decoder.get_num_components() == 3) + { + if (req_comps == 1) + { + const int YR = 19595, YG = 38470, YB = 7471; + for (int x = 0; x < image_width; x++) + { + int r = pScan_line[x*4+0]; + int g = pScan_line[x*4+1]; + int b = pScan_line[x*4+2]; + *pDst++ = static_cast((r * YR + g * YG + b * YB + 32768) >> 
16); + } + } + else + { + for (int x = 0; x < image_width; x++) + { + pDst[0] = pScan_line[x*4+0]; + pDst[1] = pScan_line[x*4+1]; + pDst[2] = pScan_line[x*4+2]; + pDst += 3; + } + } + } + } + + return pImage_data; +} + +unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps) +{ + jpgd::jpeg_decoder_mem_stream mem_stream(pSrc_data, src_data_size); + return decompress_jpeg_image_from_stream(&mem_stream, width, height, actual_comps, req_comps); +} + +unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps) +{ + jpgd::jpeg_decoder_file_stream file_stream; + if (!file_stream.open(pSrc_filename)) + return NULL; + return decompress_jpeg_image_from_stream(&file_stream, width, height, actual_comps, req_comps); +} + +} // namespace jpgd \ No newline at end of file diff --git a/libs/jpeg/jpgd.h b/libs/jpeg/jpgd.h new file mode 100644 index 0000000..46069a1 --- /dev/null +++ b/libs/jpeg/jpgd.h @@ -0,0 +1,319 @@ +// jpgd.h - C++ class for JPEG decompression. +// Public domain, Rich Geldreich +#ifndef JPEG_DECODER_H +#define JPEG_DECODER_H + +#include +#include +#include + +#ifdef _MSC_VER + #define JPGD_NORETURN __declspec(noreturn) +#elif defined(__GNUC__) + #define JPGD_NORETURN __attribute__ ((noreturn)) +#else + #define JPGD_NORETURN +#endif + +namespace jpgd +{ + typedef unsigned char uint8; + typedef signed short int16; + typedef unsigned short uint16; + typedef unsigned int uint; + typedef signed int int32; + + // Loads a JPEG image from a memory buffer or a file. + // req_comps can be 1 (grayscale), 3 (RGB), or 4 (RGBA). + // On return, width/height will be set to the image's dimensions, and actual_comps will be set to the either 1 (grayscale) or 3 (RGB). 
+ // Notes: For more control over where and how the source data is read, see the decompress_jpeg_image_from_stream() function below, or call the jpeg_decoder class directly. + // Requesting a 8 or 32bpp image is currently a little faster than 24bpp because the jpeg_decoder class itself currently always unpacks to either 8 or 32bpp. + unsigned char *decompress_jpeg_image_from_memory(const unsigned char *pSrc_data, int src_data_size, int *width, int *height, int *actual_comps, int req_comps); + unsigned char *decompress_jpeg_image_from_file(const char *pSrc_filename, int *width, int *height, int *actual_comps, int req_comps); + + // Success/failure error codes. + enum jpgd_status + { + JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1, + JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE, + JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS, + JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH, + JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMITIC_SUPPORT, JPGD_UNEXPECTED_MARKER, + JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS, + JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE, + JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR, + JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM + }; + + // Input stream interface. + // Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available. + // The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set. + // It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer. 
+ // Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding. + class jpeg_decoder_stream + { + public: + jpeg_decoder_stream() { } + virtual ~jpeg_decoder_stream() { } + + // The read() method is called when the internal input buffer is empty. + // Parameters: + // pBuf - input buffer + // max_bytes_to_read - maximum bytes that can be written to pBuf + // pEOF_flag - set this to true if at end of stream (no more bytes remaining) + // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0). + // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full. + virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0; + }; + + // stdio FILE stream class. + class jpeg_decoder_file_stream : public jpeg_decoder_stream + { + jpeg_decoder_file_stream(const jpeg_decoder_file_stream &); + jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &); + + FILE *m_pFile; + bool m_eof_flag, m_error_flag; + + public: + jpeg_decoder_file_stream(); + virtual ~jpeg_decoder_file_stream(); + + bool open(const char *Pfilename); + void close(); + + virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag); + }; + + // Memory stream class. + class jpeg_decoder_mem_stream : public jpeg_decoder_stream + { + const uint8 *m_pSrc_data; + uint m_ofs, m_size; + + public: + jpeg_decoder_mem_stream() : m_pSrc_data(NULL), m_ofs(0), m_size(0) { } + jpeg_decoder_mem_stream(const uint8 *pSrc_data, uint size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) { } + + virtual ~jpeg_decoder_mem_stream() { } + + bool open(const uint8 *pSrc_data, uint size); + void close() { m_pSrc_data = NULL; m_ofs = 0; m_size = 0; } + + virtual int read(uint8 *pBuf, int max_bytes_to_read, bool *pEOF_flag); + }; + + // Loads JPEG file from a jpeg_decoder_stream. 
+ unsigned char *decompress_jpeg_image_from_stream(jpeg_decoder_stream *pStream, int *width, int *height, int *actual_comps, int req_comps); + + enum + { + JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4, + JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384 + }; + + typedef int16 jpgd_quant_t; + typedef int16 jpgd_block_t; + + class jpeg_decoder + { + public: + // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc. + // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline. + jpeg_decoder(jpeg_decoder_stream *pStream); + + ~jpeg_decoder(); + + // Call this method after constructing the object to begin decompression. + // If JPGD_SUCCESS is returned you may then call decode() on each scanline. + int begin_decoding(); + + // Returns the next scan line. + // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1). + // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4). + // Returns JPGD_SUCCESS if a scan line has been returned. + // Returns JPGD_DONE if all scan lines have been returned. + // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info. 
+ int decode(const void** pScan_line, uint* pScan_line_len); + + inline jpgd_status get_error_code() const { return m_error_code; } + + inline int get_width() const { return m_image_x_size; } + inline int get_height() const { return m_image_y_size; } + + inline int get_num_components() const { return m_comps_in_frame; } + + inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; } + inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); } + + // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file). + inline int get_total_bytes_read() const { return m_total_bytes_read; } + + private: + jpeg_decoder(const jpeg_decoder &); + jpeg_decoder &operator =(const jpeg_decoder &); + + typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int); + + struct huff_tables + { + bool ac_table; + uint look_up[256]; + uint look_up2[256]; + uint8 code_size[256]; + uint tree[512]; + }; + + struct coeff_buf + { + uint8 *pData; + int block_num_x, block_num_y; + int block_len_x, block_len_y; + int block_size; + }; + + struct mem_block + { + mem_block *m_pNext; + size_t m_used_count; + size_t m_size; + char m_data[1]; + }; + + jmp_buf m_jmp_state; + mem_block *m_pMem_blocks; + int m_image_x_size; + int m_image_y_size; + jpeg_decoder_stream *m_pStream; + int m_progressive_flag; + uint8 m_huff_ac[JPGD_MAX_HUFF_TABLES]; + uint8* m_huff_num[JPGD_MAX_HUFF_TABLES]; // pointer to number of Huffman codes per bit size + uint8* m_huff_val[JPGD_MAX_HUFF_TABLES]; // pointer to Huffman codes per bit size + jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables + int m_scan_type; // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported) + int m_comps_in_frame; // # of components in frame + int m_comp_h_samp[JPGD_MAX_COMPONENTS]; // component's horizontal sampling factor + int m_comp_v_samp[JPGD_MAX_COMPONENTS]; // component's vertical 
sampling factor + int m_comp_quant[JPGD_MAX_COMPONENTS]; // component's quantization table selector + int m_comp_ident[JPGD_MAX_COMPONENTS]; // component's ID + int m_comp_h_blocks[JPGD_MAX_COMPONENTS]; + int m_comp_v_blocks[JPGD_MAX_COMPONENTS]; + int m_comps_in_scan; // # of components in scan + int m_comp_list[JPGD_MAX_COMPS_IN_SCAN]; // components in this scan + int m_comp_dc_tab[JPGD_MAX_COMPONENTS]; // component's DC Huffman coding table selector + int m_comp_ac_tab[JPGD_MAX_COMPONENTS]; // component's AC Huffman coding table selector + int m_spectral_start; // spectral selection start + int m_spectral_end; // spectral selection end + int m_successive_low; // successive approximation low + int m_successive_high; // successive approximation high + int m_max_mcu_x_size; // MCU's max. X size in pixels + int m_max_mcu_y_size; // MCU's max. Y size in pixels + int m_blocks_per_mcu; + int m_max_blocks_per_row; + int m_mcus_per_row, m_mcus_per_col; + int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU]; + int m_total_lines_left; // total # lines left in image + int m_mcu_lines_left; // total # lines left in this MCU + int m_real_dest_bytes_per_scan_line; + int m_dest_bytes_per_scan_line; // rounded up + int m_dest_bytes_per_pixel; // 4 (RGB) or 1 (Y) + huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES]; + coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS]; + coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS]; + int m_eob_run; + int m_block_y_mcu[JPGD_MAX_COMPONENTS]; + uint8* m_pIn_buf_ofs; + int m_in_buf_left; + int m_tem_flag; + bool m_eof_flag; + uint8 m_in_buf_pad_start[128]; + uint8 m_in_buf[JPGD_IN_BUF_SIZE + 128]; + uint8 m_in_buf_pad_end[128]; + int m_bits_left; + uint m_bit_buf; + int m_restart_interval; + int m_restarts_left; + int m_next_restart_num; + int m_max_mcus_per_row; + int m_max_blocks_per_mcu; + int m_expanded_blocks_per_mcu; + int m_expanded_blocks_per_row; + int m_expanded_blocks_per_component; + bool m_freq_domain_chroma_upsample; + int m_max_mcus_per_col; + uint 
m_last_dc_val[JPGD_MAX_COMPONENTS]; + jpgd_block_t* m_pMCU_coefficients; + int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU]; + uint8* m_pSample_buf; + int m_crr[256]; + int m_cbb[256]; + int m_crg[256]; + int m_cbg[256]; + uint8* m_pScan_line_0; + uint8* m_pScan_line_1; + jpgd_status m_error_code; + bool m_ready_flag; + int m_total_bytes_read; + + void free_all_blocks(); + JPGD_NORETURN void stop_decoding(jpgd_status status); + void *alloc(size_t n, bool zero = false); + void word_clear(void *p, uint16 c, uint n); + void prep_in_buffer(); + void read_dht_marker(); + void read_dqt_marker(); + void read_sof_marker(); + void skip_variable_marker(); + void read_dri_marker(); + void read_sos_marker(); + int next_marker(); + int process_markers(); + void locate_soi_marker(); + void locate_sof_marker(); + int locate_sos_marker(); + void init(jpeg_decoder_stream * pStream); + void create_look_ups(); + void fix_in_buffer(); + void transform_mcu(int mcu_row); + void transform_mcu_expand(int mcu_row); + coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y); + inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y); + void load_next_row(); + void decode_next_row(); + void make_huff_table(int index, huff_tables *pH); + void check_quant_tables(); + void check_huff_tables(); + void calc_mcu_block_order(); + int init_scan(); + void init_frame(); + void process_restart(); + void decode_scan(pDecode_block_func decode_block_func); + void init_progressive(); + void init_sequential(); + void decode_start(); + void decode_init(jpeg_decoder_stream * pStream); + void H2V2Convert(); + void H2V1Convert(); + void H1V2Convert(); + void H1V1Convert(); + void gray_convert(); + void expanded_convert(); + void find_eoi(); + inline uint get_char(); + inline uint get_char(bool *pPadding_flag); + inline void stuff_char(uint8 q); + inline uint8 get_octet(); + inline uint get_bits(int num_bits); + inline uint get_bits_no_markers(int 
numbits); + inline int huff_decode(huff_tables *pH); + inline int huff_decode(huff_tables *pH, int& extrabits); + static inline uint8 clamp(int i); + static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y); + static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y); + static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y); + static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y); + }; + +} // namespace jpgd + +#endif // JPEG_DECODER_H diff --git a/libs/jpeg/jpge.cbp b/libs/jpeg/jpge.cbp new file mode 100644 index 0000000..5fc535a --- /dev/null +++ b/libs/jpeg/jpge.cbp @@ -0,0 +1,53 @@ + + + + + + diff --git a/libs/jpeg/jpge.cpp b/libs/jpeg/jpge.cpp new file mode 100644 index 0000000..7284a5c --- /dev/null +++ b/libs/jpeg/jpge.cpp @@ -0,0 +1,1038 @@ +// jpge.cpp - C++ class for JPEG compression. +// Public domain, Rich Geldreich +// v1.01, Dec. 18, 2010 - Initial release +// v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.) +// v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc. +// Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03). +// v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug. +// Code tweaks to fix VS2008 static code analysis warnings (all looked harmless). +// Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02. 
+ +#include "jpge.h" + +#include +#include +#include + +#define JPGE_MAX(a,b) (((a)>(b))?(a):(b)) +#define JPGE_MIN(a,b) (((a)<(b))?(a):(b)) + +namespace jpge { + +static inline void *jpge_malloc(size_t nSize) { return malloc(nSize); } +static inline void jpge_free(void *p) { free(p); } + +// Various JPEG enums and tables. +enum { M_SOF0 = 0xC0, M_DHT = 0xC4, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_APP0 = 0xE0 }; +enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 }; + +static uint8 s_zag[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 }; +static int16 s_std_lum_quant[64] = { 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 }; +static int16 s_std_croma_quant[64] = { 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 }; +static uint8 s_dc_lum_bits[17] = { 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 }; +static uint8 s_dc_lum_val[DC_LUM_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; +static uint8 s_ac_lum_bits[17] = { 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d }; +static uint8 s_ac_lum_val[AC_LUM_CODES] = +{ + 0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0, + 0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49, + 
0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89, + 0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5, + 0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8, + 0xf9,0xfa +}; +static uint8 s_dc_chroma_bits[17] = { 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 }; +static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 }; +static uint8 s_ac_chroma_bits[17] = { 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 }; +static uint8 s_ac_chroma_val[AC_CHROMA_CODES] = +{ + 0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0, + 0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48, + 0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87, + 0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3, + 0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8, + 0xf9,0xfa +}; + +// Low-level helper functions. 
+template inline void clear_obj(T &obj) { memset(&obj, 0, sizeof(obj)); } + +const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329; +static inline uint8 clamp(int i) { if (static_cast(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast(i); } + +static void RGB_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels) +{ + for ( ; num_pixels; pDst += 3, pSrc += 3, num_pixels--) + { + const int r = pSrc[0], g = pSrc[1], b = pSrc[2]; + pDst[0] = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); + pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); + } +} + +static void RGB_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels) +{ + for ( ; num_pixels; pDst++, pSrc += 3, num_pixels--) + pDst[0] = static_cast((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16); +} + +static void RGBA_to_YCC(uint8* pDst, const uint8 *pSrc, int num_pixels) +{ + for ( ; num_pixels; pDst += 3, pSrc += 4, num_pixels--) + { + const int r = pSrc[0], g = pSrc[1], b = pSrc[2]; + pDst[0] = static_cast((r * YR + g * YG + b * YB + 32768) >> 16); + pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16)); + pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16)); + } +} + +static void RGBA_to_Y(uint8* pDst, const uint8 *pSrc, int num_pixels) +{ + for ( ; num_pixels; pDst++, pSrc += 4, num_pixels--) + pDst[0] = static_cast((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16); +} + +static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels) +{ + for( ; num_pixels; pDst += 3, pSrc++, num_pixels--) { pDst[0] = pSrc[0]; pDst[1] = 128; pDst[2] = 128; } +} + +// Forward DCT - DCT derived from jfdctint. 
+enum { CONST_BITS = 13, ROW_BITS = 2 }; +#define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n) - 1))) >> (n)) +#define DCT_MUL(var, c) (static_cast(var) * static_cast(c)) +#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \ + int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \ + int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \ + int32 u1 = DCT_MUL(t12 + t13, 4433); \ + s2 = u1 + DCT_MUL(t13, 6270); \ + s6 = u1 + DCT_MUL(t12, -15137); \ + u1 = t4 + t7; \ + int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \ + int32 z5 = DCT_MUL(u3 + u4, 9633); \ + t4 = DCT_MUL(t4, 2446); t5 = DCT_MUL(t5, 16819); \ + t6 = DCT_MUL(t6, 25172); t7 = DCT_MUL(t7, 12299); \ + u1 = DCT_MUL(u1, -7373); u2 = DCT_MUL(u2, -20995); \ + u3 = DCT_MUL(u3, -16069); u4 = DCT_MUL(u4, -3196); \ + u3 += z5; u4 += z5; \ + s0 = t10 + t11; s1 = t7 + u1 + u4; s3 = t6 + u2 + u3; s4 = t10 - t11; s5 = t5 + u2 + u4; s7 = t4 + u1 + u3; + +static void DCT2D(int32 *p) +{ + int32 c, *q = p; + for (c = 7; c >= 0; c--, q += 8) + { + int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7]; + DCT1D(s0, s1, s2, s3, s4, s5, s6, s7); + q[0] = s0 << ROW_BITS; q[1] = DCT_DESCALE(s1, CONST_BITS-ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS-ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS-ROW_BITS); + q[4] = s4 << ROW_BITS; q[5] = DCT_DESCALE(s5, CONST_BITS-ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS-ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS-ROW_BITS); + } + for (q = p, c = 7; c >= 0; c--, q++) + { + int32 s0 = q[0*8], s1 = q[1*8], s2 = q[2*8], s3 = q[3*8], s4 = q[4*8], s5 = q[5*8], s6 = q[6*8], s7 = q[7*8]; + DCT1D(s0, s1, s2, s3, s4, s5, s6, s7); + q[0*8] = DCT_DESCALE(s0, ROW_BITS+3); q[1*8] = DCT_DESCALE(s1, CONST_BITS+ROW_BITS+3); q[2*8] = DCT_DESCALE(s2, CONST_BITS+ROW_BITS+3); q[3*8] = DCT_DESCALE(s3, CONST_BITS+ROW_BITS+3); + q[4*8] = DCT_DESCALE(s4, ROW_BITS+3); q[5*8] = DCT_DESCALE(s5, 
CONST_BITS+ROW_BITS+3); q[6*8] = DCT_DESCALE(s6, CONST_BITS+ROW_BITS+3); q[7*8] = DCT_DESCALE(s7, CONST_BITS+ROW_BITS+3); + } +} + +struct sym_freq { uint m_key, m_sym_index; }; + +// Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values. +static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1) +{ + const uint cMaxPasses = 4; + uint32 hist[256 * cMaxPasses]; clear_obj(hist); + for (uint i = 0; i < num_syms; i++) { uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; hist[256*2 + ((freq >> 16) & 0xFF)]++; hist[256*3 + ((freq >> 24) & 0xFF)]++; } + sym_freq* pCur_syms = pSyms0, *pNew_syms = pSyms1; + uint total_passes = cMaxPasses; while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--; + for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8) + { + const uint32* pHist = &hist[pass << 8]; + uint offsets[256], cur_ofs = 0; + for (uint i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; } + for (uint i = 0; i < num_syms; i++) + pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i]; + sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t; + } + return pCur_syms; +} + +// calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996. 
+static void calculate_minimum_redundancy(sym_freq *A, int n) +{ + int root, leaf, next, avbl, used, dpth; + if (n==0) return; else if (n==1) { A[0].m_key = 1; return; } + A[0].m_key += A[1].m_key; root = 0; leaf = 2; + for (next=1; next < n-1; next++) + { + if (leaf>=n || A[root].m_key=n || (root=0; next--) A[next].m_key = A[A[next].m_key].m_key+1; + avbl = 1; used = dpth = 0; root = n-2; next = n-1; + while (avbl>0) + { + while (root>=0 && (int)A[root].m_key==dpth) { used++; root--; } + while (avbl>used) { A[next--].m_key = dpth; avbl--; } + avbl = 2*used; dpth++; used = 0; + } +} + +// Limits canonical Huffman code table's max code size to max_code_size. +static void huffman_enforce_max_code_size(int *pNum_codes, int code_list_len, int max_code_size) +{ + if (code_list_len <= 1) return; + + for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i]; + + uint32 total = 0; + for (int i = max_code_size; i > 0; i--) + total += (((uint32)pNum_codes[i]) << (max_code_size - i)); + + while (total != (1UL << max_code_size)) + { + pNum_codes[max_code_size]--; + for (int i = max_code_size - 1; i > 0; i--) + { + if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; } + } + total--; + } +} + +// Generates an optimized offman table. +void jpeg_encoder::optimize_huffman_table(int table_num, int table_len) +{ + sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS]; + syms0[0].m_key = 1; syms0[0].m_sym_index = 0; // dummy symbol, assures that no valid code contains all 1's + int num_used_syms = 1; + const uint32 *pSym_count = &m_huff_count[table_num][0]; + for (int i = 0; i < table_len; i++) + if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i + 1; } + sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0, syms1); + calculate_minimum_redundancy(pSyms, num_used_syms); + + // Count the # of symbols of each code size. 
+ int num_codes[1 + MAX_HUFF_CODESIZE]; clear_obj(num_codes); + for (int i = 0; i < num_used_syms; i++) + num_codes[pSyms[i].m_key]++; + + const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol) + huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT); + + // Compute m_huff_bits array, which contains the # of symbols per code size. + clear_obj(m_huff_bits[table_num]); + for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++) + m_huff_bits[table_num][i] = static_cast(num_codes[i]); + + // Remove the dummy symbol added above, which must be in largest bucket. + for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--) + { + if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; } + } + + // Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest). + for (int i = num_used_syms - 1; i >= 1; i--) + m_huff_val[table_num][num_used_syms - 1 - i] = static_cast(pSyms[i].m_sym_index - 1); +} + +// JPEG marker generation. +void jpeg_encoder::emit_byte(uint8 i) +{ + m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_obj(i); +} + +void jpeg_encoder::emit_word(uint i) +{ + emit_byte(uint8(i >> 8)); emit_byte(uint8(i & 0xFF)); +} + +void jpeg_encoder::emit_marker(int marker) +{ + emit_byte(uint8(0xFF)); emit_byte(uint8(marker)); +} + +// Emit JFIF marker +void jpeg_encoder::emit_jfif_app0() +{ + emit_marker(M_APP0); + emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); + emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */ + emit_byte(0); + emit_byte(1); /* Major version */ + emit_byte(1); /* Minor version */ + emit_byte(0); /* Density unit */ + emit_word(1); + emit_word(1); + emit_byte(0); /* No thumbnail image */ + emit_byte(0); +} + +// Emit quantization tables +void jpeg_encoder::emit_dqt() +{ + for (int i = 0; i < ((m_num_components == 3) ? 
2 : 1); i++) + { + emit_marker(M_DQT); + emit_word(64 + 1 + 2); + emit_byte(static_cast(i)); + for (int j = 0; j < 64; j++) + emit_byte(static_cast(m_quantization_tables[i][j])); + } +} + +// Emit start of frame marker +void jpeg_encoder::emit_sof() +{ + emit_marker(M_SOF0); /* baseline */ + emit_word(3 * m_num_components + 2 + 5 + 1); + emit_byte(8); /* precision */ + emit_word(m_image_y); + emit_word(m_image_x); + emit_byte(m_num_components); + for (int i = 0; i < m_num_components; i++) + { + emit_byte(static_cast(i + 1)); /* component ID */ + emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]); /* h and v sampling */ + emit_byte(i > 0); /* quant. table num */ + } +} + +// Emit Huffman table. +void jpeg_encoder::emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag) +{ + emit_marker(M_DHT); + + int length = 0; + for (int i = 1; i <= 16; i++) + length += bits[i]; + + emit_word(length + 2 + 1 + 16); + emit_byte(static_cast(index + (ac_flag << 4))); + + for (int i = 1; i <= 16; i++) + emit_byte(bits[i]); + + for (int i = 0; i < length; i++) + emit_byte(val[i]); +} + +// Emit all Huffman tables. +void jpeg_encoder::emit_dhts() +{ + emit_dht(m_huff_bits[0+0], m_huff_val[0+0], 0, false); + emit_dht(m_huff_bits[2+0], m_huff_val[2+0], 0, true); + if (m_num_components == 3) + { + emit_dht(m_huff_bits[0+1], m_huff_val[0+1], 1, false); + emit_dht(m_huff_bits[2+1], m_huff_val[2+1], 1, true); + } +} + +// emit start of scan +void jpeg_encoder::emit_sos() +{ + emit_marker(M_SOS); + emit_word(2 * m_num_components + 2 + 1 + 3); + emit_byte(m_num_components); + for (int i = 0; i < m_num_components; i++) + { + emit_byte(static_cast(i + 1)); + if (i == 0) + emit_byte((0 << 4) + 0); + else + emit_byte((1 << 4) + 1); + } + emit_byte(0); /* spectral selection */ + emit_byte(63); + emit_byte(0); +} + +// Emit all markers at beginning of image file. 
+void jpeg_encoder::emit_markers() +{ + emit_marker(M_SOI); + emit_jfif_app0(); + emit_dqt(); + emit_sof(); + emit_dhts(); + emit_sos(); +} + +// Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays. +void jpeg_encoder::compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val) +{ + int i, l, last_p, si; + uint8 huff_size[257]; + uint huff_code[257]; + uint code; + + int p = 0; + for (l = 1; l <= 16; l++) + for (i = 1; i <= bits[l]; i++) + huff_size[p++] = (char)l; + + huff_size[p] = 0; last_p = p; // write sentinel + + code = 0; si = huff_size[0]; p = 0; + + while (huff_size[p]) + { + while (huff_size[p] == si) + huff_code[p++] = code++; + code <<= 1; + si++; + } + + memset(codes, 0, sizeof(codes[0])*256); + memset(code_sizes, 0, sizeof(code_sizes[0])*256); + for (p = 0; p < last_p; p++) + { + codes[val[p]] = huff_code[p]; + code_sizes[val[p]] = huff_size[p]; + } +} + +// Quantization table generation. +void jpeg_encoder::compute_quant_table(int32 *pDst, int16 *pSrc) +{ + int32 q; + if (m_params.m_quality < 50) + q = 5000 / m_params.m_quality; + else + q = 200 - m_params.m_quality * 2; + for (int i = 0; i < 64; i++) + { + int32 j = *pSrc++; j = (j * q + 50L) / 100L; + *pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255); + } +} + +// Higher-level methods. 
+void jpeg_encoder::first_pass_init() +{ + m_bit_buffer = 0; m_bits_in = 0; + memset(m_last_dc_val, 0, 3 * sizeof(m_last_dc_val[0])); + m_mcu_y_ofs = 0; + m_pass_num = 1; +} + +bool jpeg_encoder::second_pass_init() +{ + compute_huffman_table(&m_huff_codes[0+0][0], &m_huff_code_sizes[0+0][0], m_huff_bits[0+0], m_huff_val[0+0]); + compute_huffman_table(&m_huff_codes[2+0][0], &m_huff_code_sizes[2+0][0], m_huff_bits[2+0], m_huff_val[2+0]); + if (m_num_components > 1) + { + compute_huffman_table(&m_huff_codes[0+1][0], &m_huff_code_sizes[0+1][0], m_huff_bits[0+1], m_huff_val[0+1]); + compute_huffman_table(&m_huff_codes[2+1][0], &m_huff_code_sizes[2+1][0], m_huff_bits[2+1], m_huff_val[2+1]); + } + first_pass_init(); + emit_markers(); + m_pass_num = 2; + return true; +} + +bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels) +{ + m_num_components = 3; + switch (m_params.m_subsampling) + { + case Y_ONLY: + { + m_num_components = 1; + m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1; + m_mcu_x = 8; m_mcu_y = 8; + break; + } + case H1V1: + { + m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 8; m_mcu_y = 8; + break; + } + case H2V1: + { + m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 16; m_mcu_y = 8; + break; + } + case H2V2: + { + m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2; + m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1; + m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1; + m_mcu_x = 16; m_mcu_y = 16; + } + } + + m_image_x = p_x_res; m_image_y = p_y_res; + m_image_bpp = src_channels; + m_image_bpl = m_image_x * src_channels; + m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1)); + m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1)); + m_image_bpl_xlt = m_image_x * m_num_components; + m_image_bpl_mcu = m_image_x_mcu * m_num_components; + m_mcus_per_row = 
m_image_x_mcu / m_mcu_x; + + if ((m_mcu_lines[0] = static_cast(jpge_malloc(m_image_bpl_mcu * m_mcu_y))) == NULL) return false; + for (int i = 1; i < m_mcu_y; i++) + m_mcu_lines[i] = m_mcu_lines[i-1] + m_image_bpl_mcu; + + compute_quant_table(m_quantization_tables[0], s_std_lum_quant); + compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant); + + m_out_buf_left = JPGE_OUT_BUF_SIZE; + m_pOut_buf = m_out_buf; + + if (m_params.m_two_pass_flag) + { + clear_obj(m_huff_count); + first_pass_init(); + } + else + { + memcpy(m_huff_bits[0+0], s_dc_lum_bits, 17); memcpy(m_huff_val [0+0], s_dc_lum_val, DC_LUM_CODES); + memcpy(m_huff_bits[2+0], s_ac_lum_bits, 17); memcpy(m_huff_val [2+0], s_ac_lum_val, AC_LUM_CODES); + memcpy(m_huff_bits[0+1], s_dc_chroma_bits, 17); memcpy(m_huff_val [0+1], s_dc_chroma_val, DC_CHROMA_CODES); + memcpy(m_huff_bits[2+1], s_ac_chroma_bits, 17); memcpy(m_huff_val [2+1], s_ac_chroma_val, AC_CHROMA_CODES); + if (!second_pass_init()) return false; // in effect, skip over the first pass + } + return m_all_stream_writes_succeeded; +} + +void jpeg_encoder::load_block_8_8_grey(int x) +{ + uint8 *pSrc; + sample_array_t *pDst = m_sample_array; + x <<= 3; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc = m_mcu_lines[i] + x; + pDst[0] = pSrc[0] - 128; pDst[1] = pSrc[1] - 128; pDst[2] = pSrc[2] - 128; pDst[3] = pSrc[3] - 128; + pDst[4] = pSrc[4] - 128; pDst[5] = pSrc[5] - 128; pDst[6] = pSrc[6] - 128; pDst[7] = pSrc[7] - 128; + } +} + +void jpeg_encoder::load_block_8_8(int x, int y, int c) +{ + uint8 *pSrc; + sample_array_t *pDst = m_sample_array; + x = (x * (8 * 3)) + c; + y <<= 3; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc = m_mcu_lines[y + i] + x; + pDst[0] = pSrc[0 * 3] - 128; pDst[1] = pSrc[1 * 3] - 128; pDst[2] = pSrc[2 * 3] - 128; pDst[3] = pSrc[3 * 3] - 128; + pDst[4] = pSrc[4 * 3] - 128; pDst[5] = pSrc[5 * 3] - 128; pDst[6] = pSrc[6 * 3] - 128; pDst[7] = pSrc[7 * 3] - 128; + } 
+} + +void jpeg_encoder::load_block_16_8(int x, int c) +{ + uint8 *pSrc1, *pSrc2; + sample_array_t *pDst = m_sample_array; + x = (x * (16 * 3)) + c; + int a = 0, b = 2; + for (int i = 0; i < 16; i += 2, pDst += 8) + { + pSrc1 = m_mcu_lines[i + 0] + x; + pSrc2 = m_mcu_lines[i + 1] + x; + pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3] + pSrc2[ 0 * 3] + pSrc2[ 1 * 3] + a) >> 2) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3] + pSrc2[ 2 * 3] + pSrc2[ 3 * 3] + b) >> 2) - 128; + pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3] + pSrc2[ 4 * 3] + pSrc2[ 5 * 3] + a) >> 2) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3] + pSrc2[ 6 * 3] + pSrc2[ 7 * 3] + b) >> 2) - 128; + pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3] + pSrc2[ 8 * 3] + pSrc2[ 9 * 3] + a) >> 2) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + pSrc2[10 * 3] + pSrc2[11 * 3] + b) >> 2) - 128; + pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + pSrc2[12 * 3] + pSrc2[13 * 3] + a) >> 2) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + pSrc2[14 * 3] + pSrc2[15 * 3] + b) >> 2) - 128; + int temp = a; a = b; b = temp; + } +} + +void jpeg_encoder::load_block_16_8_8(int x, int c) +{ + uint8 *pSrc1; + sample_array_t *pDst = m_sample_array; + x = (x * (16 * 3)) + c; + for (int i = 0; i < 8; i++, pDst += 8) + { + pSrc1 = m_mcu_lines[i + 0] + x; + pDst[0] = ((pSrc1[ 0 * 3] + pSrc1[ 1 * 3]) >> 1) - 128; pDst[1] = ((pSrc1[ 2 * 3] + pSrc1[ 3 * 3]) >> 1) - 128; + pDst[2] = ((pSrc1[ 4 * 3] + pSrc1[ 5 * 3]) >> 1) - 128; pDst[3] = ((pSrc1[ 6 * 3] + pSrc1[ 7 * 3]) >> 1) - 128; + pDst[4] = ((pSrc1[ 8 * 3] + pSrc1[ 9 * 3]) >> 1) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3]) >> 1) - 128; + pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3]) >> 1) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3]) >> 1) - 128; + } +} + +void jpeg_encoder::load_quantized_coefficients(int component_num) +{ + int32 *q = m_quantization_tables[component_num > 0]; + int16 *pDst = m_coefficient_array; + for (int i = 0; i < 64; i++) + { + sample_array_t j = m_sample_array[s_zag[i]]; + 
if (j < 0) + { + if ((j = -j + (*q >> 1)) < *q) + *pDst++ = 0; + else + *pDst++ = static_cast(-(j / *q)); + } + else + { + if ((j = j + (*q >> 1)) < *q) + *pDst++ = 0; + else + *pDst++ = static_cast((j / *q)); + } + q++; + } +} + +void jpeg_encoder::flush_output_buffer() +{ + if (m_out_buf_left != JPGE_OUT_BUF_SIZE) + m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left); + m_pOut_buf = m_out_buf; + m_out_buf_left = JPGE_OUT_BUF_SIZE; +} + +void jpeg_encoder::put_bits(uint bits, uint len) +{ + m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len))); + while (m_bits_in >= 8) + { + uint8 c; + #define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); } + JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF)); + if (c == 0xFF) JPGE_PUT_BYTE(0); + m_bit_buffer <<= 8; + m_bits_in -= 8; + } +} + +void jpeg_encoder::code_coefficients_pass_one(int component_num) +{ + if (component_num >= 3) return; // just to shut up static analysis + int i, run_len, nbits, temp1; + int16 *src = m_coefficient_array; + uint32 *dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], *ac_count = component_num ? 
m_huff_count[2 + 1] : m_huff_count[2 + 0]; + + temp1 = src[0] - m_last_dc_val[component_num]; + m_last_dc_val[component_num] = src[0]; + if (temp1 < 0) temp1 = -temp1; + + nbits = 0; + while (temp1) + { + nbits++; temp1 >>= 1; + } + + dc_count[nbits]++; + for (run_len = 0, i = 1; i < 64; i++) + { + if ((temp1 = m_coefficient_array[i]) == 0) + run_len++; + else + { + while (run_len >= 16) + { + ac_count[0xF0]++; + run_len -= 16; + } + if (temp1 < 0) temp1 = -temp1; + nbits = 1; + while (temp1 >>= 1) nbits++; + ac_count[(run_len << 4) + nbits]++; + run_len = 0; + } + } + if (run_len) ac_count[0]++; +} + +void jpeg_encoder::code_coefficients_pass_two(int component_num) +{ + int i, j, run_len, nbits, temp1, temp2; + int16 *pSrc = m_coefficient_array; + uint *codes[2]; + uint8 *code_sizes[2]; + + if (component_num == 0) + { + codes[0] = m_huff_codes[0 + 0]; codes[1] = m_huff_codes[2 + 0]; + code_sizes[0] = m_huff_code_sizes[0 + 0]; code_sizes[1] = m_huff_code_sizes[2 + 0]; + } + else + { + codes[0] = m_huff_codes[0 + 1]; codes[1] = m_huff_codes[2 + 1]; + code_sizes[0] = m_huff_code_sizes[0 + 1]; code_sizes[1] = m_huff_code_sizes[2 + 1]; + } + + temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num]; + m_last_dc_val[component_num] = pSrc[0]; + + if (temp1 < 0) + { + temp1 = -temp1; temp2--; + } + + nbits = 0; + while (temp1) + { + nbits++; temp1 >>= 1; + } + + put_bits(codes[0][nbits], code_sizes[0][nbits]); + if (nbits) put_bits(temp2 & ((1 << nbits) - 1), nbits); + + for (run_len = 0, i = 1; i < 64; i++) + { + if ((temp1 = m_coefficient_array[i]) == 0) + run_len++; + else + { + while (run_len >= 16) + { + put_bits(codes[1][0xF0], code_sizes[1][0xF0]); + run_len -= 16; + } + if ((temp2 = temp1) < 0) + { + temp1 = -temp1; + temp2--; + } + nbits = 1; + while (temp1 >>= 1) + nbits++; + j = (run_len << 4) + nbits; + put_bits(codes[1][j], code_sizes[1][j]); + put_bits(temp2 & ((1 << nbits) - 1), nbits); + run_len = 0; + } + } + if (run_len) + put_bits(codes[1][0], 
code_sizes[1][0]); +} + +void jpeg_encoder::code_block(int component_num) +{ + DCT2D(m_sample_array); + load_quantized_coefficients(component_num); + if (m_pass_num == 1) + code_coefficients_pass_one(component_num); + else + code_coefficients_pass_two(component_num); +} + +void jpeg_encoder::process_mcu_row() +{ + if (m_num_components == 1) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8_grey(i); code_block(0); + } + } + else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2); + } + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0); + load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2); + } + } + else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) + { + for (int i = 0; i < m_mcus_per_row; i++) + { + load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0); + load_block_8_8(i * 2 + 0, 1, 0); code_block(0); load_block_8_8(i * 2 + 1, 1, 0); code_block(0); + load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2); + } + } +} + +bool jpeg_encoder::terminate_pass_one() +{ + optimize_huffman_table(0+0, DC_LUM_CODES); optimize_huffman_table(2+0, AC_LUM_CODES); + if (m_num_components > 1) + { + optimize_huffman_table(0+1, DC_CHROMA_CODES); optimize_huffman_table(2+1, AC_CHROMA_CODES); + } + return second_pass_init(); +} + +bool jpeg_encoder::terminate_pass_two() +{ + put_bits(0x7F, 7); + flush_output_buffer(); + emit_marker(M_EOI); + m_pass_num++; // purposely bump up m_pass_num, for debugging + return true; +} + +bool jpeg_encoder::process_end_of_image() +{ + if (m_mcu_y_ofs) + { + if (m_mcu_y_ofs < 16) // check here just to 
shut up static analysis + { + for (int i = m_mcu_y_ofs; i < m_mcu_y; i++) + memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs - 1], m_image_bpl_mcu); + } + + process_mcu_row(); + } + + if (m_pass_num == 1) + return terminate_pass_one(); + else + return terminate_pass_two(); +} + +void jpeg_encoder::load_mcu(const void *pSrc) +{ + const uint8* Psrc = reinterpret_cast<const uint8*>(pSrc); + + uint8* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst + + if (m_num_components == 1) + { + if (m_image_bpp == 4) + RGBA_to_Y(pDst, Psrc, m_image_x); + else if (m_image_bpp == 3) + RGB_to_Y(pDst, Psrc, m_image_x); + else + memcpy(pDst, Psrc, m_image_x); + } + else + { + if (m_image_bpp == 4) + RGBA_to_YCC(pDst, Psrc, m_image_x); + else if (m_image_bpp == 3) + RGB_to_YCC(pDst, Psrc, m_image_x); + else + Y_to_YCC(pDst, Psrc, m_image_x); + } + + // Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16 + if (m_num_components == 1) + memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x); + else + { + const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2]; + uint8 *q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt; + for (int i = m_image_x; i < m_image_x_mcu; i++) + { + *q++ = y; *q++ = cb; *q++ = cr; + } + } + + if (++m_mcu_y_ofs == m_mcu_y) + { + process_mcu_row(); + m_mcu_y_ofs = 0; + } +} + +void jpeg_encoder::clear() +{ + m_mcu_lines[0] = NULL; + m_pass_num = 0; + m_all_stream_writes_succeeded = true; +} + +jpeg_encoder::jpeg_encoder() +{ + clear(); +} + +jpeg_encoder::~jpeg_encoder() +{ + deinit(); +} + +bool jpeg_encoder::init(output_stream *pStream, int width, int height, int src_channels, const params &comp_params) +{ + deinit(); + if (((!pStream) || (width < 1) || (height < 1)) || ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) || (!comp_params.check())) return false; + m_pStream = pStream; + m_params = 
comp_params; + return jpg_open(width, height, src_channels); +} + +void jpeg_encoder::deinit() +{ + jpge_free(m_mcu_lines[0]); + clear(); +} + +bool jpeg_encoder::process_scanline(const void* pScanline) +{ + if ((m_pass_num < 1) || (m_pass_num > 2)) return false; + if (m_all_stream_writes_succeeded) + { + if (!pScanline) + { + if (!process_end_of_image()) return false; + } + else + { + load_mcu(pScanline); + } + } + return m_all_stream_writes_succeeded; +} + +// Higher level wrappers/examples (optional). +#include <stdio.h> + +class cfile_stream : public output_stream +{ + cfile_stream(const cfile_stream &); + cfile_stream &operator= (const cfile_stream &); + + FILE* m_pFile; + bool m_bStatus; + +public: + cfile_stream() : m_pFile(NULL), m_bStatus(false) { } + + virtual ~cfile_stream() + { + close(); + } + + bool open(const char *pFilename) + { + close(); + m_pFile = fopen(pFilename, "wb"); + m_bStatus = (m_pFile != NULL); + return m_bStatus; + } + + bool close() + { + if (m_pFile) + { + if (fclose(m_pFile) == EOF) + { + m_bStatus = false; + } + m_pFile = NULL; + } + return m_bStatus; + } + + virtual bool put_buf(const void* pBuf, int len) + { + m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1); + return m_bStatus; + } + + uint get_size() const + { + return m_pFile ? ftell(m_pFile) : 0; + } +}; + +// Writes JPEG image to file. 
+bool compress_image_to_jpeg_file(const char *pFilename, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params) +{ + cfile_stream dst_stream; + if (!dst_stream.open(pFilename)) + return false; + + jpge::jpeg_encoder dst_image; + if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params)) + return false; + + for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++) + { + for (int i = 0; i < height; i++) + { + const uint8* pBuf = pImage_data + i * width * num_channels; + if (!dst_image.process_scanline(pBuf)) + return false; + } + if (!dst_image.process_scanline(NULL)) + return false; + } + + dst_image.deinit(); + + return dst_stream.close(); +} + +class memory_stream : public output_stream +{ + memory_stream(const memory_stream &); + memory_stream &operator= (const memory_stream &); + + uint8 *m_pBuf; + uint m_buf_size, m_buf_ofs; + +public: + memory_stream(void *pBuf, uint buf_size) : m_pBuf(static_cast<uint8*>(pBuf)), m_buf_size(buf_size), m_buf_ofs(0) { } + + virtual ~memory_stream() { } + + virtual bool put_buf(const void* pBuf, int len) + { + uint buf_remaining = m_buf_size - m_buf_ofs; + if ((uint)len > buf_remaining) + return false; + memcpy(m_pBuf + m_buf_ofs, pBuf, len); + m_buf_ofs += len; + return true; + } + + uint get_size() const + { + return m_buf_ofs; + } +}; + +bool compress_image_to_jpeg_file_in_memory(void *pDstBuf, int &buf_size, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params) +{ + if ((!pDstBuf) || (buf_size <= 0)) + return false; + + memory_stream dst_stream(pDstBuf, buf_size); + + buf_size = 0; + + jpge::jpeg_encoder dst_image; + if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params)) + return false; + + for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++) + { + for (int i = 0; i < height; i++) + { + const uint8* pScanline = pImage_data + i * width * num_channels; + if 
(!dst_image.process_scanline(pScanline)) + return false; + } + if (!dst_image.process_scanline(NULL)) + return false; + } + + dst_image.deinit(); + + buf_size = dst_stream.get_size(); + return true; +} + +} // namespace jpge diff --git a/libs/jpeg/jpge.h b/libs/jpeg/jpge.h new file mode 100644 index 0000000..2052eec --- /dev/null +++ b/libs/jpeg/jpge.h @@ -0,0 +1,169 @@ +// jpge.h - C++ class for JPEG compression. +// Public domain, Rich Geldreich +// Alex Evans: Added RGBA support, linear memory allocator. +#ifndef JPEG_ENCODER_H +#define JPEG_ENCODER_H + +namespace jpge +{ + typedef unsigned char uint8; + typedef signed short int16; + typedef signed int int32; + typedef unsigned short uint16; + typedef unsigned int uint32; + typedef unsigned int uint; + + // JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common. + enum subsampling_t { Y_ONLY = 0, H1V1 = 1, H2V1 = 2, H2V2 = 3 }; + + // JPEG compression parameters structure. + struct params + { + inline params() : m_quality(85), m_subsampling(H2V2), m_no_chroma_discrim_flag(false), m_two_pass_flag(false) { } + + inline bool check() const + { + if ((m_quality < 1) || (m_quality > 100)) return false; + if ((uint)m_subsampling > (uint)H2V2) return false; + return true; + } + + // Quality: 1-100, higher is better. Typical values are around 50-95. + int m_quality; + + // m_subsampling: + // 0 = Y (grayscale) only + // 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU) + // 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU) + // 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common) + subsampling_t m_subsampling; + + // Disables CbCr discrimination - only intended for testing. + // If true, the Y quantization table is also used for the CbCr channels. + bool m_no_chroma_discrim_flag; + + bool m_two_pass_flag; + }; + + // Writes JPEG image to a file. + // num_channels must be 1 (Y), 3 (RGB) or 4 (RGBA), image pitch must be width*num_channels. 
+ bool compress_image_to_jpeg_file(const char *pFilename, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params = params()); + + // Writes JPEG image to memory buffer. + // On entry, buf_size is the size of the output buffer pointed at by pBuf, which should be at least ~1024 bytes. + // If return value is true, buf_size will be set to the size of the compressed data. + bool compress_image_to_jpeg_file_in_memory(void *pBuf, int &buf_size, int width, int height, int num_channels, const uint8 *pImage_data, const params &comp_params = params()); + + // Output stream abstract class - used by the jpeg_encoder class to write to the output stream. + // put_buf() is generally called with len==JPGE_OUT_BUF_SIZE bytes, but for headers it'll be called with smaller amounts. + class output_stream + { + public: + virtual ~output_stream() { }; + virtual bool put_buf(const void* Pbuf, int len) = 0; + template<class T> inline bool put_obj(const T& obj) { return put_buf(&obj, sizeof(T)); } + }; + + // Lower level jpeg_encoder class - useful if more control is needed than the above helper functions. + class jpeg_encoder + { + public: + jpeg_encoder(); + ~jpeg_encoder(); + + // Initializes the compressor. + // pStream: The stream object to use for writing compressed data. + // params - Compression parameters structure, defined above. + // width, height - Image dimensions. + // channels - May be 1, 3, or 4. 1 indicates grayscale, 3 indicates RGB, 4 indicates RGBA source data. + // Returns false on invalid arguments, out of memory, or if a stream write fails. + bool init(output_stream *pStream, int width, int height, int src_channels, const params &comp_params = params()); + + const params &get_params() const { return m_params; } + + // Deinitializes the compressor, freeing any allocated memory. May be called at any time. + void deinit(); + + uint get_total_passes() const { return m_params.m_two_pass_flag ? 
2 : 1; } + inline uint get_cur_pass() { return m_pass_num; } + + // Call this method with each source scanline. + // width * src_channels bytes per scanline is expected (RGB or Y format). + // You must call with NULL after all scanlines are processed to finish compression. + // Returns false on out of memory or if a stream write fails. + bool process_scanline(const void* pScanline); + + private: + jpeg_encoder(const jpeg_encoder &); + jpeg_encoder &operator =(const jpeg_encoder &); + + typedef int32 sample_array_t; + + output_stream *m_pStream; + params m_params; + uint8 m_num_components; + uint8 m_comp_h_samp[3], m_comp_v_samp[3]; + int m_image_x, m_image_y, m_image_bpp, m_image_bpl; + int m_image_x_mcu, m_image_y_mcu; + int m_image_bpl_xlt, m_image_bpl_mcu; + int m_mcus_per_row; + int m_mcu_x, m_mcu_y; + uint8 *m_mcu_lines[16]; + uint8 m_mcu_y_ofs; + sample_array_t m_sample_array[64]; + int16 m_coefficient_array[64]; + int32 m_quantization_tables[2][64]; + uint m_huff_codes[4][256]; + uint8 m_huff_code_sizes[4][256]; + uint8 m_huff_bits[4][17]; + uint8 m_huff_val[4][256]; + uint32 m_huff_count[4][256]; + int m_last_dc_val[3]; + enum { JPGE_OUT_BUF_SIZE = 2048 }; + uint8 m_out_buf[JPGE_OUT_BUF_SIZE]; + uint8 *m_pOut_buf; + uint m_out_buf_left; + uint32 m_bit_buffer; + uint m_bits_in; + uint8 m_pass_num; + bool m_all_stream_writes_succeeded; + + void optimize_huffman_table(int table_num, int table_len); + void emit_byte(uint8 i); + void emit_word(uint i); + void emit_marker(int marker); + void emit_jfif_app0(); + void emit_dqt(); + void emit_sof(); + void emit_dht(uint8 *bits, uint8 *val, int index, bool ac_flag); + void emit_dhts(); + void emit_sos(); + void emit_markers(); + void compute_huffman_table(uint *codes, uint8 *code_sizes, uint8 *bits, uint8 *val); + void compute_quant_table(int32 *dst, int16 *src); + void adjust_quant_table(int32 *dst, int32 *src); + void first_pass_init(); + bool second_pass_init(); + bool jpg_open(int p_x_res, int p_y_res, int 
src_channels); + void load_block_8_8_grey(int x); + void load_block_8_8(int x, int y, int c); + void load_block_16_8(int x, int c); + void load_block_16_8_8(int x, int c); + void load_quantized_coefficients(int component_num); + void flush_output_buffer(); + void put_bits(uint bits, uint len); + void code_coefficients_pass_one(int component_num); + void code_coefficients_pass_two(int component_num); + void code_block(int component_num); + void process_mcu_row(); + bool terminate_pass_one(); + bool terminate_pass_two(); + bool process_end_of_image(); + void load_mcu(const void* src); + void clear(); + void init(); + }; + +} // namespace jpge + +#endif // JPEG_ENCODER diff --git a/libs/jpeg/jpge.sln b/libs/jpeg/jpge.sln new file mode 100644 index 0000000..2c79f69 --- /dev/null +++ b/libs/jpeg/jpge.sln @@ -0,0 +1,26 @@ + +Microsoft Visual Studio Solution File, Format Version 10.00 +# Visual Studio 2008 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "jpge", "jpge.vcproj", "{DE273522-92D8-4B60-95C7-C3AEE10A303E}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Debug|x64 = Debug|x64 + Release|Win32 = Release|Win32 + Release|x64 = Release|x64 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {DE273522-92D8-4B60-95C7-C3AEE10A303E}.Debug|Win32.ActiveCfg = Debug|Win32 + {DE273522-92D8-4B60-95C7-C3AEE10A303E}.Debug|Win32.Build.0 = Debug|Win32 + {DE273522-92D8-4B60-95C7-C3AEE10A303E}.Debug|x64.ActiveCfg = Debug|x64 + {DE273522-92D8-4B60-95C7-C3AEE10A303E}.Debug|x64.Build.0 = Debug|x64 + {DE273522-92D8-4B60-95C7-C3AEE10A303E}.Release|Win32.ActiveCfg = Release|Win32 + {DE273522-92D8-4B60-95C7-C3AEE10A303E}.Release|Win32.Build.0 = Release|Win32 + {DE273522-92D8-4B60-95C7-C3AEE10A303E}.Release|x64.ActiveCfg = Release|x64 + {DE273522-92D8-4B60-95C7-C3AEE10A303E}.Release|x64.Build.0 = Release|x64 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + 
HideSolutionNode = FALSE + EndGlobalSection +EndGlobal diff --git a/libs/jpeg/jpge.vcproj b/libs/jpeg/jpge.vcproj new file mode 100644 index 0000000..1d5dea5 --- /dev/null +++ b/libs/jpeg/jpge.vcproj @@ -0,0 +1,376 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/libs/jpeg/jpge.workspace b/libs/jpeg/jpge.workspace new file mode 100644 index 0000000..a200a07 --- /dev/null +++ b/libs/jpeg/jpge.workspace @@ -0,0 +1,6 @@ + + + + + + diff --git a/libs/jpeg/stb_image.c b/libs/jpeg/stb_image.c new file mode 100644 index 0000000..984e3f0 --- /dev/null +++ b/libs/jpeg/stb_image.c @@ -0,0 +1,5055 @@ +/* stbi-1.29 - public domain JPEG/PNG reader - http://nothings.org/stb_image.c + when you control the images you're loading + no warranty implied; use at your own risk + + QUICK NOTES: + Primarily of interest to game developers and other people who can + avoid problematic images and only need the trivial interface + + JPEG baseline (no JPEG progressive) + PNG 8-bit only + + TGA (not sure what subset, if a subset) + BMP non-1bpp, non-RLE + PSD (composited view only, no extra channels) + + GIF (*comp always reports as 4-channel) + HDR (radiance rgbE format) + PIC (Softimage PIC) + + - decoded from memory or through stdio FILE (define STBI_NO_STDIO to remove code) + - supports installable dequantizing-IDCT, YCbCr-to-RGB conversion (define STBI_SIMD) + + Latest revisions: + 1.29 (2010-08-16) various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) cast-to-uint8 to fix warnings (Laurent Gomila) + allow trailing 0s at end of image data (Laurent Gomila) + 1.26 (2010-07-24) fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) refix trans_data warning (Won Chun) + 1.24 (2010-07-12) perf improvements 
reading from files + minor perf improvements for jpeg + deprecated type-specific functions in hope of feedback + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) removed image *writing* support to stb_image_write.h + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva + 1.21 fix use of 'uint8' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + + See end of file for full revision history. + + TODO: + stbi_info support for BMP,PSD,HDR,PIC + rewrite stbi_info and load_file variations to share file handling code + (current system allows individual functions to be called directly, + since each does all the work, but I doubt anyone uses this in practice) + + + ============================ Contributors ========================= + + Image formats Optimizations & bugfixes + Sean Barrett (jpeg, png, bmp) Fabian "ryg" Giesen + Nicolas Schulz (hdr, psd) + Jonathan Dummer (tga) Bug fixes & warning fixes + Jean-Marc Lienher (gif) Marc LeBlanc + Tom Seddon (pic) Christpher Lloyd + Thatcher Ulrich (psd) Dave Moore + Won Chun + the Horde3D community + Extensions, features Janez Zemva + Jetro Lauha (stbi_info) Jonathan Blow + James "moose2000" Brown (iPhone PNG) Laurent Gomila + Aruelien Pocheville + + If your name should be here but isn't, let Sean know. + +*/ + +#ifndef STBI_INCLUDE_STB_IMAGE_H +#define STBI_INCLUDE_STB_IMAGE_H + +// To get a header file for this, either cut and paste the header, +// or create stb_image.h, #define STBI_HEADER_FILE_ONLY, and +// then include stb_image.c from it. 
+ +//// begin header file //////////////////////////////////////////////////// +// +// Limitations: +// - no jpeg progressive support +// - non-HDR formats support 8-bit samples only (jpeg, png) +// - no delayed line count (jpeg) -- IJG doesn't support either +// - no 1-bit BMP +// - GIF always returns *comp=4 +// +// Basic usage (see HDR discussion below): +// int x,y,n; +// unsigned char *data = stbi_load(filename, &x, &y, &n, 0); +// // ... process data if not NULL ... +// // ... x = width, y = height, n = # 8-bit components per pixel ... +// // ... replace '0' with '1'..'4' to force that many components per pixel +// stbi_image_free(data) +// +// Standard parameters: +// int *x -- outputs image width in pixels +// int *y -- outputs image height in pixels +// int *comp -- outputs # of image components in image file +// int req_comp -- if non-zero, # of image components requested in result +// +// The return value from an image loader is an 'unsigned char *' which points +// to the pixel data. The pixel data consists of *y scanlines of *x pixels, +// with each pixel consisting of N interleaved 8-bit components; the first +// pixel pointed to is top-left-most in the image. There is no padding between +// image scanlines or between pixels, regardless of format. The number of +// components N is 'req_comp' if req_comp is non-zero, or *comp otherwise. +// If req_comp is non-zero, *comp has the number of components that _would_ +// have been output otherwise. E.g. if you set req_comp to 4, you will always +// get RGBA output, but you can check *comp to easily see if it's opaque. +// +// An output image with N components has the following components interleaved +// in this order in each pixel: +// +// N=#comp components +// 1 grey +// 2 grey, alpha +// 3 red, green, blue +// 4 red, green, blue, alpha +// +// If image loading fails for any reason, the return value will be NULL, +// and *x, *y, *comp will be unchanged. 
The function stbi_failure_reason() +// can be queried for an extremely brief, end-user unfriendly explanation +// of why the load failed. Define STBI_NO_FAILURE_STRINGS to avoid +// compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly +// more user-friendly ones. +// +// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized. +// +// =========================================================================== +// +// iPhone PNG support: +// +// By default we convert iphone-formatted PNGs back to RGB; nominally they +// would silently load as BGR, except the existing code should have just +// failed on such iPhone PNGs. But you can disable this conversion by +// calling stbi_convert_iphone_png_to_rgb(0), in which case +// you will always just get the native iphone "format" through. +// +// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per +// pixel to remove any premultiplied alpha *only* if the image file explicitly +// says there's premultiplied data (currently only happens in iPhone images, +// and only if iPhone convert-to-rgb processing is on). +// +// =========================================================================== +// +// HDR image support (disable by defining STBI_NO_HDR) +// +// stb_image now supports loading HDR images in general, and currently +// the Radiance .HDR file format, although the support is provided +// generically. You can still load any file through the existing interface; +// if you attempt to load an HDR file, it will be automatically remapped to +// LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1; +// both of these constants can be reconfigured through this interface: +// +// stbi_hdr_to_ldr_gamma(2.2f); +// stbi_hdr_to_ldr_scale(1.0f); +// +// (note, do not use _inverse_ constants; stbi_image will invert them +// appropriately). 
+// +// Additionally, there is a new, parallel interface for loading files as +// (linear) floats to preserve the full dynamic range: +// +// float *data = stbi_loadf(filename, &x, &y, &n, 0); +// +// If you load LDR images through this interface, those images will +// be promoted to floating point values, run through the inverse of +// constants corresponding to the above: +// +// stbi_ldr_to_hdr_scale(1.0f); +// stbi_ldr_to_hdr_gamma(2.2f); +// +// Finally, given a filename (or an open file or memory block--see header +// file for details) containing image data, you can query for the "most +// appropriate" interface to use (that is, whether the image is HDR or +// not), using: +// +// stbi_is_hdr(char *filename); + +#ifndef STBI_NO_STDIO +#include <stdio.h> +#endif + +#define STBI_VERSION 1 + +enum +{ + STBI_default = 0, // only used for req_comp + + STBI_grey = 1, + STBI_grey_alpha = 2, + STBI_rgb = 3, + STBI_rgb_alpha = 4 +}; + +typedef unsigned char stbi_uc; + +#ifdef __cplusplus +extern "C" { +#endif + +// PRIMARY API - works on images of any type + +// load image by filename, open file, or memory buffer +extern stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +// for stbi_load_from_file, file pointer is left pointing immediately after image +#endif + +#ifndef STBI_NO_HDR + extern float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + + #ifndef STBI_NO_STDIO + extern float *stbi_loadf (char const *filename, int *x, int *y, int *comp, int req_comp); + extern float *stbi_loadf_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + #endif + + extern void stbi_hdr_to_ldr_gamma(float gamma); + extern void stbi_hdr_to_ldr_scale(float scale); + + extern void 
stbi_ldr_to_hdr_gamma(float gamma); + extern void stbi_ldr_to_hdr_scale(float scale); +#endif // STBI_NO_HDR + +// get a VERY brief reason for failure +// NOT THREADSAFE +extern const char *stbi_failure_reason (void); + +// free the loaded image -- this is just free() +extern void stbi_image_free (void *retval_from_stbi_load); + +// get image dimensions & components without fully decoding +extern int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); +extern int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len); + +#ifndef STBI_NO_STDIO +extern int stbi_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_info_from_file (FILE *f, int *x, int *y, int *comp); + +extern int stbi_is_hdr (char const *filename); +extern int stbi_is_hdr_from_file(FILE *f); +#endif + +// for image formats that explicitly notate that they have premultiplied alpha, +// we just return the colors as stored in the file. set this flag to force +// unpremultiplication. results are undefined if the unpremultiply overflow. 
+extern void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply); + +// indicate whether we should process iphone images back to canonical format, +// or just pass them through "as-is" +extern void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert); + + +// ZLIB client - used by PNG, available for other purposes + +extern char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen); +extern char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +extern char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen); +extern int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen); + +// define new loaders +typedef struct +{ + int (*test_memory)(stbi_uc const *buffer, int len); + stbi_uc * (*load_from_memory)(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); + #ifndef STBI_NO_STDIO + int (*test_file)(FILE *f); + stbi_uc * (*load_from_file)(FILE *f, int *x, int *y, int *comp, int req_comp); + #endif +} stbi_loader; + +// register a loader by filling out the above structure (you must define ALL functions) +// returns 1 if added or already added, 0 if not added (too many loaders) +// NOT THREADSAFE +extern int stbi_register_loader(stbi_loader *loader); + +// define faster low-level operations (typically SIMD support) +#ifdef STBI_SIMD +typedef void (*stbi_idct_8x8)(stbi_uc *out, int out_stride, short data[64], unsigned short *dequantize); +// compute an integer IDCT on "input" +// input[x] = data[x] * dequantize[x] +// write results to 'out': 64 samples, each run of 8 spaced by 'out_stride' +// CLAMP results to 0..255 +typedef void (*stbi_YCbCr_to_RGB_run)(stbi_uc *output, stbi_uc const *y, stbi_uc const *cb, stbi_uc const *cr, int count, int step); +// compute a conversion from YCbCr to RGB +// 'count' pixels +// 
write pixels to 'output'; each pixel is 'step' bytes (either 3 or 4; if 4, write '255' as 4th), order R,G,B +// y: Y input channel +// cb: Cb input channel; scale/biased to be 0..255 +// cr: Cr input channel; scale/biased to be 0..255 + +extern void stbi_install_idct(stbi_idct_8x8 func); +extern void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func); +#endif // STBI_SIMD + + + + +// TYPE-SPECIFIC ACCESS + +#ifdef STBI_TYPE_SPECIFIC_FUNCTIONS + +// is it a jpeg? +extern int stbi_jpeg_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_jpeg_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_test_file (FILE *f); +extern stbi_uc *stbi_jpeg_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a png? +extern int stbi_png_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_png_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_png_test_file (FILE *f); +extern stbi_uc *stbi_png_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a bmp? 
+extern int stbi_bmp_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_bmp_test_file (FILE *f); +extern stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a tga? +extern int stbi_tga_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_tga_test_file (FILE *f); +extern stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a psd? +extern int stbi_psd_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_psd_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_psd_test_file (FILE *f); +extern stbi_uc *stbi_psd_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it an hdr? +extern int stbi_hdr_test_memory (stbi_uc const *buffer, int len); + +extern float * stbi_hdr_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_hdr_test_file (FILE *f); +extern float * stbi_hdr_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a pic? 
+extern int stbi_pic_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_pic_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_pic_test_file (FILE *f); +extern stbi_uc *stbi_pic_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a gif? +extern int stbi_gif_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_gif_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_gif_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern int stbi_gif_test_file (FILE *f); +extern stbi_uc *stbi_gif_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_gif_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_gif_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +#endif//STBI_TYPE_SPECIFIC_FUNCTIONS + + + + +#ifdef __cplusplus +} +#endif + +// +// +//// end header file ///////////////////////////////////////////////////// +#endif // STBI_INCLUDE_STB_IMAGE_H + +#ifndef STBI_HEADER_FILE_ONLY + +#ifndef STBI_NO_HDR +#include // ldexp +#include // strcmp +#endif + +#ifndef STBI_NO_STDIO +#include +#endif +#include +#include +#include +#include + +#if !defined(_MSC_VER) && !defined(__forceinline) + #ifdef __cplusplus + #define __forceinline inline + #else + #define __forceinline + #endif +#endif + + +// implementation: +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef signed short int16; +typedef unsigned int uint32; +typedef signed int int32; +typedef unsigned int uint; + +// should produce compiler error if size is wrong +typedef unsigned char 
validate_uint32[sizeof(uint32)==4 ? 1 : -1]; + +#if defined(STBI_NO_STDIO) && !defined(STBI_NO_WRITE) +#define STBI_NO_WRITE +#endif + +#define STBI_NOTUSED(v) v=v + +#ifdef _MSC_VER +#define STBI_HAS_LRTOL +#endif + +#ifdef STBI_HAS_LRTOL + #define stbi_lrot(x,y) _lrotl(x,y) +#else + #define stbi_lrot(x,y) (((x) << (y)) | ((x) >> (32 - (y)))) +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// Generic API that works on all image types +// + +// deprecated functions + +// is it a jpeg? +extern int stbi_jpeg_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_jpeg_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_jpeg_test_file (FILE *f); +extern stbi_uc *stbi_jpeg_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); + +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a png? +extern int stbi_png_test_memory (stbi_uc const *buffer, int len); +extern stbi_uc *stbi_png_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern stbi_uc *stbi_png_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_png_test_file (FILE *f); +extern stbi_uc *stbi_png_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_png_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + +// is it a bmp? 
+extern int stbi_bmp_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_bmp_test_file (FILE *f); +extern stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a tga? +extern int stbi_tga_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_tga_test_file (FILE *f); +extern stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a psd? +extern int stbi_psd_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_psd_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_psd_test_file (FILE *f); +extern stbi_uc *stbi_psd_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it an hdr? +extern int stbi_hdr_test_memory (stbi_uc const *buffer, int len); + +extern float * stbi_hdr_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern float * stbi_hdr_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_hdr_test_file (FILE *f); +extern float * stbi_hdr_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a pic? 
+extern int stbi_pic_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_pic_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +#ifndef STBI_NO_STDIO +extern int stbi_pic_test_file (FILE *f); +extern stbi_uc *stbi_pic_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +#endif + +// is it a gif? +extern int stbi_gif_test_memory (stbi_uc const *buffer, int len); + +extern stbi_uc *stbi_gif_load (char const *filename, int *x, int *y, int *comp, int req_comp); +extern stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp); +extern int stbi_gif_info_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +#ifndef STBI_NO_STDIO +extern int stbi_gif_test_file (FILE *f); +extern stbi_uc *stbi_gif_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp); +extern int stbi_gif_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_gif_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif + + +// this is not threadsafe +static const char *failure_reason; + +const char *stbi_failure_reason(void) +{ + return failure_reason; +} + +static int e(const char *str) +{ + failure_reason = str; + return 0; +} + +#ifdef STBI_NO_FAILURE_STRINGS + #define e(x,y) 0 +#elif defined(STBI_FAILURE_USERMSG) + #define e(x,y) e(y) +#else + #define e(x,y) e(x) +#endif + +#define epf(x,y) ((float *) (e(x,y)?NULL:NULL)) +#define epuc(x,y) ((unsigned char *) (e(x,y)?NULL:NULL)) + +void stbi_image_free(void *retval_from_stbi_load) +{ + free(retval_from_stbi_load); +} + +#define MAX_LOADERS 32 +stbi_loader *loaders[MAX_LOADERS]; +static int max_loaders = 0; + +int stbi_register_loader(stbi_loader *loader) +{ + int i; + for (i=0; i < MAX_LOADERS; ++i) { + // already present? 
+ if (loaders[i] == loader) + return 1; + // end of the list? + if (loaders[i] == NULL) { + loaders[i] = loader; + max_loaders = i+1; + return 1; + } + } + // no room for it + return 0; +} + +#ifndef STBI_NO_HDR +static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp); +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp); +#endif + +#ifndef STBI_NO_STDIO +unsigned char *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + unsigned char *result; + if (!f) return epuc("can't fopen", "Unable to open file"); + result = stbi_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +unsigned char *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_file(f)) return stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + if (stbi_png_test_file(f)) return stbi_png_load_from_file(f,x,y,comp,req_comp); + if (stbi_bmp_test_file(f)) return stbi_bmp_load_from_file(f,x,y,comp,req_comp); + if (stbi_gif_test_file(f)) return stbi_gif_load_from_file(f,x,y,comp,req_comp); + if (stbi_psd_test_file(f)) return stbi_psd_load_from_file(f,x,y,comp,req_comp); + if (stbi_pic_test_file(f)) return stbi_pic_load_from_file(f,x,y,comp,req_comp); + + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) { + float *hdr = stbi_hdr_load_from_file(f, x,y,comp,req_comp); + return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_file(f)) + return loaders[i]->load_from_file(f,x,y,comp,req_comp); + // test tga last because it's a crappy test! 
+ if (stbi_tga_test_file(f)) + return stbi_tga_load_from_file(f,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +unsigned char *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + int i; + if (stbi_jpeg_test_memory(buffer,len)) return stbi_jpeg_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_png_test_memory(buffer,len)) return stbi_png_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_bmp_test_memory(buffer,len)) return stbi_bmp_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_gif_test_memory(buffer,len)) return stbi_gif_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_psd_test_memory(buffer,len)) return stbi_psd_load_from_memory(buffer,len,x,y,comp,req_comp); + if (stbi_pic_test_memory(buffer,len)) return stbi_pic_load_from_memory(buffer,len,x,y,comp,req_comp); + + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) { + float *hdr = stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + return hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp); + } + #endif + + for (i=0; i < max_loaders; ++i) + if (loaders[i]->test_memory(buffer,len)) + return loaders[i]->load_from_memory(buffer,len,x,y,comp,req_comp); + // test tga last because it's a crappy test! 
+ if (stbi_tga_test_memory(buffer,len)) + return stbi_tga_load_from_memory(buffer,len,x,y,comp,req_comp); + return epuc("unknown image type", "Image not of any known type, or corrupt"); +} + +#ifndef STBI_NO_HDR + +#ifndef STBI_NO_STDIO +float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + FILE *f = fopen(filename, "rb"); + float *result; + if (!f) return epf("can't fopen", "Unable to open file"); + result = stbi_loadf_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_file(f)) + return stbi_hdr_load_from_file(f,x,y,comp,req_comp); + #endif + data = stbi_load_from_file(f, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + #ifndef STBI_NO_HDR + if (stbi_hdr_test_memory(buffer, len)) + return stbi_hdr_load_from_memory(buffer, len,x,y,comp,req_comp); + #endif + data = stbi_load_from_memory(buffer, len, x, y, comp, req_comp); + if (data) + return ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp); + return epf("unknown image type", "Image not of any known type, or corrupt"); +} +#endif + +// these is-hdr-or-not is defined independent of whether STBI_NO_HDR is +// defined, for API simplicity; if STBI_NO_HDR is defined, it always +// reports false! 
+ +int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len) +{ + #ifndef STBI_NO_HDR + return stbi_hdr_test_memory(buffer, len); + #else + STBI_NOTUSED(buffer); + STBI_NOTUSED(len); + return 0; + #endif +} + +#ifndef STBI_NO_STDIO +extern int stbi_is_hdr (char const *filename) +{ + FILE *f = fopen(filename, "rb"); + int result=0; + if (f) { + result = stbi_is_hdr_from_file(f); + fclose(f); + } + return result; +} + +extern int stbi_is_hdr_from_file(FILE *f) +{ + #ifndef STBI_NO_HDR + return stbi_hdr_test_file(f); + #else + return 0; + #endif +} + +#endif + +#ifndef STBI_NO_HDR +static float h2l_gamma_i=1.0f/2.2f, h2l_scale_i=1.0f; +static float l2h_gamma=2.2f, l2h_scale=1.0f; + +void stbi_hdr_to_ldr_gamma(float gamma) { h2l_gamma_i = 1/gamma; } +void stbi_hdr_to_ldr_scale(float scale) { h2l_scale_i = 1/scale; } + +void stbi_ldr_to_hdr_gamma(float gamma) { l2h_gamma = gamma; } +void stbi_ldr_to_hdr_scale(float scale) { l2h_scale = scale; } +#endif + + +////////////////////////////////////////////////////////////////////////////// +// +// Common code used by all image loaders +// + +enum +{ + SCAN_load=0, + SCAN_type, + SCAN_header +}; + +typedef struct +{ + uint32 img_x, img_y; + int img_n, img_out_n; + + #ifndef STBI_NO_STDIO + FILE *img_file; + int buflen; + uint8 buffer_start[128]; + int from_file; + #endif + uint8 *img_buffer, *img_buffer_end; +} stbi; + +#ifndef STBI_NO_STDIO +static void start_file(stbi *s, FILE *f) +{ + s->img_file = f; + s->buflen = sizeof(s->buffer_start); + s->img_buffer_end = s->buffer_start + s->buflen; + s->img_buffer = s->img_buffer_end; + s->from_file = 1; +} +#endif + +static void start_mem(stbi *s, uint8 const *buffer, int len) +{ +#ifndef STBI_NO_STDIO + s->img_file = NULL; + s->from_file = 0; +#endif + s->img_buffer = (uint8 *) buffer; + s->img_buffer_end = (uint8 *) buffer+len; +} + +#ifndef STBI_NO_STDIO +static void refill_buffer(stbi *s) +{ + int n = (int)fread(s->buffer_start, 1, s->buflen, s->img_file); + if (n == 0) { + 
s->from_file = 0; + s->img_buffer = s->img_buffer_end-1; + *s->img_buffer = 0; + } else { + s->img_buffer = s->buffer_start; + s->img_buffer_end = s->buffer_start + n; + } +} +#endif + +__forceinline static int get8(stbi *s) +{ + if (s->img_buffer < s->img_buffer_end) + return *s->img_buffer++; +#ifndef STBI_NO_STDIO + if (s->from_file) { + refill_buffer(s); + return *s->img_buffer++; + } +#endif + return 0; +} + +__forceinline static int at_eof(stbi *s) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + if (!feof(s->img_file)) return 0; + // if feof() is true, check if buffer = end + // special case: we've only got the special 0 character at the end + if (s->from_file == 0) return 1; + } +#endif + return s->img_buffer >= s->img_buffer_end; +} + +__forceinline static uint8 get8u(stbi *s) +{ + return (uint8) get8(s); +} + +static void skip(stbi *s, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + int blen = (int)(s->img_buffer_end - s->img_buffer); + if (blen < n) { + s->img_buffer = s->img_buffer_end; + fseek(s->img_file, n - blen, SEEK_CUR); + return; + } + } +#endif + s->img_buffer += n; +} + +static int getn(stbi *s, stbi_uc *buffer, int n) +{ +#ifndef STBI_NO_STDIO + if (s->img_file) { + int blen = (int)(s->img_buffer_end - s->img_buffer); + if (blen < n) { + int res; + memcpy(buffer, s->img_buffer, blen); + res = ((int) fread(buffer + blen, 1, n - blen, s->img_file) == (n-blen)); + s->img_buffer = s->img_buffer_end; + return res; + } + } +#endif + if (s->img_buffer+n <= s->img_buffer_end) { + memcpy(buffer, s->img_buffer, n); + s->img_buffer += n; + return 1; + } else + return 0; +} + +static int get16(stbi *s) +{ + int z = get8(s); + return (z << 8) + get8(s); +} + +static uint32 get32(stbi *s) +{ + uint32 z = get16(s); + return (z << 16) + get16(s); +} + +static int get16le(stbi *s) +{ + int z = get8(s); + return z + (get8(s) << 8); +} + +static uint32 get32le(stbi *s) +{ + uint32 z = get16le(s); + return z + (get16le(s) << 16); +} + 
+////////////////////////////////////////////////////////////////////////////// +// +// generic converter from built-in img_n to req_comp +// individual types do this automatically as much as possible (e.g. jpeg +// does all cases internally since it needs to colorspace convert anyway, +// and it never has alpha, so very few cases ). png can automatically +// interleave an alpha=255 channel, but falls back to this for other cases +// +// assume data buffer is malloced, so malloc a new one and free that one +// only failure mode is malloc failing + +static uint8 compute_y(int r, int g, int b) +{ + return (uint8) (((r*77) + (g*150) + (29*b)) >> 8); +} + +static unsigned char *convert_format(unsigned char *data, int img_n, int req_comp, uint x, uint y) +{ + int i,j; + unsigned char *good; + + if (req_comp == img_n) return data; + assert(req_comp >= 1 && req_comp <= 4); + + good = (unsigned char *) malloc(req_comp * x * y); + if (good == NULL) { + free(data); + return epuc("outofmem", "Out of memory"); + } + + for (j=0; j < (int) y; ++j) { + unsigned char *src = data + j * x * img_n ; + unsigned char *dest = good + j * x * req_comp; + + #define COMBO(a,b) ((a)*8+(b)) + #define CASE(a,b) case COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b) + // convert source image with img_n components to one with req_comp components; + // avoid switch per pixel, so use switch per scanline and massive macros + switch (COMBO(img_n, req_comp)) { + CASE(1,2) dest[0]=src[0], dest[1]=255; break; + CASE(1,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(1,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=255; break; + CASE(2,1) dest[0]=src[0]; break; + CASE(2,3) dest[0]=dest[1]=dest[2]=src[0]; break; + CASE(2,4) dest[0]=dest[1]=dest[2]=src[0], dest[3]=src[1]; break; + CASE(3,4) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2],dest[3]=255; break; + CASE(3,1) dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(3,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = 255; break; + CASE(4,1) 
dest[0]=compute_y(src[0],src[1],src[2]); break; + CASE(4,2) dest[0]=compute_y(src[0],src[1],src[2]), dest[1] = src[3]; break; + CASE(4,3) dest[0]=src[0],dest[1]=src[1],dest[2]=src[2]; break; + default: assert(0); + } + #undef CASE + } + + free(data); + return good; +} + +#ifndef STBI_NO_HDR +static float *ldr_to_hdr(stbi_uc *data, int x, int y, int comp) +{ + int i,k,n; + float *output = (float *) malloc(x * y * comp * sizeof(float)); + if (output == NULL) { free(data); return epf("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + output[i*comp + k] = (float) pow(data[i*comp+k]/255.0f, l2h_gamma) * l2h_scale; + } + if (k < comp) output[i*comp + k] = data[i*comp+k]/255.0f; + } + free(data); + return output; +} + +#define float2int(x) ((int) (x)) +static stbi_uc *hdr_to_ldr(float *data, int x, int y, int comp) +{ + int i,k,n; + stbi_uc *output = (stbi_uc *) malloc(x * y * comp); + if (output == NULL) { free(data); return epuc("outofmem", "Out of memory"); } + // compute number of non-alpha components + if (comp & 1) n = comp; else n = comp-1; + for (i=0; i < x*y; ++i) { + for (k=0; k < n; ++k) { + float z = (float) pow(data[i*comp+k]*h2l_scale_i, h2l_gamma_i) * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (uint8) float2int(z); + } + if (k < comp) { + float z = data[i*comp+k] * 255 + 0.5f; + if (z < 0) z = 0; + if (z > 255) z = 255; + output[i*comp + k] = (uint8) float2int(z); + } + } + free(data); + return output; +} +#endif + +////////////////////////////////////////////////////////////////////////////// +// +// "baseline" JPEG/JFIF decoder (not actually fully baseline implementation) +// +// simple implementation +// - channel subsampling of at most 2 in each dimension +// - doesn't support delayed output of y-dimension +// - simple interface (only one output format: 8-bit interleaved RGB) +// - doesn't try to 
recover corrupt jpegs +// - doesn't allow partial loading, loading multiple at once +// - still fast on x86 (copying globals into locals doesn't help x86) +// - allocates lots of intermediate memory (full size of all components) +// - non-interleaved case requires this anyway +// - allows good upsampling (see next) +// high-quality +// - upsampled channels are bilinearly interpolated, even across blocks +// - quality integer IDCT derived from IJG's 'slow' +// performance +// - fast huffman; reasonable integer IDCT +// - uses a lot of intermediate memory, could cache poorly +// - load http://nothings.org/remote/anemones.jpg 3 times on 2.8Ghz P4 +// stb_jpeg: 1.34 seconds (MSVC6, default release build) +// stb_jpeg: 1.06 seconds (MSVC6, processor = Pentium Pro) +// IJL11.dll: 1.08 seconds (compiled by intel) +// IJG 1998: 0.98 seconds (MSVC6, makefile provided by IJG) +// IJG 1998: 0.95 seconds (MSVC6, makefile + proc=PPro) + +// huffman decoding acceleration +#define FAST_BITS 9 // larger handles more cases; smaller stomps less cache + +typedef struct +{ + uint8 fast[1 << FAST_BITS]; + // weirdly, repacking this into AoS is a 10% speed loss, instead of a win + uint16 code[256]; + uint8 values[256]; + uint8 size[257]; + unsigned int maxcode[18]; + int delta[17]; // old 'firstsymbol' - old 'firstcode' +} huffman; + +typedef struct +{ + #ifdef STBI_SIMD + unsigned short dequant2[4][64]; + #endif + stbi s; + huffman huff_dc[4]; + huffman huff_ac[4]; + uint8 dequant[4][64]; + +// sizes for components, interleaved MCUs + int img_h_max, img_v_max; + int img_mcu_x, img_mcu_y; + int img_mcu_w, img_mcu_h; + +// definition of jpeg image component + struct + { + int id; + int h,v; + int tq; + int hd,ha; + int dc_pred; + + int x,y,w2,h2; + uint8 *data; + void *raw_data; + uint8 *linebuf; + } img_comp[4]; + + uint32 code_buffer; // jpeg entropy-coded buffer + int code_bits; // number of valid bits + unsigned char marker; // marker seen while filling entropy buffer + int nomore; 
// flag if we saw a marker so must stop + + int scan_n, order[4]; + int restart_interval, todo; +} jpeg; + +static int build_huffman(huffman *h, int *count) +{ + int i,j,k=0,code; + // build size list for each symbol (from JPEG spec) + for (i=0; i < 16; ++i) + for (j=0; j < count[i]; ++j) + h->size[k++] = (uint8) (i+1); + h->size[k] = 0; + + // compute actual symbols (from jpeg spec) + code = 0; + k = 0; + for(j=1; j <= 16; ++j) { + // compute delta to add to code to compute symbol id + h->delta[j] = k - code; + if (h->size[k] == j) { + while (h->size[k] == j) + h->code[k++] = (uint16) (code++); + if (code-1 >= (1 << j)) return e("bad code lengths","Corrupt JPEG"); + } + // compute largest code + 1 for this size, preshifted as needed later + h->maxcode[j] = code << (16-j); + code <<= 1; + } + h->maxcode[j] = 0xffffffff; + + // build non-spec acceleration table; 255 is flag for not-accelerated + memset(h->fast, 255, 1 << FAST_BITS); + for (i=0; i < k; ++i) { + int s = h->size[i]; + if (s <= FAST_BITS) { + int c = h->code[i] << (FAST_BITS-s); + int m = 1 << (FAST_BITS-s); + for (j=0; j < m; ++j) { + h->fast[c+j] = (uint8) i; + } + } + } + return 1; +} + +static void grow_buffer_unsafe(jpeg *j) +{ + do { + int b = j->nomore ? 
0 : get8(&j->s); + if (b == 0xff) { + int c = get8(&j->s); + if (c != 0) { + j->marker = (unsigned char) c; + j->nomore = 1; + return; + } + } + j->code_buffer |= b << (24 - j->code_bits); + j->code_bits += 8; + } while (j->code_bits <= 24); +} + +// (1 << n) - 1 +static uint32 bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535}; + +// decode a jpeg huffman value from the bitstream +__forceinline static int decode(jpeg *j, huffman *h) +{ + unsigned int temp; + int c,k; + + if (j->code_bits < 16) grow_buffer_unsafe(j); + + // look at the top FAST_BITS and determine what symbol ID it is, + // if the code is <= FAST_BITS + c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1); + k = h->fast[c]; + if (k < 255) { + int s = h->size[k]; + if (s > j->code_bits) + return -1; + j->code_buffer <<= s; + j->code_bits -= s; + return h->values[k]; + } + + // naive test is to shift the code_buffer down so k bits are + // valid, then test against maxcode. To speed this up, we've + // preshifted maxcode left so that it has (16-k) 0s at the + // end; in other words, regardless of the number of bits, it + // wants to be compared against something shifted to have 16; + // that way we don't need to shift inside the loop. + temp = j->code_buffer >> 16; + for (k=FAST_BITS+1 ; ; ++k) + if (temp < h->maxcode[k]) + break; + if (k == 17) { + // error! code not found + j->code_bits -= 16; + return -1; + } + + if (k > j->code_bits) + return -1; + + // convert the huffman code to the symbol id + c = ((j->code_buffer >> (32 - k)) & bmask[k]) + h->delta[k]; + assert((((j->code_buffer) >> (32 - h->size[c])) & bmask[h->size[c]]) == h->code[c]); + + // convert the id to a symbol + j->code_bits -= k; + j->code_buffer <<= k; + return h->values[c]; +} + +// combined JPEG 'receive' and JPEG 'extend', since baseline +// always extends everything it receives. 
+__forceinline static int extend_receive(jpeg *j, int n) +{ + unsigned int m = 1 << (n-1); + unsigned int k; + if (j->code_bits < n) grow_buffer_unsafe(j); + + #if 1 + k = stbi_lrot(j->code_buffer, n); + j->code_buffer = k & ~bmask[n]; + k &= bmask[n]; + j->code_bits -= n; + #else + k = (j->code_buffer >> (32 - n)) & bmask[n]; + j->code_bits -= n; + j->code_buffer <<= n; + #endif + // the following test is probably a random branch that won't + // predict well. I tried to table accelerate it but failed. + // maybe it's compiling as a conditional move? + if (k < m) + return (-1 << n) + k + 1; + else + return k; +} + +// given a value that's at position X in the zigzag stream, +// where does it appear in the 8x8 matrix coded as row-major? +static uint8 dezigzag[64+15] = +{ + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, + 12, 19, 26, 33, 40, 48, 41, 34, + 27, 20, 13, 6, 7, 14, 21, 28, + 35, 42, 49, 56, 57, 50, 43, 36, + 29, 22, 15, 23, 30, 37, 44, 51, + 58, 59, 52, 45, 38, 31, 39, 46, + 53, 60, 61, 54, 47, 55, 62, 63, + // let corrupt input sample past end + 63, 63, 63, 63, 63, 63, 63, 63, + 63, 63, 63, 63, 63, 63, 63 +}; + +// decode one 64-entry block-- +static int decode_block(jpeg *j, short data[64], huffman *hdc, huffman *hac, int b) +{ + int diff,dc,k; + int t = decode(j, hdc); + if (t < 0) return e("bad huffman code","Corrupt JPEG"); + + // 0 all the ac values now so we can do it 32-bits at a time + memset(data,0,64*sizeof(data[0])); + + diff = t ? 
extend_receive(j, t) : 0; + dc = j->img_comp[b].dc_pred + diff; + j->img_comp[b].dc_pred = dc; + data[0] = (short) dc; + + // decode AC components, see JPEG spec + k = 1; + do { + int r,s; + int rs = decode(j, hac); + if (rs < 0) return e("bad huffman code","Corrupt JPEG"); + s = rs & 15; + r = rs >> 4; + if (s == 0) { + if (rs != 0xf0) break; // end block + k += 16; + } else { + k += r; + // decode into unzigzag'd location + data[dezigzag[k++]] = (short) extend_receive(j,s); + } + } while (k < 64); + return 1; +} + +// take a -128..127 value and clamp it and convert to 0..255 +__forceinline static uint8 clamp(int x) +{ + // trick to use a single test to catch both cases + if ((unsigned int) x > 255) { + if (x < 0) return 0; + if (x > 255) return 255; + } + return (uint8) x; +} + +#define f2f(x) (int) (((x) * 4096 + 0.5)) +#define fsh(x) ((x) << 12) + +// derived from jidctint -- DCT_ISLOW +#define IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \ + int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \ + p2 = s2; \ + p3 = s6; \ + p1 = (p2+p3) * f2f(0.5411961f); \ + t2 = p1 + p3*f2f(-1.847759065f); \ + t3 = p1 + p2*f2f( 0.765366865f); \ + p2 = s0; \ + p3 = s4; \ + t0 = fsh(p2+p3); \ + t1 = fsh(p2-p3); \ + x0 = t0+t3; \ + x3 = t0-t3; \ + x1 = t1+t2; \ + x2 = t1-t2; \ + t0 = s7; \ + t1 = s5; \ + t2 = s3; \ + t3 = s1; \ + p3 = t0+t2; \ + p4 = t1+t3; \ + p1 = t0+t3; \ + p2 = t1+t2; \ + p5 = (p3+p4)*f2f( 1.175875602f); \ + t0 = t0*f2f( 0.298631336f); \ + t1 = t1*f2f( 2.053119869f); \ + t2 = t2*f2f( 3.072711026f); \ + t3 = t3*f2f( 1.501321110f); \ + p1 = p5 + p1*f2f(-0.899976223f); \ + p2 = p5 + p2*f2f(-2.562915447f); \ + p3 = p3*f2f(-1.961570560f); \ + p4 = p4*f2f(-0.390180644f); \ + t3 += p1+p4; \ + t2 += p2+p3; \ + t1 += p2+p4; \ + t0 += p1+p3; + +#ifdef STBI_SIMD +typedef unsigned short stbi_dequantize_t; +#else +typedef uint8 stbi_dequantize_t; +#endif + +// .344 seconds on 3*anemones.jpg +static void idct_block(uint8 *out, int out_stride, short data[64], stbi_dequantize_t *dequantize) +{ 
+ int i,val[64],*v=val; + stbi_dequantize_t *dq = dequantize; + uint8 *o; + short *d = data; + + // columns + for (i=0; i < 8; ++i,++d,++dq, ++v) { + // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing + if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0 + && d[40]==0 && d[48]==0 && d[56]==0) { + // no shortcut 0 seconds + // (1|2|3|4|5|6|7)==0 0 seconds + // all separate -0.047 seconds + // 1 && 2|3 && 4|5 && 6|7: -0.047 seconds + int dcterm = d[0] * dq[0] << 2; + v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm; + } else { + IDCT_1D(d[ 0]*dq[ 0],d[ 8]*dq[ 8],d[16]*dq[16],d[24]*dq[24], + d[32]*dq[32],d[40]*dq[40],d[48]*dq[48],d[56]*dq[56]) + // constants scaled things up by 1<<12; let's bring them back + // down, but keep 2 extra bits of precision + x0 += 512; x1 += 512; x2 += 512; x3 += 512; + v[ 0] = (x0+t3) >> 10; + v[56] = (x0-t3) >> 10; + v[ 8] = (x1+t2) >> 10; + v[48] = (x1-t2) >> 10; + v[16] = (x2+t1) >> 10; + v[40] = (x2-t1) >> 10; + v[24] = (x3+t0) >> 10; + v[32] = (x3-t0) >> 10; + } + } + + for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) { + // no fast case since the first 1D IDCT spread components out + IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7]) + // constants scaled things up by 1<<12, plus we had 1<<2 from first + // loop, plus horizontal and vertical each scale by sqrt(8) so together + // we've got an extra 1<<3, so 1<<17 total we need to remove. + // so we want to round that, which means adding 0.5 * 1<<17, + // aka 65536. 
Also, we'll end up with -128 to 127 that we want + // to encode as 0..255 by adding 128, so we'll add that before the shift + x0 += 65536 + (128<<17); + x1 += 65536 + (128<<17); + x2 += 65536 + (128<<17); + x3 += 65536 + (128<<17); + // tried computing the shifts into temps, or'ing the temps to see + // if any were out of range, but that was slower + o[0] = clamp((x0+t3) >> 17); + o[7] = clamp((x0-t3) >> 17); + o[1] = clamp((x1+t2) >> 17); + o[6] = clamp((x1-t2) >> 17); + o[2] = clamp((x2+t1) >> 17); + o[5] = clamp((x2-t1) >> 17); + o[3] = clamp((x3+t0) >> 17); + o[4] = clamp((x3-t0) >> 17); + } +} + +#ifdef STBI_SIMD +static stbi_idct_8x8 stbi_idct_installed = idct_block; + +extern void stbi_install_idct(stbi_idct_8x8 func) +{ + stbi_idct_installed = func; +} +#endif + +#define MARKER_none 0xff +// if there's a pending marker from the entropy stream, return that +// otherwise, fetch from the stream and get a marker. if there's no +// marker, return 0xff, which is never a valid marker value +static uint8 get_marker(jpeg *j) +{ + uint8 x; + if (j->marker != MARKER_none) { x = j->marker; j->marker = MARKER_none; return x; } + x = get8u(&j->s); + if (x != 0xff) return MARKER_none; + while (x == 0xff) + x = get8u(&j->s); + return x; +} + +// in each scan, we'll have scan_n components, and the order +// of the components is specified by order[] +#define RESTART(x) ((x) >= 0xd0 && (x) <= 0xd7) + +// after a restart interval, reset the entropy decoder and +// the dc prediction +static void reset(jpeg *j) +{ + j->code_bits = 0; + j->code_buffer = 0; + j->nomore = 0; + j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = 0; + j->marker = MARKER_none; + j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff; + // no more than 1<<31 MCUs if no restart_interal? 
that's plenty safe, + // since we don't even allow 1<<30 pixels +} + +static int parse_entropy_coded_data(jpeg *z) +{ + reset(z); + if (z->scan_n == 1) { + int i,j; + #ifdef STBI_SIMD + __declspec(align(16)) + #endif + short data[64]; + int n = z->order[0]; + // non-interleaved data, we just need to process one block at a time, + // in trivial scanline order + // number of blocks to do just depends on how many actual "pixels" this + // component has, independent of interleaved MCU blocking and such + int w = (z->img_comp[n].x+7) >> 3; + int h = (z->img_comp[n].y+7) >> 3; + for (j=0; j < h; ++j) { + for (i=0; i < w; ++i) { + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #ifdef STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + // every data block is an MCU, so countdown the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } else { // interleaved! + int i,j,k,x,y; + short data[64]; + for (j=0; j < z->img_mcu_y; ++j) { + for (i=0; i < z->img_mcu_x; ++i) { + // scan an interleaved mcu... 
process scan_n components in order + for (k=0; k < z->scan_n; ++k) { + int n = z->order[k]; + // scan out an mcu's worth of this component; that's just determined + // by the basic H and V specified for the component + for (y=0; y < z->img_comp[n].v; ++y) { + for (x=0; x < z->img_comp[n].h; ++x) { + int x2 = (i*z->img_comp[n].h + x)*8; + int y2 = (j*z->img_comp[n].v + y)*8; + if (!decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+z->img_comp[n].ha, n)) return 0; + #ifdef STBI_SIMD + stbi_idct_installed(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant2[z->img_comp[n].tq]); + #else + idct_block(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data, z->dequant[z->img_comp[n].tq]); + #endif + } + } + } + // after all interleaved components, that's an interleaved MCU, + // so now count down the restart interval + if (--z->todo <= 0) { + if (z->code_bits < 24) grow_buffer_unsafe(z); + // if it's NOT a restart, then just bail, so we get corrupt data + // rather than no data + if (!RESTART(z->marker)) return 1; + reset(z); + } + } + } + } + return 1; +} + +static int process_marker(jpeg *z, int m) +{ + int L; + switch (m) { + case MARKER_none: // no marker found + return e("expected marker","Corrupt JPEG"); + + case 0xC2: // SOF - progressive + return e("progressive jpeg","JPEG format not supported (progressive)"); + + case 0xDD: // DRI - specify restart interval + if (get16(&z->s) != 4) return e("bad DRI len","Corrupt JPEG"); + z->restart_interval = get16(&z->s); + return 1; + + case 0xDB: // DQT - define quantization table + L = get16(&z->s)-2; + while (L > 0) { + int q = get8(&z->s); + int p = q >> 4; + int t = q & 15,i; + if (p != 0) return e("bad DQT type","Corrupt JPEG"); + if (t > 3) return e("bad DQT table","Corrupt JPEG"); + for (i=0; i < 64; ++i) + z->dequant[t][dezigzag[i]] = get8u(&z->s); + #ifdef STBI_SIMD + for (i=0; i < 64; ++i) + z->dequant2[t][i] = z->dequant[t][i]; + #endif + L -= 65; + } + 
return L==0; + + case 0xC4: // DHT - define huffman table + L = get16(&z->s)-2; + while (L > 0) { + uint8 *v; + int sizes[16],i,m=0; + int q = get8(&z->s); + int tc = q >> 4; + int th = q & 15; + if (tc > 1 || th > 3) return e("bad DHT header","Corrupt JPEG"); + for (i=0; i < 16; ++i) { + sizes[i] = get8(&z->s); + m += sizes[i]; + } + L -= 17; + if (tc == 0) { + if (!build_huffman(z->huff_dc+th, sizes)) return 0; + v = z->huff_dc[th].values; + } else { + if (!build_huffman(z->huff_ac+th, sizes)) return 0; + v = z->huff_ac[th].values; + } + for (i=0; i < m; ++i) + v[i] = get8u(&z->s); + L -= m; + } + return L==0; + } + // check for comment block or APP blocks + if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) { + skip(&z->s, get16(&z->s)-2); + return 1; + } + return 0; +} + +// after we see SOS +static int process_scan_header(jpeg *z) +{ + int i; + int Ls = get16(&z->s); + z->scan_n = get8(&z->s); + if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s.img_n) return e("bad SOS component count","Corrupt JPEG"); + if (Ls != 6+2*z->scan_n) return e("bad SOS len","Corrupt JPEG"); + for (i=0; i < z->scan_n; ++i) { + int id = get8(&z->s), which; + int q = get8(&z->s); + for (which = 0; which < z->s.img_n; ++which) + if (z->img_comp[which].id == id) + break; + if (which == z->s.img_n) return 0; + z->img_comp[which].hd = q >> 4; if (z->img_comp[which].hd > 3) return e("bad DC huff","Corrupt JPEG"); + z->img_comp[which].ha = q & 15; if (z->img_comp[which].ha > 3) return e("bad AC huff","Corrupt JPEG"); + z->order[i] = which; + } + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + get8(&z->s); // should be 63, but might be 0 + if (get8(&z->s) != 0) return e("bad SOS","Corrupt JPEG"); + + return 1; +} + +static int process_frame_header(jpeg *z, int scan) +{ + stbi *s = &z->s; + int Lf,p,i,q, h_max=1,v_max=1,c; + Lf = get16(s); if (Lf < 11) return e("bad SOF len","Corrupt JPEG"); // JPEG + p = get8(s); if (p != 8) return e("only 8-bit","JPEG format not supported: 
8-bit only"); // JPEG baseline + s->img_y = get16(s); if (s->img_y == 0) return e("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG + s->img_x = get16(s); if (s->img_x == 0) return e("0 width","Corrupt JPEG"); // JPEG requires + c = get8(s); + if (c != 3 && c != 1) return e("bad component count","Corrupt JPEG"); // JFIF requires + s->img_n = c; + for (i=0; i < c; ++i) { + z->img_comp[i].data = NULL; + z->img_comp[i].linebuf = NULL; + } + + if (Lf != 8+3*s->img_n) return e("bad SOF len","Corrupt JPEG"); + + for (i=0; i < s->img_n; ++i) { + z->img_comp[i].id = get8(s); + if (z->img_comp[i].id != i+1) // JFIF requires + if (z->img_comp[i].id != i) // some version of jpegtran outputs non-JFIF-compliant files! + return e("bad component ID","Corrupt JPEG"); + q = get8(s); + z->img_comp[i].h = (q >> 4); if (!z->img_comp[i].h || z->img_comp[i].h > 4) return e("bad H","Corrupt JPEG"); + z->img_comp[i].v = q & 15; if (!z->img_comp[i].v || z->img_comp[i].v > 4) return e("bad V","Corrupt JPEG"); + z->img_comp[i].tq = get8(s); if (z->img_comp[i].tq > 3) return e("bad TQ","Corrupt JPEG"); + } + + if (scan != SCAN_load) return 1; + + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + + for (i=0; i < s->img_n; ++i) { + if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h; + if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v; + } + + // compute interleaved mcu info + z->img_h_max = h_max; + z->img_v_max = v_max; + z->img_mcu_w = h_max * 8; + z->img_mcu_h = v_max * 8; + z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w; + z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h; + + for (i=0; i < s->img_n; ++i) { + // number of effective pixels (e.g. 
for non-interleaved MCU) + z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max; + z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max; + // to simplify generation, we'll allocate enough memory to decode + // the bogus oversized data from using interleaved MCUs and their + // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't + // discard the extra data until colorspace conversion + z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8; + z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8; + z->img_comp[i].raw_data = malloc(z->img_comp[i].w2 * z->img_comp[i].h2+15); + if (z->img_comp[i].raw_data == NULL) { + for(--i; i >= 0; --i) { + free(z->img_comp[i].raw_data); + z->img_comp[i].data = NULL; + } + return e("outofmem", "Out of memory"); + } + // align blocks for installable-idct using mmx/sse + z->img_comp[i].data = (uint8*) (((size_t) z->img_comp[i].raw_data + 15) & ~15); + z->img_comp[i].linebuf = NULL; + } + + return 1; +} + +// use comparisons since in some cases we handle more than one case (e.g. 
SOF) +#define DNL(x) ((x) == 0xdc) +#define SOI(x) ((x) == 0xd8) +#define EOI(x) ((x) == 0xd9) +#define SOF(x) ((x) == 0xc0 || (x) == 0xc1) +#define SOS(x) ((x) == 0xda) + +static int decode_jpeg_header(jpeg *z, int scan) +{ + int m; + z->marker = MARKER_none; // initialize cached marker to empty + m = get_marker(z); + if (!SOI(m)) return e("no SOI","Corrupt JPEG"); + if (scan == SCAN_type) return 1; + m = get_marker(z); + while (!SOF(m)) { + if (!process_marker(z,m)) return 0; + m = get_marker(z); + while (m == MARKER_none) { + // some files have extra padding after their blocks, so ok, we'll scan + if (at_eof(&z->s)) return e("no SOF", "Corrupt JPEG"); + m = get_marker(z); + } + } + if (!process_frame_header(z, scan)) return 0; + return 1; +} + +static int decode_jpeg_image(jpeg *j) +{ + int m; + j->restart_interval = 0; + if (!decode_jpeg_header(j, SCAN_load)) return 0; + m = get_marker(j); + while (!EOI(m)) { + if (SOS(m)) { + if (!process_scan_header(j)) return 0; + if (!parse_entropy_coded_data(j)) return 0; + if (j->marker == MARKER_none ) { + // handle 0s at the end of image data from IP Kamera 9060 + while (!at_eof(&j->s)) { + int x = get8(&j->s); + if (x == 255) { + j->marker = get8u(&j->s); + break; + } else if (x != 0) { + return 0; + } + } + // if we reach eof without hitting a marker, get_marker() below will fail and we'll eventually return 0 + } + } else { + if (!process_marker(j, m)) return 0; + } + m = get_marker(j); + } + return 1; +} + +// static jfif-centered resampling (across block boundaries) + +typedef uint8 *(*resample_row_func)(uint8 *out, uint8 *in0, uint8 *in1, + int w, int hs); + +#define div4(x) ((uint8) ((x) >> 2)) + +static uint8 *resample_row_1(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + STBI_NOTUSED(out); + STBI_NOTUSED(in_far); + STBI_NOTUSED(w); + STBI_NOTUSED(hs); + return in_near; +} + +static uint8* resample_row_v_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two 
samples vertically for every one in input + int i; + STBI_NOTUSED(hs); + for (i=0; i < w; ++i) + out[i] = div4(3*in_near[i] + in_far[i] + 2); + return out; +} + +static uint8* resample_row_h_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate two samples horizontally for every one in input + int i; + uint8 *input = in_near; + + if (w == 1) { + // if only one sample, can't do any interpolation + out[0] = out[1] = input[0]; + return out; + } + + out[0] = input[0]; + out[1] = div4(input[0]*3 + input[1] + 2); + for (i=1; i < w-1; ++i) { + int n = 3*input[i]+2; + out[i*2+0] = div4(n+input[i-1]); + out[i*2+1] = div4(n+input[i+1]); + } + out[i*2+0] = div4(input[w-2]*3 + input[w-1] + 2); + out[i*2+1] = input[w-1]; + + STBI_NOTUSED(in_far); + STBI_NOTUSED(hs); + + return out; +} + +#define div16(x) ((uint8) ((x) >> 4)) + +static uint8 *resample_row_hv_2(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // need to generate 2x2 samples for every one in input + int i,t0,t1; + if (w == 1) { + out[0] = out[1] = div4(3*in_near[0] + in_far[0] + 2); + return out; + } + + t1 = 3*in_near[0] + in_far[0]; + out[0] = div4(t1+2); + for (i=1; i < w; ++i) { + t0 = t1; + t1 = 3*in_near[i]+in_far[i]; + out[i*2-1] = div16(3*t0 + t1 + 8); + out[i*2 ] = div16(3*t1 + t0 + 8); + } + out[w*2-1] = div4(t1+2); + + STBI_NOTUSED(hs); + + return out; +} + +static uint8 *resample_row_generic(uint8 *out, uint8 *in_near, uint8 *in_far, int w, int hs) +{ + // resample with nearest-neighbor + int i,j; + in_far = in_far; + for (i=0; i < w; ++i) + for (j=0; j < hs; ++j) + out[i*hs+j] = in_near[i]; + return out; +} + +#define float2fixed(x) ((int) ((x) * 65536 + 0.5)) + +// 0.38 seconds on 3*anemones.jpg (0.25 with processor = Pro) +// VC6 without processor=Pro is generating multiple LEAs per multiply! 
+static void YCbCr_to_RGB_row(uint8 *out, const uint8 *y, const uint8 *pcb, const uint8 *pcr, int count, int step) +{ + int i; + for (i=0; i < count; ++i) { + int y_fixed = (y[i] << 16) + 32768; // rounding + int r,g,b; + int cr = pcr[i] - 128; + int cb = pcb[i] - 128; + r = y_fixed + cr*float2fixed(1.40200f); + g = y_fixed - cr*float2fixed(0.71414f) - cb*float2fixed(0.34414f); + b = y_fixed + cb*float2fixed(1.77200f); + r >>= 16; + g >>= 16; + b >>= 16; + if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; } + if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; } + if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; } + out[0] = (uint8)r; + out[1] = (uint8)g; + out[2] = (uint8)b; + out[3] = 255; + out += step; + } +} + +#ifdef STBI_SIMD +static stbi_YCbCr_to_RGB_run stbi_YCbCr_installed = YCbCr_to_RGB_row; + +void stbi_install_YCbCr_to_RGB(stbi_YCbCr_to_RGB_run func) +{ + stbi_YCbCr_installed = func; +} +#endif + + +// clean up the temporary component buffers +static void cleanup_jpeg(jpeg *j) +{ + int i; + for (i=0; i < j->s.img_n; ++i) { + if (j->img_comp[i].data) { + free(j->img_comp[i].raw_data); + j->img_comp[i].data = NULL; + } + if (j->img_comp[i].linebuf) { + free(j->img_comp[i].linebuf); + j->img_comp[i].linebuf = NULL; + } + } +} + +typedef struct +{ + resample_row_func resample; + uint8 *line0,*line1; + int hs,vs; // expansion factor in each axis + int w_lores; // horizontal pixels pre-expansion + int ystep; // how far through vertical expansion we are + int ypos; // which pre-expansion row we're on +} stbi_resample; + +static uint8 *load_jpeg_image(jpeg *z, int *out_x, int *out_y, int *comp, int req_comp) +{ + int n, decode_n; + // validate req_comp + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + z->s.img_n = 0; + + // load a jpeg image from whichever source + if (!decode_jpeg_image(z)) { cleanup_jpeg(z); return NULL; } + + // determine actual number of components to generate + n = req_comp ? 
req_comp : z->s.img_n; + + if (z->s.img_n == 3 && n < 3) + decode_n = 1; + else + decode_n = z->s.img_n; + + // resample and color-convert + { + int k; + uint i,j; + uint8 *output; + uint8 *coutput[4]; + + stbi_resample res_comp[4]; + + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + + // allocate line buffer big enough for upsampling off the edges + // with upsample factor of 4 + z->img_comp[k].linebuf = (uint8 *) malloc(z->s.img_x + 3); + if (!z->img_comp[k].linebuf) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + r->hs = z->img_h_max / z->img_comp[k].h; + r->vs = z->img_v_max / z->img_comp[k].v; + r->ystep = r->vs >> 1; + r->w_lores = (z->s.img_x + r->hs-1) / r->hs; + r->ypos = 0; + r->line0 = r->line1 = z->img_comp[k].data; + + if (r->hs == 1 && r->vs == 1) r->resample = resample_row_1; + else if (r->hs == 1 && r->vs == 2) r->resample = resample_row_v_2; + else if (r->hs == 2 && r->vs == 1) r->resample = resample_row_h_2; + else if (r->hs == 2 && r->vs == 2) r->resample = resample_row_hv_2; + else r->resample = resample_row_generic; + } + + // can't error after this so, this is safe + output = (uint8 *) malloc(n * z->s.img_x * z->s.img_y + 1); + if (!output) { cleanup_jpeg(z); return epuc("outofmem", "Out of memory"); } + + // now go ahead and resample + for (j=0; j < z->s.img_y; ++j) { + uint8 *out = output + n * z->s.img_x * j; + for (k=0; k < decode_n; ++k) { + stbi_resample *r = &res_comp[k]; + int y_bot = r->ystep >= (r->vs >> 1); + coutput[k] = r->resample(z->img_comp[k].linebuf, + y_bot ? r->line1 : r->line0, + y_bot ? 
r->line0 : r->line1, + r->w_lores, r->hs); + if (++r->ystep >= r->vs) { + r->ystep = 0; + r->line0 = r->line1; + if (++r->ypos < z->img_comp[k].y) + r->line1 += z->img_comp[k].w2; + } + } + if (n >= 3) { + uint8 *y = coutput[0]; + if (z->s.img_n == 3) { + #ifdef STBI_SIMD + stbi_YCbCr_installed(out, y, coutput[1], coutput[2], z->s.img_x, n); + #else + YCbCr_to_RGB_row(out, y, coutput[1], coutput[2], z->s.img_x, n); + #endif + } else + for (i=0; i < z->s.img_x; ++i) { + out[0] = out[1] = out[2] = y[i]; + out[3] = 255; // not used if n==3 + out += n; + } + } else { + uint8 *y = coutput[0]; + if (n == 1) + for (i=0; i < z->s.img_x; ++i) out[i] = y[i]; + else + for (i=0; i < z->s.img_x; ++i) *out++ = y[i], *out++ = 255; + } + } + cleanup_jpeg(z); + *out_x = z->s.img_x; + *out_y = z->s.img_y; + if (comp) *comp = z->s.img_n; // report original components, not output + return output; + } +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_jpeg_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + jpeg j; + start_file(&j.s, f); + return load_jpeg_image(&j, x,y,comp,req_comp); +} + +unsigned char *stbi_jpeg_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_jpeg_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_jpeg_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + #ifdef STBI_SMALL_STACK + unsigned char *result; + jpeg *j = (jpeg *) malloc(sizeof(*j)); + start_mem(&j->s, buffer, len); + result = load_jpeg_image(j,x,y,comp,req_comp); + free(j); + return result; + #else + jpeg j; + start_mem(&j.s, buffer,len); + return load_jpeg_image(&j, x,y,comp,req_comp); + #endif +} + +static int stbi_jpeg_info_raw(jpeg *j, int *x, int *y, int *comp) +{ + if (!decode_jpeg_header(j, SCAN_header)) + return 0; + if (x) *x = j->s.img_x; + if (y) *y = j->s.img_y; + if (comp) 
*comp = j->s.img_n; + return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_jpeg_test_file(FILE *f) +{ + int n,r; + jpeg j; + n = ftell(f); + start_file(&j.s, f); + r = decode_jpeg_header(&j, SCAN_type); + fseek(f,n,SEEK_SET); + return r; +} + +int stbi_jpeg_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + jpeg j; + long n = ftell(f); + int res; + start_file(&j.s, f); + res = stbi_jpeg_info_raw(&j, x, y, comp); + fseek(f, n, SEEK_SET); + return res; +} + +int stbi_jpeg_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = fopen(filename, "rb"); + int result; + if (!f) return e("can't fopen", "Unable to open file"); + result = stbi_jpeg_info_from_file(f, x, y, comp); + fclose(f); + return result; +} +#endif + +int stbi_jpeg_test_memory(stbi_uc const *buffer, int len) +{ + jpeg j; + start_mem(&j.s, buffer,len); + return decode_jpeg_header(&j, SCAN_type); +} + +int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + jpeg j; + start_mem(&j.s, buffer, len); + return stbi_jpeg_info_raw(&j, x, y, comp); +} + +#ifndef STBI_NO_STDIO +extern int stbi_jpeg_info (char const *filename, int *x, int *y, int *comp); +extern int stbi_jpeg_info_from_file (FILE *f, int *x, int *y, int *comp); +#endif +extern int stbi_jpeg_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp); + +// public domain zlib decode v0.2 Sean Barrett 2006-11-18 +// simple implementation +// - all input must be provided in an upfront buffer +// - all output is written to a single output buffer (can malloc/realloc) +// performance +// - fast huffman + +// fast-way is faster to check than jpeg huffman, but slow way is slower +#define ZFAST_BITS 9 // accelerate all cases in default tables +#define ZFAST_MASK ((1 << ZFAST_BITS) - 1) + +// zlib-style huffman encoding +// (jpegs packs from left, zlib from right, so can't share code) +typedef struct +{ + uint16 fast[1 << ZFAST_BITS]; + uint16 firstcode[16]; + int maxcode[17]; + uint16 
firstsymbol[16]; + uint8 size[288]; + uint16 value[288]; +} zhuffman; + +__forceinline static int bitreverse16(int n) +{ + n = ((n & 0xAAAA) >> 1) | ((n & 0x5555) << 1); + n = ((n & 0xCCCC) >> 2) | ((n & 0x3333) << 2); + n = ((n & 0xF0F0) >> 4) | ((n & 0x0F0F) << 4); + n = ((n & 0xFF00) >> 8) | ((n & 0x00FF) << 8); + return n; +} + +__forceinline static int bit_reverse(int v, int bits) +{ + assert(bits <= 16); + // to bit reverse n bits, reverse 16 and shift + // e.g. 11 bits, bit reverse and shift away 5 + return bitreverse16(v) >> (16-bits); +} + +static int zbuild_huffman(zhuffman *z, uint8 *sizelist, int num) +{ + int i,k=0; + int code, next_code[16], sizes[17]; + + // DEFLATE spec for generating codes + memset(sizes, 0, sizeof(sizes)); + memset(z->fast, 255, sizeof(z->fast)); + for (i=0; i < num; ++i) + ++sizes[sizelist[i]]; + sizes[0] = 0; + for (i=1; i < 16; ++i) + assert(sizes[i] <= (1 << i)); + code = 0; + for (i=1; i < 16; ++i) { + next_code[i] = code; + z->firstcode[i] = (uint16) code; + z->firstsymbol[i] = (uint16) k; + code = (code + sizes[i]); + if (sizes[i]) + if (code-1 >= (1 << i)) return e("bad codelengths","Corrupt JPEG"); + z->maxcode[i] = code << (16-i); // preshift for inner loop + code <<= 1; + k += sizes[i]; + } + z->maxcode[16] = 0x10000; // sentinel + for (i=0; i < num; ++i) { + int s = sizelist[i]; + if (s) { + int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s]; + z->size[c] = (uint8)s; + z->value[c] = (uint16)i; + if (s <= ZFAST_BITS) { + int k = bit_reverse(next_code[s],s); + while (k < (1 << ZFAST_BITS)) { + z->fast[k] = (uint16) c; + k += (1 << s); + } + } + ++next_code[s]; + } + } + return 1; +} + +// zlib-from-memory implementation for PNG reading +// because PNG allows splitting the zlib stream arbitrarily, +// and it's annoying structurally to have PNG call ZLIB call PNG, +// we require PNG read all the IDATs and combine them into a single +// memory buffer + +typedef struct +{ + uint8 *zbuffer, *zbuffer_end; + int 
num_bits; + uint32 code_buffer; + + char *zout; + char *zout_start; + char *zout_end; + int z_expandable; + + zhuffman z_length, z_distance; +} zbuf; + +__forceinline static int zget8(zbuf *z) +{ + if (z->zbuffer >= z->zbuffer_end) return 0; + return *z->zbuffer++; +} + +static void fill_bits(zbuf *z) +{ + do { + assert(z->code_buffer < (1U << z->num_bits)); + z->code_buffer |= zget8(z) << z->num_bits; + z->num_bits += 8; + } while (z->num_bits <= 24); +} + +__forceinline static unsigned int zreceive(zbuf *z, int n) +{ + unsigned int k; + if (z->num_bits < n) fill_bits(z); + k = z->code_buffer & ((1 << n) - 1); + z->code_buffer >>= n; + z->num_bits -= n; + return k; +} + +__forceinline static int zhuffman_decode(zbuf *a, zhuffman *z) +{ + int b,s,k; + if (a->num_bits < 16) fill_bits(a); + b = z->fast[a->code_buffer & ZFAST_MASK]; + if (b < 0xffff) { + s = z->size[b]; + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; + } + + // not resolved by fast table, so compute it the slow way + // use jpeg approach, which requires MSbits at top + k = bit_reverse(a->code_buffer, 16); + for (s=ZFAST_BITS+1; ; ++s) + if (k < z->maxcode[s]) + break; + if (s == 16) return -1; // invalid code! 
+ // code size is s, so: + b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s]; + assert(z->size[b] == s); + a->code_buffer >>= s; + a->num_bits -= s; + return z->value[b]; +} + +static int expand(zbuf *z, int n) // need to make room for n bytes +{ + char *q; + int cur, limit; + if (!z->z_expandable) return e("output buffer limit","Corrupt PNG"); + cur = (int) (z->zout - z->zout_start); + limit = (int) (z->zout_end - z->zout_start); + while (cur + n > limit) + limit *= 2; + q = (char *) realloc(z->zout_start, limit); + if (q == NULL) return e("outofmem", "Out of memory"); + z->zout_start = q; + z->zout = q + cur; + z->zout_end = q + limit; + return 1; +} + +static int length_base[31] = { + 3,4,5,6,7,8,9,10,11,13, + 15,17,19,23,27,31,35,43,51,59, + 67,83,99,115,131,163,195,227,258,0,0 }; + +static int length_extra[31]= +{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 }; + +static int dist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193, +257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0}; + +static int dist_extra[32] = +{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13}; + +static int parse_huffman_block(zbuf *a) +{ + for(;;) { + int z = zhuffman_decode(a, &a->z_length); + if (z < 256) { + if (z < 0) return e("bad huffman code","Corrupt PNG"); // error in huffman codes + if (a->zout >= a->zout_end) if (!expand(a, 1)) return 0; + *a->zout++ = (char) z; + } else { + uint8 *p; + int len,dist; + if (z == 256) return 1; + z -= 257; + len = length_base[z]; + if (length_extra[z]) len += zreceive(a, length_extra[z]); + z = zhuffman_decode(a, &a->z_distance); + if (z < 0) return e("bad huffman code","Corrupt PNG"); + dist = dist_base[z]; + if (dist_extra[z]) dist += zreceive(a, dist_extra[z]); + if (a->zout - a->zout_start < dist) return e("bad dist","Corrupt PNG"); + if (a->zout + len > a->zout_end) if (!expand(a, len)) return 0; + p = (uint8 *) (a->zout - dist); + while (len--) + *a->zout++ = *p++; + } 
+ } +} + +static int compute_huffman_codes(zbuf *a) +{ + static uint8 length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 }; + zhuffman z_codelength; + uint8 lencodes[286+32+137];//padding for maximum single op + uint8 codelength_sizes[19]; + int i,n; + + int hlit = zreceive(a,5) + 257; + int hdist = zreceive(a,5) + 1; + int hclen = zreceive(a,4) + 4; + + memset(codelength_sizes, 0, sizeof(codelength_sizes)); + for (i=0; i < hclen; ++i) { + int s = zreceive(a,3); + codelength_sizes[length_dezigzag[i]] = (uint8) s; + } + if (!zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0; + + n = 0; + while (n < hlit + hdist) { + int c = zhuffman_decode(a, &z_codelength); + assert(c >= 0 && c < 19); + if (c < 16) + lencodes[n++] = (uint8) c; + else if (c == 16) { + c = zreceive(a,2)+3; + memset(lencodes+n, lencodes[n-1], c); + n += c; + } else if (c == 17) { + c = zreceive(a,3)+3; + memset(lencodes+n, 0, c); + n += c; + } else { + assert(c == 18); + c = zreceive(a,7)+11; + memset(lencodes+n, 0, c); + n += c; + } + } + if (n != hlit+hdist) return e("bad codelengths","Corrupt PNG"); + if (!zbuild_huffman(&a->z_length, lencodes, hlit)) return 0; + if (!zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0; + return 1; +} + +static int parse_uncompressed_block(zbuf *a) +{ + uint8 header[4]; + int len,nlen,k; + if (a->num_bits & 7) + zreceive(a, a->num_bits & 7); // discard + // drain the bit-packed data into header + k = 0; + while (a->num_bits > 0) { + header[k++] = (uint8) (a->code_buffer & 255); // wtf this warns? 
+ a->code_buffer >>= 8; + a->num_bits -= 8; + } + assert(a->num_bits == 0); + // now fill header the normal way + while (k < 4) + header[k++] = (uint8) zget8(a); + len = header[1] * 256 + header[0]; + nlen = header[3] * 256 + header[2]; + if (nlen != (len ^ 0xffff)) return e("zlib corrupt","Corrupt PNG"); + if (a->zbuffer + len > a->zbuffer_end) return e("read past buffer","Corrupt PNG"); + if (a->zout + len > a->zout_end) + if (!expand(a, len)) return 0; + memcpy(a->zout, a->zbuffer, len); + a->zbuffer += len; + a->zout += len; + return 1; +} + +static int parse_zlib_header(zbuf *a) +{ + int cmf = zget8(a); + int cm = cmf & 15; + /* int cinfo = cmf >> 4; */ + int flg = zget8(a); + if ((cmf*256+flg) % 31 != 0) return e("bad zlib header","Corrupt PNG"); // zlib spec + if (flg & 32) return e("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png + if (cm != 8) return e("bad compression","Corrupt PNG"); // DEFLATE required for png + // window = 1 << (8 + cinfo)... but who cares, we fully buffer output + return 1; +} + +// @TODO: should statically initialize these for optimal thread safety +static uint8 default_length[288], default_distance[32]; +static void init_defaults(void) +{ + int i; // use <= to match clearly with spec + for (i=0; i <= 143; ++i) default_length[i] = 8; + for ( ; i <= 255; ++i) default_length[i] = 9; + for ( ; i <= 279; ++i) default_length[i] = 7; + for ( ; i <= 287; ++i) default_length[i] = 8; + + for (i=0; i <= 31; ++i) default_distance[i] = 5; +} + +int stbi_png_partial; // a quick hack to only allow decoding some of a PNG... 
I should implement real streaming support instead +static int parse_zlib(zbuf *a, int parse_header) +{ + int final, type; + if (parse_header) + if (!parse_zlib_header(a)) return 0; + a->num_bits = 0; + a->code_buffer = 0; + do { + final = zreceive(a,1); + type = zreceive(a,2); + if (type == 0) { + if (!parse_uncompressed_block(a)) return 0; + } else if (type == 3) { + return 0; + } else { + if (type == 1) { + // use fixed code lengths + if (!default_distance[31]) init_defaults(); + if (!zbuild_huffman(&a->z_length , default_length , 288)) return 0; + if (!zbuild_huffman(&a->z_distance, default_distance, 32)) return 0; + } else { + if (!compute_huffman_codes(a)) return 0; + } + if (!parse_huffman_block(a)) return 0; + } + if (stbi_png_partial && a->zout - a->zout_start > 65536) + break; + } while (!final); + return 1; +} + +static int do_zlib(zbuf *a, char *obuf, int olen, int exp, int parse_header) +{ + a->zout_start = obuf; + a->zout = obuf; + a->zout_end = obuf + olen; + a->z_expandable = exp; + + return parse_zlib(a, parse_header); +} + +char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen) +{ + zbuf a; + char *p = (char *) malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer + len; + if (do_zlib(&a, p, initial_size, 1, 1)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + free(a.zout_start); + return NULL; + } +} + +char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen) +{ + return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen); +} + +char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header) +{ + zbuf a; + char *p = (char *) malloc(initial_size); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer + len; + if (do_zlib(&a, p, initial_size, 1, parse_header)) { + if 
(outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + free(a.zout_start); + return NULL; + } +} + +int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen) +{ + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 1)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen) +{ + zbuf a; + char *p = (char *) malloc(16384); + if (p == NULL) return NULL; + a.zbuffer = (uint8 *) buffer; + a.zbuffer_end = (uint8 *) buffer+len; + if (do_zlib(&a, p, 16384, 1, 0)) { + if (outlen) *outlen = (int) (a.zout - a.zout_start); + return a.zout_start; + } else { + free(a.zout_start); + return NULL; + } +} + +int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen) +{ + zbuf a; + a.zbuffer = (uint8 *) ibuffer; + a.zbuffer_end = (uint8 *) ibuffer + ilen; + if (do_zlib(&a, obuffer, olen, 0, 0)) + return (int) (a.zout - a.zout_start); + else + return -1; +} + +// public domain "baseline" PNG decoder v0.10 Sean Barrett 2006-11-18 +// simple implementation +// - only 8-bit samples +// - no CRC checking +// - allocates lots of intermediate memory +// - avoids problem of streaming data between subsystems +// - avoids explicit window management +// performance +// - uses stb_zlib, a PD zlib implementation with fast huffman decoding + + +typedef struct +{ + uint32 length; + uint32 type; +} chunk; + +#define PNG_TYPE(a,b,c,d) (((a) << 24) + ((b) << 16) + ((c) << 8) + (d)) + +static chunk get_chunk_header(stbi *s) +{ + chunk c; + c.length = get32(s); + c.type = get32(s); + return c; +} + +static int check_png_header(stbi *s) +{ + static uint8 png_sig[8] = { 137,80,78,71,13,10,26,10 }; + int i; + for (i=0; i < 8; ++i) + if (get8(s) != png_sig[i]) return e("bad png sig","Not a PNG"); + return 1; +} + +typedef struct +{ + stbi s; + uint8 *idata, 
// Paeth predictor used by the PNG "paeth" row filter.
// Computes the estimate a + b - c and returns whichever of a, b, c lies
// closest to it; ties are resolved in the order a, then b, then c.
// The row decoder passes a = left, b = above, c = upper-left.
static int paeth(int a, int b, int c)
{
   const int estimate = a + b - c;
   const int dist_a   = abs(estimate - a);
   const int dist_b   = abs(estimate - b);
   const int dist_c   = abs(estimate - c);

   if (dist_a <= dist_b && dist_a <= dist_c)
      return a;
   return (dist_b <= dist_c) ? b : c;
}
that we don't switch per-pixel or per-component + if (img_n == out_n) { + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, raw+=img_n,cur+=img_n,prior+=img_n) \ + for (k=0; k < img_n; ++k) + switch (filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-img_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-img_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],prior[k],prior[k-img_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-img_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-img_n],0,0)); break; + } + #undef CASE + } else { + assert(img_n+1 == out_n); + #define CASE(f) \ + case f: \ + for (i=x-1; i >= 1; --i, cur[img_n]=255,raw+=img_n,cur+=out_n,prior+=out_n) \ + for (k=0; k < img_n; ++k) + switch (filter) { + CASE(F_none) cur[k] = raw[k]; break; + CASE(F_sub) cur[k] = raw[k] + cur[k-out_n]; break; + CASE(F_up) cur[k] = raw[k] + prior[k]; break; + CASE(F_avg) cur[k] = raw[k] + ((prior[k] + cur[k-out_n])>>1); break; + CASE(F_paeth) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],prior[k],prior[k-out_n])); break; + CASE(F_avg_first) cur[k] = raw[k] + (cur[k-out_n] >> 1); break; + CASE(F_paeth_first) cur[k] = (uint8) (raw[k] + paeth(cur[k-out_n],0,0)); break; + } + #undef CASE + } + } + return 1; +} + +static int create_png_image(png *a, uint8 *raw, uint32 raw_len, int out_n, int interlaced) +{ + uint8 *final; + int p; + int save; + if (!interlaced) + return create_png_image_raw(a, raw, raw_len, out_n, a->s.img_x, a->s.img_y); + save = stbi_png_partial; + stbi_png_partial = 0; + + // de-interlacing + final = (uint8 *) malloc(a->s.img_x * a->s.img_y * out_n); + for (p=0; p < 7; ++p) { + int xorig[] = { 0,4,0,2,0,1,0 }; + int yorig[] = { 0,0,4,0,2,0,1 }; + int xspc[] = { 8,8,4,4,2,2,1 }; + int yspc[] = { 8,8,8,4,4,2,2 }; + int i,j,x,y; + // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1 + x = 
(a->s.img_x - xorig[p] + xspc[p]-1) / xspc[p]; + y = (a->s.img_y - yorig[p] + yspc[p]-1) / yspc[p]; + if (x && y) { + if (!create_png_image_raw(a, raw, raw_len, out_n, x, y)) { + free(final); + return 0; + } + for (j=0; j < y; ++j) + for (i=0; i < x; ++i) + memcpy(final + (j*yspc[p]+yorig[p])*a->s.img_x*out_n + (i*xspc[p]+xorig[p])*out_n, + a->out + (j*x+i)*out_n, out_n); + free(a->out); + raw += (x*out_n+1)*y; + raw_len -= (x*out_n+1)*y; + } + } + a->out = final; + + stbi_png_partial = save; + return 1; +} + +static int compute_transparency(png *z, uint8 tc[3], int out_n) +{ + stbi *s = &z->s; + uint32 i, pixel_count = s->img_x * s->img_y; + uint8 *p = z->out; + + // compute color-based transparency, assuming we've + // already got 255 as the alpha value in the output + assert(out_n == 2 || out_n == 4); + + if (out_n == 2) { + for (i=0; i < pixel_count; ++i) { + p[1] = (p[0] == tc[0] ? 0 : 255); + p += 2; + } + } else { + for (i=0; i < pixel_count; ++i) { + if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2]) + p[3] = 0; + p += 4; + } + } + return 1; +} + +static int expand_palette(png *a, uint8 *palette, int len, int pal_img_n) +{ + uint32 i, pixel_count = a->s.img_x * a->s.img_y; + uint8 *p, *temp_out, *orig = a->out; + + p = (uint8 *) malloc(pixel_count * pal_img_n); + if (p == NULL) return e("outofmem", "Out of memory"); + + // between here and free(out) below, exitting would leak + temp_out = p; + + if (pal_img_n == 3) { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p += 3; + } + } else { + for (i=0; i < pixel_count; ++i) { + int n = orig[i]*4; + p[0] = palette[n ]; + p[1] = palette[n+1]; + p[2] = palette[n+2]; + p[3] = palette[n+3]; + p += 4; + } + } + free(a->out); + a->out = temp_out; + + STBI_NOTUSED(len); + + return 1; +} + +static int stbi_unpremultiply_on_load = 0; +static int stbi_de_iphone_flag = 0; + +void stbi_set_unpremultiply_on_load(int 
flag_true_if_should_unpremultiply) +{ + stbi_unpremultiply_on_load = flag_true_if_should_unpremultiply; +} +void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert) +{ + stbi_de_iphone_flag = flag_true_if_should_convert; +} + +static void stbi_de_iphone(png *z) +{ + stbi *s = &z->s; + uint32 i, pixel_count = s->img_x * s->img_y; + uint8 *p = z->out; + + if (s->img_out_n == 3) { // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + uint8 t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 3; + } + } else { + assert(s->img_out_n == 4); + if (stbi_unpremultiply_on_load) { + // convert bgr to rgb and unpremultiply + for (i=0; i < pixel_count; ++i) { + uint8 a = p[3]; + uint8 t = p[0]; + if (a) { + p[0] = p[2] * 255 / a; + p[1] = p[1] * 255 / a; + p[2] = t * 255 / a; + } else { + p[0] = p[2]; + p[2] = t; + } + p += 4; + } + } else { + // convert bgr to rgb + for (i=0; i < pixel_count; ++i) { + uint8 t = p[0]; + p[0] = p[2]; + p[2] = t; + p += 4; + } + } + } +} + +static int parse_png_file(png *z, int scan, int req_comp) +{ + uint8 palette[1024], pal_img_n=0; + uint8 has_trans=0, tc[3]; + uint32 ioff=0, idata_limit=0, i, pal_len=0; + int first=1,k,interlace=0, iphone=0; + stbi *s = &z->s; + + if (!check_png_header(s)) return 0; + + if (scan == SCAN_type) return 1; + + for (;;) { + chunk c = get_chunk_header(s); + switch (c.type) { + case PNG_TYPE('C','g','B','I'): + iphone = stbi_de_iphone_flag; + skip(s, c.length); + break; + case PNG_TYPE('I','H','D','R'): { + int depth,color,comp,filter; + if (!first) return e("multiple IHDR","Corrupt PNG"); + first = 0; + if (c.length != 13) return e("bad IHDR len","Corrupt PNG"); + s->img_x = get32(s); if (s->img_x > (1 << 24)) return e("too large","Very large image (corrupt?)"); + s->img_y = get32(s); if (s->img_y > (1 << 24)) return e("too large","Very large image (corrupt?)"); + depth = get8(s); if (depth != 8) return e("8bit only","PNG not supported: 8-bit only"); + color = get8(s); if (color > 6) return e("bad 
ctype","Corrupt PNG"); + if (color == 3) pal_img_n = 3; else if (color & 1) return e("bad ctype","Corrupt PNG"); + comp = get8(s); if (comp) return e("bad comp method","Corrupt PNG"); + filter= get8(s); if (filter) return e("bad filter method","Corrupt PNG"); + interlace = get8(s); if (interlace>1) return e("bad interlace method","Corrupt PNG"); + if (!s->img_x || !s->img_y) return e("0-pixel image","Corrupt PNG"); + if (!pal_img_n) { + s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0); + if ((1 << 30) / s->img_x / s->img_n < s->img_y) return e("too large", "Image too large to decode"); + if (scan == SCAN_header) return 1; + } else { + // if paletted, then pal_n is our final components, and + // img_n is # components to decompress/filter. + s->img_n = 1; + if ((1 << 30) / s->img_x / 4 < s->img_y) return e("too large","Corrupt PNG"); + // if SCAN_header, have to scan to see if we have a tRNS + } + break; + } + + case PNG_TYPE('P','L','T','E'): { + if (first) return e("first not IHDR", "Corrupt PNG"); + if (c.length > 256*3) return e("invalid PLTE","Corrupt PNG"); + pal_len = c.length / 3; + if (pal_len * 3 != c.length) return e("invalid PLTE","Corrupt PNG"); + for (i=0; i < pal_len; ++i) { + palette[i*4+0] = get8u(s); + palette[i*4+1] = get8u(s); + palette[i*4+2] = get8u(s); + palette[i*4+3] = 255; + } + break; + } + + case PNG_TYPE('t','R','N','S'): { + if (first) return e("first not IHDR", "Corrupt PNG"); + if (z->idata) return e("tRNS after IDAT","Corrupt PNG"); + if (pal_img_n) { + if (scan == SCAN_header) { s->img_n = 4; return 1; } + if (pal_len == 0) return e("tRNS before PLTE","Corrupt PNG"); + if (c.length > pal_len) return e("bad tRNS len","Corrupt PNG"); + pal_img_n = 4; + for (i=0; i < c.length; ++i) + palette[i*4+3] = get8u(s); + } else { + if (!(s->img_n & 1)) return e("tRNS with alpha","Corrupt PNG"); + if (c.length != (uint32) s->img_n*2) return e("bad tRNS len","Corrupt PNG"); + has_trans = 1; + for (k=0; k < s->img_n; ++k) + tc[k] = (uint8) 
get16(s); // non 8-bit images will be larger + } + break; + } + + case PNG_TYPE('I','D','A','T'): { + if (first) return e("first not IHDR", "Corrupt PNG"); + if (pal_img_n && !pal_len) return e("no PLTE","Corrupt PNG"); + if (scan == SCAN_header) { s->img_n = pal_img_n; return 1; } + if (ioff + c.length > idata_limit) { + uint8 *p; + if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096; + while (ioff + c.length > idata_limit) + idata_limit *= 2; + p = (uint8 *) realloc(z->idata, idata_limit); if (p == NULL) return e("outofmem", "Out of memory"); + z->idata = p; + } + if (!getn(s, z->idata+ioff,c.length)) return e("outofdata","Corrupt PNG"); + ioff += c.length; + break; + } + + case PNG_TYPE('I','E','N','D'): { + uint32 raw_len; + if (first) return e("first not IHDR", "Corrupt PNG"); + if (scan != SCAN_load) return 1; + if (z->idata == NULL) return e("no IDAT","Corrupt PNG"); + z->expanded = (uint8 *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, 16384, (int *) &raw_len, !iphone); + if (z->expanded == NULL) return 0; // zlib should set error + free(z->idata); z->idata = NULL; + if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans) + s->img_out_n = s->img_n+1; + else + s->img_out_n = s->img_n; + if (!create_png_image(z, z->expanded, raw_len, s->img_out_n, interlace)) return 0; + if (has_trans) + if (!compute_transparency(z, tc, s->img_out_n)) return 0; + if (iphone && s->img_out_n > 2) + stbi_de_iphone(z); + if (pal_img_n) { + // pal_img_n == 3 or 4 + s->img_n = pal_img_n; // record the actual colors we had + s->img_out_n = pal_img_n; + if (req_comp >= 3) s->img_out_n = req_comp; + if (!expand_palette(z, palette, pal_len, s->img_out_n)) + return 0; + } + free(z->expanded); z->expanded = NULL; + return 1; + } + + default: + // if critical, fail + if (first) return e("first not IHDR", "Corrupt PNG"); + if ((c.type & (1 << 29)) == 0) { + #ifndef STBI_NO_FAILURE_STRINGS + // not threadsafe + static char 
invalid_chunk[] = "XXXX chunk not known"; + invalid_chunk[0] = (uint8) (c.type >> 24); + invalid_chunk[1] = (uint8) (c.type >> 16); + invalid_chunk[2] = (uint8) (c.type >> 8); + invalid_chunk[3] = (uint8) (c.type >> 0); + #endif + return e(invalid_chunk, "PNG not supported: unknown chunk type"); + } + skip(s, c.length); + break; + } + // end of chunk, read and skip CRC + get32(s); + } +} + +static unsigned char *do_png(png *p, int *x, int *y, int *n, int req_comp) +{ + unsigned char *result=NULL; + p->expanded = NULL; + p->idata = NULL; + p->out = NULL; + if (req_comp < 0 || req_comp > 4) return epuc("bad req_comp", "Internal error"); + if (parse_png_file(p, SCAN_load, req_comp)) { + result = p->out; + p->out = NULL; + if (req_comp && req_comp != p->s.img_out_n) { + result = convert_format(result, p->s.img_out_n, req_comp, p->s.img_x, p->s.img_y); + p->s.img_out_n = req_comp; + if (result == NULL) return result; + } + *x = p->s.img_x; + *y = p->s.img_y; + if (n) *n = p->s.img_n; + } + free(p->out); p->out = NULL; + free(p->expanded); p->expanded = NULL; + free(p->idata); p->idata = NULL; + + return result; +} + +#ifndef STBI_NO_STDIO +unsigned char *stbi_png_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_file(&p.s, f); + return do_png(&p, x,y,comp,req_comp); +} + +unsigned char *stbi_png_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + unsigned char *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_png_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return data; +} +#endif + +unsigned char *stbi_png_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + png p; + start_mem(&p.s, buffer,len); + return do_png(&p, x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_png_test_file(FILE *f) +{ + png p; + int n,r; + n = ftell(f); + start_file(&p.s, f); + r = parse_png_file(&p, SCAN_type,STBI_default); + fseek(f,n,SEEK_SET); + return 
r; +} +#endif + +int stbi_png_test_memory(stbi_uc const *buffer, int len) +{ + png p; + start_mem(&p.s, buffer, len); + return parse_png_file(&p, SCAN_type,STBI_default); +} + +static int stbi_png_info_raw(png *p, int *x, int *y, int *comp) +{ + if (!parse_png_file(p, SCAN_header, 0)) + return 0; + if (x) *x = p->s.img_x; + if (y) *y = p->s.img_y; + if (comp) *comp = p->s.img_n; + return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_png_info (char const *filename, int *x, int *y, int *comp) +{ + int res; + FILE *f = fopen(filename, "rb"); + if (!f) return 0; + res = stbi_png_info_from_file(f, x, y, comp); + fclose(f); + return res; +} + +int stbi_png_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + png p; + int res; + long n = ftell(f); + start_file(&p.s, f); + res = stbi_png_info_raw(&p, x, y, comp); + fseek(f, n, SEEK_SET); + return res; +} +#endif // !STBI_NO_STDIO + +int stbi_png_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + png p; + start_mem(&p.s, buffer, len); + return stbi_png_info_raw(&p, x, y, comp); +} + +// Microsoft/Windows BMP image + +static int bmp_test(stbi *s) +{ + int sz; + if (get8(s) != 'B') return 0; + if (get8(s) != 'M') return 0; + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + get32le(s); // discard data offset + sz = get32le(s); + if (sz == 12 || sz == 40 || sz == 56 || sz == 108) return 1; + return 0; +} + +#ifndef STBI_NO_STDIO +int stbi_bmp_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s,f); + r = bmp_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_bmp_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_test(&s); +} + +// returns 0..31 for the highest set bit +static int high_bit(unsigned int z) +{ + int n=0; + if (z == 0) return -1; + if (z >= 0x10000) n += 16, z >>= 16; + if (z >= 0x00100) n += 8, z >>= 8; + if (z >= 0x00010) n += 4, z >>= 4; + if (z >= 
0x00004) n += 2, z >>= 2; + if (z >= 0x00002) n += 1, z >>= 1; + return n; +} + +static int bitcount(unsigned int a) +{ + a = (a & 0x55555555) + ((a >> 1) & 0x55555555); // max 2 + a = (a & 0x33333333) + ((a >> 2) & 0x33333333); // max 4 + a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits + a = (a + (a >> 8)); // max 16 per 8 bits + a = (a + (a >> 16)); // max 32 per 8 bits + return a & 0xff; +} + +static int shiftsigned(int v, int shift, int bits) +{ + int result; + int z=0; + + if (shift < 0) v <<= -shift; + else v >>= shift; + result = v; + + z = bits; + while (z < 8) { + result += v >> z; + z += bits; + } + return result; +} + +static stbi_uc *bmp_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + uint8 *out; + unsigned int mr=0,mg=0,mb=0,ma=0, fake_a=0; + stbi_uc pal[256][4]; + int psize=0,i,j,compress=0,width; + int bpp, flip_vertically, pad, target, offset, hsz; + if (get8(s) != 'B' || get8(s) != 'M') return epuc("not BMP", "Corrupt BMP"); + get32le(s); // discard filesize + get16le(s); // discard reserved + get16le(s); // discard reserved + offset = get32le(s); + hsz = get32le(s); + if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108) return epuc("unknown BMP", "BMP type not supported: unknown"); + if (hsz == 12) { + s->img_x = get16le(s); + s->img_y = get16le(s); + } else { + s->img_x = get32le(s); + s->img_y = get32le(s); + } + if (get16le(s) != 1) return epuc("bad BMP", "bad BMP"); + bpp = get16le(s); + if (bpp == 1) return epuc("monochrome", "BMP type not supported: 1-bit"); + flip_vertically = ((int) s->img_y) > 0; + s->img_y = abs((int) s->img_y); + if (hsz == 12) { + if (bpp < 24) + psize = (offset - 14 - 24) / 3; + } else { + compress = get32le(s); + if (compress == 1 || compress == 2) return epuc("BMP RLE", "BMP type not supported: RLE"); + get32le(s); // discard sizeof + get32le(s); // discard hres + get32le(s); // discard vres + get32le(s); // discard colorsused + get32le(s); // discard max important + if (hsz == 40 || hsz == 
56) { + if (hsz == 56) { + get32le(s); + get32le(s); + get32le(s); + get32le(s); + } + if (bpp == 16 || bpp == 32) { + mr = mg = mb = 0; + if (compress == 0) { + if (bpp == 32) { + mr = 0xffu << 16; + mg = 0xffu << 8; + mb = 0xffu << 0; + ma = 0xffu << 24; + fake_a = 1; // @TODO: check for cases like alpha value is all 0 and switch it to 255 + } else { + mr = 31u << 10; + mg = 31u << 5; + mb = 31u << 0; + } + } else if (compress == 3) { + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + // not documented, but generated by photoshop and handled by mspaint + if (mr == mg && mg == mb) { + // ?!?!? + return epuc("bad BMP", "bad BMP"); + } + } else + return epuc("bad BMP", "bad BMP"); + } + } else { + assert(hsz == 108); + mr = get32le(s); + mg = get32le(s); + mb = get32le(s); + ma = get32le(s); + get32le(s); // discard color space + for (i=0; i < 12; ++i) + get32le(s); // discard color space parameters + } + if (bpp < 16) + psize = (offset - 14 - hsz) >> 2; + } + s->img_n = ma ? 4 : 3; + if (req_comp && req_comp >= 3) // we can directly decode 3 or 4 + target = req_comp; + else + target = s->img_n; // if they want monochrome, we'll post-convert + out = (stbi_uc *) malloc(target * s->img_x * s->img_y); + if (!out) return epuc("outofmem", "Out of memory"); + if (bpp < 16) { + int z=0; + if (psize == 0 || psize > 256) { free(out); return epuc("invalid", "Corrupt BMP"); } + for (i=0; i < psize; ++i) { + pal[i][2] = get8u(s); + pal[i][1] = get8u(s); + pal[i][0] = get8u(s); + if (hsz != 12) get8(s); + pal[i][3] = 255; + } + skip(s, offset - 14 - hsz - psize * (hsz == 12 ? 
3 : 4)); + if (bpp == 4) width = (s->img_x + 1) >> 1; + else if (bpp == 8) width = s->img_x; + else { free(out); return epuc("bad bpp", "Corrupt BMP"); } + pad = (-width)&3; + for (j=0; j < (int) s->img_y; ++j) { + for (i=0; i < (int) s->img_x; i += 2) { + int v=get8(s),v2=0; + if (bpp == 4) { + v2 = v & 15; + v >>= 4; + } + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + if (i+1 == (int) s->img_x) break; + v = (bpp == 8) ? get8(s) : v2; + out[z++] = pal[v][0]; + out[z++] = pal[v][1]; + out[z++] = pal[v][2]; + if (target == 4) out[z++] = 255; + } + skip(s, pad); + } + } else { + int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0; + int z = 0; + int easy=0; + skip(s, offset - 14 - hsz); + if (bpp == 24) width = 3 * s->img_x; + else if (bpp == 16) width = 2*s->img_x; + else /* bpp = 32 and pad = 0 */ width=0; + pad = (-width) & 3; + if (bpp == 24) { + easy = 1; + } else if (bpp == 32) { + if (mb == 0xff && mg == 0xff00 && mr == 0xff000000 && ma == 0xff000000) + easy = 2; + } + if (!easy) { + if (!mr || !mg || !mb) return epuc("bad masks", "Corrupt BMP"); + // right shift amt to put high bit in position #7 + rshift = high_bit(mr)-7; rcount = bitcount(mr); + gshift = high_bit(mg)-7; gcount = bitcount(mr); + bshift = high_bit(mb)-7; bcount = bitcount(mr); + ashift = high_bit(ma)-7; acount = bitcount(mr); + } + for (j=0; j < (int) s->img_y; ++j) { + if (easy) { + for (i=0; i < (int) s->img_x; ++i) { + int a; + out[z+2] = get8u(s); + out[z+1] = get8u(s); + out[z+0] = get8u(s); + z += 3; + a = (easy == 2 ? get8(s) : 255); + if (target == 4) out[z++] = (uint8) a; + } + } else { + for (i=0; i < (int) s->img_x; ++i) { + uint32 v = (bpp == 16 ? get16le(s) : get32le(s)); + int a; + out[z++] = (uint8) shiftsigned(v & mr, rshift, rcount); + out[z++] = (uint8) shiftsigned(v & mg, gshift, gcount); + out[z++] = (uint8) shiftsigned(v & mb, bshift, bcount); + a = (ma ? 
shiftsigned(v & ma, ashift, acount) : 255); + if (target == 4) out[z++] = (uint8) a; + } + } + skip(s, pad); + } + } + if (flip_vertically) { + stbi_uc t; + for (j=0; j < (int) s->img_y>>1; ++j) { + stbi_uc *p1 = out + j *s->img_x*target; + stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target; + for (i=0; i < (int) s->img_x*target; ++i) { + t = p1[i], p1[i] = p2[i], p2[i] = t; + } + } + } + + if (req_comp && req_comp != target) { + out = convert_format(out, target, req_comp, s->img_x, s->img_y); + if (out == NULL) return out; // convert_format frees input on failure + } + + *x = s->img_x; + *y = s->img_y; + if (comp) *comp = target; + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_bmp_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_bmp_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_bmp_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return bmp_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_bmp_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return bmp_load(&s, x,y,comp,req_comp); +} + +// Targa Truevision - TGA +// by Jonathan Dummer + +static int tga_info(stbi *s, int *x, int *y, int *comp) +{ + int tga_w, tga_h, tga_comp; + int sz; + get8u(s); // discard Offset + sz = get8u(s); // color type + if( sz > 1 ) return 0; // only RGB or indexed allowed + sz = get8u(s); // image type + // only RGB or grey allowed, +/- RLE + if ((sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11)) return 0; + get16le(s); // discard palette start + get16le(s); // discard palette length + get8(s); // discard bits per palette color entry + get16le(s); // discard x origin + get16le(s); // discard y origin + tga_w = get16le(s); + if( tga_w < 1 ) return 0; // test 
width + tga_h = get16le(s); + if( tga_h < 1 ) return 0; // test height + sz = get8(s); // bits per pixel + // only RGB or RGBA or grey allowed + if ((sz != 8) && (sz != 16) && (sz != 24) && (sz != 32)) return 0; + tga_comp = sz; + if (x) *x = tga_w; + if (y) *y = tga_h; + if (comp) *comp = tga_comp / 8; + return 1; // seems to have passed everything +} + +#ifndef STBI_NO_STDIO +int stbi_tga_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + stbi s; + int r; + long n = ftell(f); + start_file(&s, f); + r = tga_info(&s, x, y, comp); + fseek(f, n, SEEK_SET); + return r; +} +#endif + +int stbi_tga_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_info(&s, x, y, comp); +} + +static int tga_test(stbi *s) +{ + int sz; + get8u(s); // discard Offset + sz = get8u(s); // color type + if ( sz > 1 ) return 0; // only RGB or indexed allowed + sz = get8u(s); // image type + if ( (sz != 1) && (sz != 2) && (sz != 3) && (sz != 9) && (sz != 10) && (sz != 11) ) return 0; // only RGB or grey allowed, +/- RLE + get16(s); // discard palette start + get16(s); // discard palette length + get8(s); // discard bits per palette color entry + get16(s); // discard x origin + get16(s); // discard y origin + if ( get16(s) < 1 ) return 0; // test width + if ( get16(s) < 1 ) return 0; // test height + sz = get8(s); // bits per pixel + if ( (sz != 8) && (sz != 16) && (sz != 24) && (sz != 32) ) return 0; // only RGB or RGBA or grey allowed + return 1; // seems to have passed everything +} + +#ifndef STBI_NO_STDIO +int stbi_tga_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = tga_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_tga_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_test(&s); +} + +static stbi_uc *tga_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + // read in the TGA header stuff + int 
tga_offset = get8u(s); + int tga_indexed = get8u(s); + int tga_image_type = get8u(s); + int tga_is_RLE = 0; + int tga_palette_start = get16le(s); + int tga_palette_len = get16le(s); + int tga_palette_bits = get8u(s); + int tga_x_origin = get16le(s); + int tga_y_origin = get16le(s); + int tga_width = get16le(s); + int tga_height = get16le(s); + int tga_bits_per_pixel = get8u(s); + int tga_inverted = get8u(s); + // image data + unsigned char *tga_data; + unsigned char *tga_palette = NULL; + int i, j; + unsigned char raw_data[4]; + unsigned char trans_data[4]; + int RLE_count = 0; + int RLE_repeating = 0; + int read_next_pixel = 1; + + // do a tiny bit of precessing + if ( tga_image_type >= 8 ) + { + tga_image_type -= 8; + tga_is_RLE = 1; + } + /* int tga_alpha_bits = tga_inverted & 15; */ + tga_inverted = 1 - ((tga_inverted >> 5) & 1); + + // error check + if ( //(tga_indexed) || + (tga_width < 1) || (tga_height < 1) || + (tga_image_type < 1) || (tga_image_type > 3) || + ((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16) && + (tga_bits_per_pixel != 24) && (tga_bits_per_pixel != 32)) + ) + { + return NULL; + } + + // If I'm paletted, then I'll use the number of bits from the palette + if ( tga_indexed ) + { + tga_bits_per_pixel = tga_palette_bits; + } + + // tga info + *x = tga_width; + *y = tga_height; + if ( (req_comp < 1) || (req_comp > 4) ) + { + // just use whatever the file was + req_comp = tga_bits_per_pixel / 8; + *comp = req_comp; + } else + { + // force a new number of components + *comp = tga_bits_per_pixel/8; + } + tga_data = (unsigned char*)malloc( tga_width * tga_height * req_comp ); + + // skip to the data's starting position (offset usually = 0) + skip(s, tga_offset ); + // do I need to load a palette? + if ( tga_indexed ) + { + // any data to skip? 
(offset usually = 0) + skip(s, tga_palette_start ); + // load the palette + tga_palette = (unsigned char*)malloc( tga_palette_len * tga_palette_bits / 8 ); + if (!getn(s, tga_palette, tga_palette_len * tga_palette_bits / 8 )) + return NULL; + } + // load the data + trans_data[0] = trans_data[1] = trans_data[2] = trans_data[3] = 0; + for (i=0; i < tga_width * tga_height; ++i) + { + // if I'm in RLE mode, do I need to get a RLE chunk? + if ( tga_is_RLE ) + { + if ( RLE_count == 0 ) + { + // yep, get the next byte as a RLE command + int RLE_cmd = get8u(s); + RLE_count = 1 + (RLE_cmd & 127); + RLE_repeating = RLE_cmd >> 7; + read_next_pixel = 1; + } else if ( !RLE_repeating ) + { + read_next_pixel = 1; + } + } else + { + read_next_pixel = 1; + } + // OK, if I need to read a pixel, do it now + if ( read_next_pixel ) + { + // load however much data we did have + if ( tga_indexed ) + { + // read in 1 byte, then perform the lookup + int pal_idx = get8u(s); + if ( pal_idx >= tga_palette_len ) + { + // invalid index + pal_idx = 0; + } + pal_idx *= tga_bits_per_pixel / 8; + for (j = 0; j*8 < tga_bits_per_pixel; ++j) + { + raw_data[j] = tga_palette[pal_idx+j]; + } + } else + { + // read in the data raw + for (j = 0; j*8 < tga_bits_per_pixel; ++j) + { + raw_data[j] = get8u(s); + } + } + // convert raw to the intermediate format + switch (tga_bits_per_pixel) + { + case 8: + // Luminous => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 16: + // Luminous,Alpha => RGBA + trans_data[0] = raw_data[0]; + trans_data[1] = raw_data[0]; + trans_data[2] = raw_data[0]; + trans_data[3] = raw_data[1]; + break; + case 24: + // BGR => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = 255; + break; + case 32: + // BGRA => RGBA + trans_data[0] = raw_data[2]; + trans_data[1] = raw_data[1]; + trans_data[2] = raw_data[0]; + trans_data[3] = 
raw_data[3]; + break; + } + // clear the reading flag for the next pixel + read_next_pixel = 0; + } // end of reading a pixel + // convert to final format + switch (req_comp) + { + case 1: + // RGBA => Luminance + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + break; + case 2: + // RGBA => Luminance,Alpha + tga_data[i*req_comp+0] = compute_y(trans_data[0],trans_data[1],trans_data[2]); + tga_data[i*req_comp+1] = trans_data[3]; + break; + case 3: + // RGBA => RGB + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + break; + case 4: + // RGBA => RGBA + tga_data[i*req_comp+0] = trans_data[0]; + tga_data[i*req_comp+1] = trans_data[1]; + tga_data[i*req_comp+2] = trans_data[2]; + tga_data[i*req_comp+3] = trans_data[3]; + break; + } + // in case we're in RLE mode, keep counting down + --RLE_count; + } + // do I need to invert the image? + if ( tga_inverted ) + { + for (j = 0; j*2 < tga_height; ++j) + { + int index1 = j * tga_width * req_comp; + int index2 = (tga_height - 1 - j) * tga_width * req_comp; + for (i = tga_width * req_comp; i > 0; --i) + { + unsigned char temp = tga_data[index1]; + tga_data[index1] = tga_data[index2]; + tga_data[index2] = temp; + ++index1; + ++index2; + } + } + } + // clear my palette, if I had one + if ( tga_palette != NULL ) + { + free( tga_palette ); + } + // the things I do to get rid of an error message, and yet keep + // Microsoft's C compilers happy... 
[8^( + tga_palette_start = tga_palette_len = tga_palette_bits = + tga_x_origin = tga_y_origin = 0; + // OK, done + return tga_data; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_tga_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_tga_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_tga_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return tga_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_tga_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return tga_load(&s, x,y,comp,req_comp); +} + + +// ************************************************************************************************* +// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB + +static int psd_test(stbi *s) +{ + if (get32(s) != 0x38425053) return 0; // "8BPS" + else return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_psd_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = psd_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_psd_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_test(&s); +} + +static stbi_uc *psd_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + int pixelCount; + int channelCount, compression; + int channel, i, count, len; + int w,h; + uint8 *out; + + // Check identifier + if (get32(s) != 0x38425053) // "8BPS" + return epuc("not PSD", "Corrupt PSD image"); + + // Check file type version. + if (get16(s) != 1) + return epuc("wrong version", "Unsupported version of PSD image"); + + // Skip 6 reserved bytes. + skip(s, 6 ); + + // Read the number of channels (R, G, B, A, etc). 
+ channelCount = get16(s); + if (channelCount < 0 || channelCount > 16) + return epuc("wrong channel count", "Unsupported number of channels in PSD image"); + + // Read the rows and columns of the image. + h = get32(s); + w = get32(s); + + // Make sure the depth is 8 bits. + if (get16(s) != 8) + return epuc("unsupported bit depth", "PSD bit depth is not 8 bit"); + + // Make sure the color mode is RGB. + // Valid options are: + // 0: Bitmap + // 1: Grayscale + // 2: Indexed color + // 3: RGB color + // 4: CMYK color + // 7: Multichannel + // 8: Duotone + // 9: Lab color + if (get16(s) != 3) + return epuc("wrong color format", "PSD is not in RGB color format"); + + // Skip the Mode Data. (It's the palette for indexed color; other info for other modes.) + skip(s,get32(s) ); + + // Skip the image resources. (resolution, pen tool paths, etc) + skip(s, get32(s) ); + + // Skip the reserved data. + skip(s, get32(s) ); + + // Find out if the data is compressed. + // Known values: + // 0: no compression + // 1: RLE compressed + compression = get16(s); + if (compression > 1) + return epuc("bad compression", "PSD has an unknown compression format"); + + // Create the destination image. + out = (stbi_uc *) malloc(4 * w*h); + if (!out) return epuc("outofmem", "Out of memory"); + pixelCount = w*h; + + // Initialize the data to zero. + //memset( out, 0, pixelCount * 4 ); + + // Finally, the image data. + if (compression) { + // RLE as used by .PSD and .TIFF + // Loop until you get the number of unpacked bytes you are expecting: + // Read the next source byte into n. + // If n is between 0 and 127 inclusive, copy the next n+1 bytes literally. + // Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times. + // Else if n is 128, noop. + // Endloop + + // The RLE-compressed data is preceeded by a 2-byte data count for each row in the data, + // which we're going to just skip. + skip(s, h * channelCount * 2 ); + + // Read the RLE data by channel. 
+ for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out+channel; + if (channel >= channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = (channel == 3 ? 255 : 0), p += 4; + } else { + // Read the RLE data. + count = 0; + while (count < pixelCount) { + len = get8(s); + if (len == 128) { + // No-op. + } else if (len < 128) { + // Copy next len+1 bytes literally. + len++; + count += len; + while (len) { + *p = get8u(s); + p += 4; + len--; + } + } else if (len > 128) { + uint8 val; + // Next -len+1 bytes in the dest are replicated from next source byte. + // (Interpret len as a negative 8-bit int.) + len ^= 0x0FF; + len += 2; + val = get8u(s); + count += len; + while (len) { + *p = val; + p += 4; + len--; + } + } + } + } + } + + } else { + // We're at the raw image data. It's each channel in order (Red, Green, Blue, Alpha, ...) + // where each channel consists of an 8-bit value for each pixel in the image. + + // Read the data by channel. + for (channel = 0; channel < 4; channel++) { + uint8 *p; + + p = out + channel; + if (channel > channelCount) { + // Fill this channel with default data. + for (i = 0; i < pixelCount; i++) *p = channel == 3 ? 255 : 0, p += 4; + } else { + // Read the data. 
+ for (i = 0; i < pixelCount; i++) + *p = get8u(s), p += 4; + } + } + } + + if (req_comp && req_comp != 4) { + out = convert_format(out, 4, req_comp, w, h); + if (out == NULL) return out; // convert_format frees input on failure + } + + if (comp) *comp = channelCount; + *y = h; + *x = w; + + return out; +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_psd_load(char const *filename, int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_psd_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_psd_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s, f); + return psd_load(&s, x,y,comp,req_comp); +} +#endif + +stbi_uc *stbi_psd_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s, buffer, len); + return psd_load(&s, x,y,comp,req_comp); +} + +// ************************************************************************************************* +// Softimage PIC loader +// by Tom Seddon +// +// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format +// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/ + +static int pic_is4(stbi *s,const char *str) +{ + int i; + for (i=0; i<4; ++i) + if (get8(s) != (stbi_uc)str[i]) + return 0; + + return 1; +} + +static int pic_test(stbi *s) +{ + int i; + + if (!pic_is4(s,"\x53\x80\xF6\x34")) + return 0; + + for(i=0;i<84;++i) + get8(s); + + if (!pic_is4(s,"PICT")) + return 0; + + return 1; +} + +typedef struct +{ + stbi_uc size,type,channel; +} pic_packet_t; + +static stbi_uc *pic_readval(stbi *s, int channel, stbi_uc *dest) +{ + int mask=0x80, i; + + for (i=0; i<4; ++i, mask>>=1) { + if (channel & mask) { + if (at_eof(s)) return epuc("bad file","PIC file too short"); + dest[i]=get8u(s); + } + } + + return dest; +} + +static void pic_copyval(int channel,stbi_uc *dest,const stbi_uc *src) +{ + int 
mask=0x80,i; + + for (i=0;i<4; ++i, mask>>=1) + if (channel&mask) + dest[i]=src[i]; +} + +static stbi_uc *pic_load2(stbi *s,int width,int height,int *comp, stbi_uc *result) +{ + int act_comp=0,num_packets=0,y,chained; + pic_packet_t packets[10]; + + // this will (should...) cater for even some bizarre stuff like having data + // for the same channel in multiple packets. + do { + pic_packet_t *packet; + + if (num_packets==sizeof(packets)/sizeof(packets[0])) + return epuc("bad format","too many packets"); + + packet = &packets[num_packets++]; + + chained = get8(s); + packet->size = get8u(s); + packet->type = get8u(s); + packet->channel = get8u(s); + + act_comp |= packet->channel; + + if (at_eof(s)) return epuc("bad file","file too short (reading packets)"); + if (packet->size != 8) return epuc("bad format","packet isn't 8bpp"); + } while (chained); + + *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel? + + for(y=0; ytype) { + default: + return epuc("bad format","packet has bad compression type"); + + case 0: {//uncompressed + int x; + + for(x=0;xchannel,dest)) + return 0; + break; + } + + case 1://Pure RLE + { + int left=width, i; + + while (left>0) { + stbi_uc count,value[4]; + + count=get8u(s); + if (at_eof(s)) return epuc("bad file","file too short (pure read count)"); + + if (count > left) + count = (uint8) left; + + if (!pic_readval(s,packet->channel,value)) return 0; + + for(i=0; ichannel,dest,value); + left -= count; + } + } + break; + + case 2: {//Mixed RLE + int left=width; + while (left>0) { + int count = get8(s), i; + if (at_eof(s)) return epuc("bad file","file too short (mixed read count)"); + + if (count >= 128) { // Repeated + stbi_uc value[4]; + int i; + + if (count==128) + count = get16(s); + else + count -= 127; + if (count > left) + return epuc("bad file","scanline overrun"); + + if (!pic_readval(s,packet->channel,value)) + return 0; + + for(i=0;ichannel,dest,value); + } else { // Raw + ++count; + if (count>left) return epuc("bad 
file","scanline overrun"); + + for(i=0;ichannel,dest)) + return 0; + } + left-=count; + } + break; + } + } + } + } + + return result; +} + +static stbi_uc *pic_load(stbi *s,int *px,int *py,int *comp,int req_comp) +{ + stbi_uc *result; + int i, x,y; + + for (i=0; i<92; ++i) + get8(s); + + x = get16(s); + y = get16(s); + if (at_eof(s)) return epuc("bad file","file too short (pic header)"); + if ((1 << 28) / x < y) return epuc("too large", "Image too large to decode"); + + get32(s); //skip `ratio' + get16(s); //skip `fields' + get16(s); //skip `pad' + + // intermediate buffer is RGBA + result = (stbi_uc *) malloc(x*y*4); + memset(result, 0xff, x*y*4); + + if (!pic_load2(s,x,y,comp, result)) { + free(result); + result=0; + } + *px = x; + *py = y; + if (req_comp == 0) req_comp = *comp; + result=convert_format(result,4,req_comp,x,y); + + return result; +} + +int stbi_pic_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s,buffer,len); + return pic_test(&s); +} + +stbi_uc *stbi_pic_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s,buffer,len); + return pic_load(&s,x,y,comp,req_comp); +} + +#ifndef STBI_NO_STDIO +int stbi_pic_test_file(FILE *f) +{ + int result; + long l = ftell(f); + stbi s; + start_file(&s,f); + result = pic_test(&s); + fseek(f,l,SEEK_SET); + return result; +} + +stbi_uc *stbi_pic_load(char const *filename,int *x, int *y, int *comp, int req_comp) +{ + stbi_uc *result; + FILE *f=fopen(filename,"rb"); + if (!f) return 0; + result = stbi_pic_load_from_file(f,x,y,comp,req_comp); + fclose(f); + return result; +} + +stbi_uc *stbi_pic_load_from_file(FILE *f,int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return pic_load(&s,x,y,comp,req_comp); +} +#endif + +// ************************************************************************************************* +// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb +typedef 
struct stbi_gif_lzw_struct { + int16 prefix; + uint8 first; + uint8 suffix; +} stbi_gif_lzw; + +typedef struct stbi_gif_struct +{ + int w,h; + stbi_uc *out; // output buffer (always 4 components) + int flags, bgindex, ratio, transparent, eflags; + uint8 pal[256][4]; + uint8 lpal[256][4]; + stbi_gif_lzw codes[4096]; + uint8 *color_table; + int parse, step; + int lflags; + int start_x, start_y; + int max_x, max_y; + int cur_x, cur_y; + int line_size; +} stbi_gif; + +static int gif_test(stbi *s) +{ + int sz; + if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8') return 0; + sz = get8(s); + if (sz != '9' && sz != '7') return 0; + if (get8(s) != 'a') return 0; + return 1; +} + +#ifndef STBI_NO_STDIO +int stbi_gif_test_file (FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s,f); + r = gif_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +int stbi_gif_test_memory (stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return gif_test(&s); +} + +static void stbi_gif_parse_colortable(stbi *s, uint8 pal[256][4], int num_entries, int transp) +{ + int i; + for (i=0; i < num_entries; ++i) { + pal[i][2] = get8u(s); + pal[i][1] = get8u(s); + pal[i][0] = get8u(s); + pal[i][3] = transp ? 
0 : 255; + } +} + +static int stbi_gif_header(stbi *s, stbi_gif *g, int *comp, int is_info) +{ + uint8 version; + if (get8(s) != 'G' || get8(s) != 'I' || get8(s) != 'F' || get8(s) != '8') + return e("not GIF", "Corrupt GIF"); + + version = get8u(s); + if (version != '7' && version != '9') return e("not GIF", "Corrupt GIF"); + if (get8(s) != 'a') return e("not GIF", "Corrupt GIF"); + + failure_reason = ""; + g->w = get16le(s); + g->h = get16le(s); + g->flags = get8(s); + g->bgindex = get8(s); + g->ratio = get8(s); + g->transparent = -1; + + if (comp != 0) *comp = 4; // can't actually tell whether it's 3 or 4 until we parse the comments + + if (is_info) return 1; + + if (g->flags & 0x80) + stbi_gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1); + + return 1; +} + +static int stbi_gif_info_raw(stbi *s, int *x, int *y, int *comp) +{ + stbi_gif g; + if (!stbi_gif_header(s, &g, comp, 1)) return 0; + if (x) *x = g.w; + if (y) *y = g.h; + return 1; +} + +static void stbi_out_gif_code(stbi_gif *g, uint16 code) +{ + uint8 *p, *c; + + // recurse to decode the prefixes, since the linked-list is backwards, + // and working backwards through an interleaved image would be nasty + if (g->codes[code].prefix >= 0) + stbi_out_gif_code(g, g->codes[code].prefix); + + if (g->cur_y >= g->max_y) return; + + p = &g->out[g->cur_x + g->cur_y]; + c = &g->color_table[g->codes[code].suffix * 4]; + + if (c[3] >= 128) { + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } + g->cur_x += 4; + + if (g->cur_x >= g->max_x) { + g->cur_x = g->start_x; + g->cur_y += g->step; + + while (g->cur_y >= g->max_y && g->parse > 0) { + g->step = (1 << g->parse) * g->line_size; + g->cur_y = g->start_y + (g->step >> 1); + --g->parse; + } + } +} + +static uint8 *stbi_process_gif_raster(stbi *s, stbi_gif *g) +{ + uint8 lzw_cs; + int32 len, code; + uint32 first; + int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear; + stbi_gif_lzw *p; + + lzw_cs = get8u(s); + clear = 1 << lzw_cs; + first 
= 1; + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + bits = 0; + valid_bits = 0; + for (code = 0; code < clear; code++) { + g->codes[code].prefix = -1; + g->codes[code].first = (uint8) code; + g->codes[code].suffix = (uint8) code; + } + + // support no starting clear code + avail = clear+2; + oldcode = -1; + + len = 0; + for(;;) { + if (valid_bits < codesize) { + if (len == 0) { + len = get8(s); // start new block + if (len == 0) + return g->out; + } + --len; + bits |= (int32) get8(s) << valid_bits; + valid_bits += 8; + } else { + int32 code = bits & codemask; + bits >>= codesize; + valid_bits -= codesize; + // @OPTIMIZE: is there some way we can accelerate the non-clear path? + if (code == clear) { // clear code + codesize = lzw_cs + 1; + codemask = (1 << codesize) - 1; + avail = clear + 2; + oldcode = -1; + first = 0; + } else if (code == clear + 1) { // end of stream code + skip(s, len); + while ((len = get8(s)) > 0) + skip(s,len); + return g->out; + } else if (code <= avail) { + if (first) return epuc("no clear code", "Corrupt GIF"); + + if (oldcode >= 0) { + p = &g->codes[avail++]; + if (avail > 4096) return epuc("too many codes", "Corrupt GIF"); + p->prefix = (int16) oldcode; + p->first = g->codes[oldcode].first; + p->suffix = (code == avail) ? 
p->first : g->codes[code].first; + } else if (code == avail) + return epuc("illegal code in raster", "Corrupt GIF"); + + stbi_out_gif_code(g, (uint16) code); + + if ((avail & codemask) == 0 && avail <= 0x0FFF) { + codesize++; + codemask = (1 << codesize) - 1; + } + + oldcode = code; + } else { + return epuc("illegal code in raster", "Corrupt GIF"); + } + } + } +} + +static void stbi_fill_gif_background(stbi_gif *g) +{ + int i; + uint8 *c = g->pal[g->bgindex]; + // @OPTIMIZE: write a dword at a time + for (i = 0; i < g->w * g->h * 4; i += 4) { + uint8 *p = &g->out[i]; + p[0] = c[2]; + p[1] = c[1]; + p[2] = c[0]; + p[3] = c[3]; + } +} + +// this function is designed to support animated gifs, although stb_image doesn't support it +static uint8 *stbi_gif_load_next(stbi *s, stbi_gif *g, int *comp, int req_comp) +{ + int i; + uint8 *old_out = 0; + + if (g->out == 0) { + if (!stbi_gif_header(s, g, comp,0)) return 0; // failure_reason set by stbi_gif_header + g->out = (uint8 *) malloc(4 * g->w * g->h); + if (g->out == 0) return epuc("outofmem", "Out of memory"); + stbi_fill_gif_background(g); + } else { + // animated-gif-only path + if (((g->eflags & 0x1C) >> 2) == 3) { + old_out = g->out; + g->out = (uint8 *) malloc(4 * g->w * g->h); + if (g->out == 0) return epuc("outofmem", "Out of memory"); + memcpy(g->out, old_out, g->w*g->h*4); + } + } + + for (;;) { + switch (get8(s)) { + case 0x2C: /* Image Descriptor */ + { + int32 x, y, w, h; + uint8 *o; + + x = get16le(s); + y = get16le(s); + w = get16le(s); + h = get16le(s); + if (((x + w) > (g->w)) || ((y + h) > (g->h))) + return epuc("bad Image Descriptor", "Corrupt GIF"); + + g->line_size = g->w * 4; + g->start_x = x * 4; + g->start_y = y * g->line_size; + g->max_x = g->start_x + w * 4; + g->max_y = g->start_y + h * g->line_size; + g->cur_x = g->start_x; + g->cur_y = g->start_y; + + g->lflags = get8(s); + + if (g->lflags & 0x40) { + g->step = 8 * g->line_size; // first interlaced spacing + g->parse = 3; + } else { + g->step 
= g->line_size; + g->parse = 0; + } + + if (g->lflags & 0x80) { + stbi_gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1); + g->color_table = (uint8 *) g->lpal; + } else if (g->flags & 0x80) { + for (i=0; i < 256; ++i) // @OPTIMIZE: reset only the previous transparent + g->pal[i][3] = 255; + if (g->transparent >= 0 && (g->eflags & 0x01)) + g->pal[g->transparent][3] = 0; + g->color_table = (uint8 *) g->pal; + } else + return epuc("missing color table", "Corrupt GIF"); + + o = stbi_process_gif_raster(s, g); + if (o == NULL) return NULL; + + if (req_comp && req_comp != 4) + o = convert_format(o, 4, req_comp, g->w, g->h); + return o; + } + + case 0x21: // Comment Extension. + { + int len; + if (get8(s) == 0xF9) { // Graphic Control Extension. + len = get8(s); + if (len == 4) { + g->eflags = get8(s); + get16le(s); // delay + g->transparent = get8(s); + } else { + skip(s, len); + break; + } + } + while ((len = get8(s)) != 0) + skip(s, len); + break; + } + + case 0x3B: // gif stream termination code + return (uint8 *) 1; + + default: + return epuc("unknown code", "Corrupt GIF"); + } + } +} + +#ifndef STBI_NO_STDIO +stbi_uc *stbi_gif_load (char const *filename, int *x, int *y, int *comp, int req_comp) +{ + uint8 *data; + FILE *f = fopen(filename, "rb"); + if (!f) return NULL; + data = stbi_gif_load_from_file(f, x,y,comp,req_comp); + fclose(f); + return data; +} + +stbi_uc *stbi_gif_load_from_file (FILE *f, int *x, int *y, int *comp, int req_comp) +{ + uint8 *u = 0; + stbi s; + stbi_gif g={0}; + start_file(&s, f); + + u = stbi_gif_load_next(&s, &g, comp, req_comp); + if (u == (void *) 1) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + } + + return u; +} +#endif + +stbi_uc *stbi_gif_load_from_memory (stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + uint8 *u = 0; + stbi s; + stbi_gif g={0}; + start_mem(&s, buffer, len); + u = stbi_gif_load_next(&s, &g, comp, req_comp); + if (u == 
(void *) 1) u = 0; // end of animated gif marker + if (u) { + *x = g.w; + *y = g.h; + } + return u; +} + +#ifndef STBI_NO_STDIO +int stbi_gif_info (char const *filename, int *x, int *y, int *comp) +{ + int res; + FILE *f = fopen(filename, "rb"); + if (!f) return 0; + res = stbi_gif_info_from_file(f, x, y, comp); + fclose(f); + return res; +} + +int stbi_gif_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + stbi s; + int res; + long n = ftell(f); + start_file(&s, f); + res = stbi_gif_info_raw(&s, x, y, comp); + fseek(f, n, SEEK_SET); + return res; +} +#endif // !STBI_NO_STDIO + +int stbi_gif_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + stbi s; + start_mem(&s, buffer, len); + return stbi_gif_info_raw(&s, x, y, comp); +} + + + + +// ************************************************************************************************* +// Radiance RGBE HDR loader +// originally by Nicolas Schulz +#ifndef STBI_NO_HDR +static int hdr_test(stbi *s) +{ + const char *signature = "#?RADIANCE\n"; + int i; + for (i=0; signature[i]; ++i) + if (get8(s) != signature[i]) + return 0; + return 1; +} + +int stbi_hdr_test_memory(stbi_uc const *buffer, int len) +{ + stbi s; + start_mem(&s, buffer, len); + return hdr_test(&s); +} + +#ifndef STBI_NO_STDIO +int stbi_hdr_test_file(FILE *f) +{ + stbi s; + int r,n = ftell(f); + start_file(&s, f); + r = hdr_test(&s); + fseek(f,n,SEEK_SET); + return r; +} +#endif + +#define HDR_BUFLEN 1024 +static char *hdr_gettoken(stbi *z, char *buffer) +{ + int len=0; + char c = '\0'; + + c = (char) get8(z); + + while (!at_eof(z) && c != '\n') { + buffer[len++] = c; + if (len == HDR_BUFLEN-1) { + // flush to end of line + while (!at_eof(z) && get8(z) != '\n') + ; + break; + } + c = (char) get8(z); + } + + buffer[len] = 0; + return buffer; +} + +static void hdr_convert(float *output, stbi_uc *input, int req_comp) +{ + if ( input[3] != 0 ) { + float f1; + // Exponent + f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8)); + 
if (req_comp <= 2) + output[0] = (input[0] + input[1] + input[2]) * f1 / 3; + else { + output[0] = input[0] * f1; + output[1] = input[1] * f1; + output[2] = input[2] * f1; + } + if (req_comp == 2) output[1] = 1; + if (req_comp == 4) output[3] = 1; + } else { + switch (req_comp) { + case 4: output[3] = 1; /* fallthrough */ + case 3: output[0] = output[1] = output[2] = 0; + break; + case 2: output[1] = 1; /* fallthrough */ + case 1: output[0] = 0; + break; + } + } +} + + +static float *hdr_load(stbi *s, int *x, int *y, int *comp, int req_comp) +{ + char buffer[HDR_BUFLEN]; + char *token; + int valid = 0; + int width, height; + stbi_uc *scanline; + float *hdr_data; + int len; + unsigned char count, value; + int i, j, k, c1,c2, z; + + + // Check identifier + if (strcmp(hdr_gettoken(s,buffer), "#?RADIANCE") != 0) + return epf("not HDR", "Corrupt HDR image"); + + // Parse header + for(;;) { + token = hdr_gettoken(s,buffer); + if (token[0] == 0) break; + if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1; + } + + if (!valid) return epf("unsupported format", "Unsupported HDR format"); + + // Parse width and height + // can't use sscanf() if we're not using stdio! 
+ token = hdr_gettoken(s,buffer); + if (strncmp(token, "-Y ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + height = strtol(token, &token, 10); + while (*token == ' ') ++token; + if (strncmp(token, "+X ", 3)) return epf("unsupported data layout", "Unsupported HDR format"); + token += 3; + width = strtol(token, NULL, 10); + + *x = width; + *y = height; + + *comp = 3; + if (req_comp == 0) req_comp = 3; + + // Read data + hdr_data = (float *) malloc(height * width * req_comp * sizeof(float)); + + // Load image data + // image data is stored as some number of sca + if ( width < 8 || width >= 32768) { + // Read flat data + for (j=0; j < height; ++j) { + for (i=0; i < width; ++i) { + stbi_uc rgbe[4]; + main_decode_loop: + getn(s, rgbe, 4); + hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp); + } + } + } else { + // Read RLE-encoded data + scanline = NULL; + + for (j = 0; j < height; ++j) { + c1 = get8(s); + c2 = get8(s); + len = get8(s); + if (c1 != 2 || c2 != 2 || (len & 0x80)) { + // not run-length encoded, so we have to actually use THIS data as a decoded + // pixel (note this can't be a valid pixel--one of RGB must be >= 128) + uint8 rgbe[4]; + rgbe[0] = (uint8) c1; + rgbe[1] = (uint8) c2; + rgbe[2] = (uint8) len; + rgbe[3] = (uint8) get8u(s); + hdr_convert(hdr_data, rgbe, req_comp); + i = 1; + j = 0; + free(scanline); + goto main_decode_loop; // yes, this makes no sense + } + len <<= 8; + len |= get8(s); + if (len != width) { free(hdr_data); free(scanline); return epf("invalid decoded scanline length", "corrupt HDR"); } + if (scanline == NULL) scanline = (stbi_uc *) malloc(width * 4); + + for (k = 0; k < 4; ++k) { + i = 0; + while (i < width) { + count = get8u(s); + if (count > 128) { + // Run + value = get8u(s); + count -= 128; + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = value; + } else { + // Dump + for (z = 0; z < count; ++z) + scanline[i++ * 4 + k] = get8u(s); + } + } + } + for (i=0; i 
< width; ++i) + hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp); + } + free(scanline); + } + + return hdr_data; +} + +#ifndef STBI_NO_STDIO +float *stbi_hdr_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_file(&s,f); + return hdr_load(&s,x,y,comp,req_comp); +} +#endif + +float *stbi_hdr_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp) +{ + stbi s; + start_mem(&s,buffer, len); + return hdr_load(&s,x,y,comp,req_comp); +} + +#endif // STBI_NO_HDR + + +#ifndef STBI_NO_STDIO +int stbi_info(char const *filename, int *x, int *y, int *comp) +{ + FILE *f = fopen(filename, "rb"); + int result; + if (!f) return e("can't fopen", "Unable to open file"); + result = stbi_info_from_file(f, x, y, comp); + fclose(f); + return result; +} + +int stbi_info_from_file(FILE *f, int *x, int *y, int *comp) +{ + if (stbi_jpeg_info_from_file(f, x, y, comp)) + return 1; + if (stbi_png_info_from_file(f, x, y, comp)) + return 1; + if (stbi_gif_info_from_file(f, x, y, comp)) + return 1; + // @TODO: stbi_bmp_info_from_file + // @TODO: stbi_psd_info_from_file + #ifndef STBI_NO_HDR + // @TODO: stbi_hdr_info_from_file + #endif + // test tga last because it's a crappy test! + if (stbi_tga_info_from_file(f, x, y, comp)) + return 1; + return e("unknown image type", "Image not of any known type, or corrupt"); +} +#endif // !STBI_NO_STDIO + +int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp) +{ + if (stbi_jpeg_info_from_memory(buffer, len, x, y, comp)) + return 1; + if (stbi_png_info_from_memory(buffer, len, x, y, comp)) + return 1; + if (stbi_gif_info_from_memory(buffer, len, x, y, comp)) + return 1; + // @TODO: stbi_bmp_info_from_memory + // @TODO: stbi_psd_info_from_memory + #ifndef STBI_NO_HDR + // @TODO: stbi_hdr_info_from_memory + #endif + // test tga last because it's a crappy test! 
+ if (stbi_tga_info_from_memory(buffer, len, x, y, comp)) + return 1; + return e("unknown image type", "Image not of any known type, or corrupt"); +} + +/////////////////////// write image /////////////////////// + +#ifndef STBI_NO_WRITE + +static void write8(FILE *f, int x) { uint8 z = (uint8) x; fwrite(&z,1,1,f); } + +static void writefv(FILE *f, char *fmt, va_list v) +{ + while (*fmt) { + switch (*fmt++) { + case ' ': break; + case '1': { uint8 x = (uint8)va_arg(v, int); write8(f,x); break; } + case '2': { int16 x = (int16)va_arg(v, int); write8(f,x); write8(f,x>>8); break; } + case '4': { int32 x = va_arg(v, int); write8(f,x); write8(f,x>>8); write8(f,x>>16); write8(f,x>>24); break; } + default: + assert(0); + va_end(v); + return; + } + } +} + +static void writef(FILE *f, char *fmt, ...) +{ + va_list v; + va_start(v, fmt); + writefv(f,fmt,v); + va_end(v); +} + +static void write_pixels(FILE *f, int rgb_dir, int vdir, int x, int y, int comp, void *data, int write_alpha, int scanline_pad) +{ + uint8 bg[3] = { 255, 0, 255}, px[3]; + uint32 zero = 0; + int i,j,k, j_end; + + if (vdir < 0) + j_end = -1, j = y-1; + else + j_end = y, j = 0; + + for (; j != j_end; j += vdir) { + for (i=0; i < x; ++i) { + uint8 *d = (uint8 *) data + (j*x+i)*comp; + if (write_alpha < 0) + fwrite(&d[comp-1], 1, 1, f); + switch (comp) { + case 1: + case 2: writef(f, (char*)"111", d[0],d[0],d[0]); + break; + case 4: + if (!write_alpha) { + for (k=0; k < 3; ++k) + px[k] = bg[k] + ((d[k] - bg[k]) * d[3])/255; + writef(f, (char*)"111", px[1-rgb_dir],px[1],px[1+rgb_dir]); + break; + } + /* FALLTHROUGH */ + case 3: + writef(f, (char*)"111", d[1-rgb_dir],d[1],d[1+rgb_dir]); + break; + } + if (write_alpha > 0) + fwrite(&d[comp-1], 1, 1, f); + } + fwrite(&zero,scanline_pad,1,f); + } +} + +static int outfile(char const *filename, int rgb_dir, int vdir, int x, int y, int comp, void *data, int alpha, int pad, char *fmt, ...) 
+{ + FILE *f = fopen(filename, "wb"); + if (f) { + va_list v; + va_start(v, fmt); + writefv(f, fmt, v); + va_end(v); + write_pixels(f,rgb_dir,vdir,x,y,comp,data,alpha,pad); + fclose(f); + } + return f != NULL; +} + +int stbi_write_bmp(char const *filename, int x, int y, int comp, void *data) +{ + int pad = (-x*3) & 3; + return outfile(filename,-1,-1,x,y,comp,data,0,pad, + (char*)"11 4 22 4" "4 44 22 444444", + 'B', 'M', 14+40+(x*3+pad)*y, 0,0, 14+40, // file header + 40, x,y, 1,24, 0,0,0,0,0,0); // bitmap header +} + +int stbi_write_tga(char const *filename, int x, int y, int comp, void *data) +{ + int has_alpha = !(comp & 1); + return outfile(filename, -1,-1, x, y, comp, data, has_alpha, 0, + (char*)"111 221 2222 11", 0,0,2, 0,0,0, 0,0,x,y, 24+8*has_alpha, 8*has_alpha); +} + +#endif // STBI_NO_WRITE + +#endif // STBI_HEADER_FILE_ONLY + +/* + revision history: + 1.29 (2010-08-16) various warning fixes from Aurelien Pocheville + 1.28 (2010-08-01) fix bug in GIF palette transparency (SpartanJ) + 1.27 (2010-08-01) + cast-to-uint8 to fix warnings + 1.26 (2010-07-24) + fix bug in file buffering for PNG reported by SpartanJ + 1.25 (2010-07-17) + refix trans_data warning (Won Chun) + 1.24 (2010-07-12) + perf improvements reading from files on platforms with lock-heavy fgetc() + minor perf improvements for jpeg + deprecated type-specific functions so we'll get feedback if they're needed + attempt to fix trans_data warning (Won Chun) + 1.23 fixed bug in iPhone support + 1.22 (2010-07-10) + removed image *writing* support + removed image *writing* support + stbi_info support from Jetro Lauha + GIF support from Jean-Marc Lienher + iPhone PNG-extensions from James Brown + warning-fixes from Nicolas Schulz and Janez Zemva (i.e. 
Janez (U+017D)emva) + 1.21 fix use of 'uint8' in header (reported by jon blow) + 1.20 added support for Softimage PIC, by Tom Seddon + 1.19 bug in interlaced PNG corruption check (found by ryg) + 1.18 2008-08-02 + fix a threading bug (local mutable static) + 1.17 support interlaced PNG + 1.16 major bugfix - convert_format converted one too many pixels + 1.15 initialize some fields for thread safety + 1.14 fix threadsafe conversion bug + header-file-only version (#define STBI_HEADER_FILE_ONLY before including) + 1.13 threadsafe + 1.12 const qualifiers in the API + 1.11 Support installable IDCT, colorspace conversion routines + 1.10 Fixes for 64-bit (don't use "unsigned long") + optimized upsampling by Fabian "ryg" Giesen + 1.09 Fix format-conversion for PSD code (bad global variables!) + 1.08 Thatcher Ulrich's PSD code integrated by Nicolas Schulz + 1.07 attempt to fix C++ warning/errors again + 1.06 attempt to fix C++ warning/errors again + 1.05 fix TGA loading to return correct *comp and use good luminance calc + 1.04 default float alpha is 1, not 255; use 'void *' for stbi_image_free + 1.03 bugfixes to STBI_NO_STDIO, STBI_NO_HDR + 1.02 support for (subset of) HDR files, float interface for preferred access to them + 1.01 fix bug: possible bug in handling right-side up bmps... 
not sure + fix bug: the stbi_bmp_load() and stbi_tga_load() functions didn't work at all + 1.00 interface to zlib that skips zlib header + 0.99 correct handling of alpha in palette + 0.98 TGA loader by lonesock; dynamically add loaders (untested) + 0.97 jpeg errors on too large a file; also catch another malloc failure + 0.96 fix detection of invalid v value - particleman@mollyrocket forum + 0.95 during header scan, seek to markers in case of padding + 0.94 STBI_NO_STDIO to disable stdio usage; rename all #defines the same + 0.93 handle jpegtran output; verbose errors + 0.92 read 4,8,16,24,32-bit BMP files of several formats + 0.91 output 24-bit Windows 3.0 BMP files + 0.90 fix a few more warnings; bump version number to approach 1.0 + 0.61 bugfixes due to Marc LeBlanc, Christopher Lloyd + 0.60 fix compiling as c++ + 0.59 fix warnings: merge Dave Moore's -Wall fixes + 0.58 fix bug: zlib uncompressed mode len/nlen was wrong endian + 0.57 fix bug: jpg last huffman symbol before marker was >9 bits but less + than 16 available + 0.56 fix bug: zlib uncompressed mode len vs. nlen + 0.55 fix bug: restart_interval not initialized to 0 + 0.54 allow NULL for 'int *comp' + 0.53 fix bug in png 3->4; speedup png decoding + 0.52 png handles req_comp=3,4 directly; minor cleanup; jpeg comments + 0.51 obey req_comp requests, 1-component jpegs return as 1-component, + on 'test' only check type, not whether we support this variant +*/ diff --git a/libs/jpeg/tga2jpg.cpp b/libs/jpeg/tga2jpg.cpp new file mode 100644 index 0000000..1f70120 --- /dev/null +++ b/libs/jpeg/tga2jpg.cpp @@ -0,0 +1,533 @@ +// tga2jpg.cpp - jpge/jpgd example command line app. +// Public domain, Rich Geldreich +// Last updated May. 19, 2012 + +// Note: jpge.cpp/h and jpgd.cpp/h are completely standalone, i.e. they do not have any dependencies to each other. 
+#include "jpge.h" +#include "jpgd.h" +#include "stb_image.c" +#include "timer.h" +#include + +#if defined(_MSC_VER) + #define strcasecmp _stricmp +#else + #define strcpy_s(d, c, s) strcpy(d, s) +#endif + +static int print_usage() +{ + printf("Usage: jpge [options] \n"); + printf("\nRequired parameters (must follow options):\n"); + printf("source_file: Source image file, in any format stb_image.c supports.\n"); + printf("dest_file: Destination JPEG file.\n"); + printf("quality_factor: 1-100, higher=better (only needed in compression mode)\n"); + printf("\nDefault mode compresses source_file to dest_file. Alternate modes:\n"); + printf("-x: Exhaustive compression test (only needs source_file)\n"); + printf("-d: Test jpgd.h. source_file must be JPEG, and dest_file must be .TGA\n"); + printf("\nOptions supported in all modes:\n"); + printf("-glogfilename.txt: Append output to log file\n"); + printf("\nOptions supported in compression mode (the default):\n"); + printf("-o: Enable optimized Huffman tables (slower, but smaller files)\n"); + printf("-luma: Output Y-only image\n"); + printf("-h1v1, -h2v1, -h2v2: Chroma subsampling (default is either Y-only or H2V2)\n"); + printf("-m: Test mem to mem compression (instead of mem to file)\n"); + printf("-wfilename.tga: Write decompressed image to filename.tga\n"); + printf("-s: Use stb_image.c to decompress JPEG image, instead of jpgd.cpp\n"); + printf("\nExample usages:\n"); + printf("Test compression: jpge orig.png comp.jpg 90\n"); + printf("Test decompression: jpge -d comp.jpg uncomp.tga\n"); + printf("Exhaustively test compressor: jpge -x orig.png\n"); + + return EXIT_FAILURE; +} + +static char s_log_filename[256]; + +static void log_printf(const char *pMsg, ...) 
+{ + va_list args; + + va_start(args, pMsg); + char buf[2048]; + vsnprintf(buf, sizeof(buf) - 1, pMsg, args); + buf[sizeof(buf) - 1] = '\0'; + va_end(args); + + printf("%s", buf); + + if (s_log_filename[0]) + { + FILE *pFile = fopen(s_log_filename, "a+"); + if (pFile) + { + fprintf(pFile, "%s", buf); + fclose(pFile); + } + } +} + +static uint get_file_size(const char *pFilename) +{ + FILE *pFile = fopen(pFilename, "rb"); + if (!pFile) return 0; + fseek(pFile, 0, SEEK_END); + uint file_size = ftell(pFile); + fclose(pFile); + return file_size; +} + +struct image_compare_results +{ + image_compare_results() { memset(this, 0, sizeof(*this)); } + + double max_err; + double mean; + double mean_squared; + double root_mean_squared; + double peak_snr; +}; + +static void get_pixel(int* pDst, const uint8 *pSrc, bool luma_only, int num_comps) +{ + int r, g, b; + if (num_comps == 1) + { + r = g = b = pSrc[0]; + } + else if (luma_only) + { + const int YR = 19595, YG = 38470, YB = 7471; + r = g = b = (pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) / 65536; + } + else + { + r = pSrc[0]; g = pSrc[1]; b = pSrc[2]; + } + pDst[0] = r; pDst[1] = g; pDst[2] = b; +} + +// Compute image error metrics. 
+static void image_compare(image_compare_results &results, int width, int height, const uint8 *pComp_image, int comp_image_comps, const uint8 *pUncomp_image_data, int uncomp_comps, bool luma_only) +{ + double hist[256]; + memset(hist, 0, sizeof(hist)); + + const uint first_channel = 0, num_channels = 3; + for (int y = 0; y < height; y++) + { + for (int x = 0; x < width; x++) + { + int a[3]; get_pixel(a, pComp_image + (y * width + x) * comp_image_comps, luma_only, comp_image_comps); + int b[3]; get_pixel(b, pUncomp_image_data + (y * width + x) * uncomp_comps, luma_only, uncomp_comps); + for (uint c = 0; c < num_channels; c++) + hist[labs(a[first_channel + c] - b[first_channel + c])]++; + } + } + + results.max_err = 0; + double sum = 0.0f, sum2 = 0.0f; + for (uint i = 0; i < 256; i++) + { + if (!hist[i]) + continue; + if (i > results.max_err) + results.max_err = i; + double x = i * hist[i]; + sum += x; + sum2 += i * x; + } + + // See http://bmrc.berkeley.edu/courseware/cs294/fall97/assignment/psnr.html + double total_values = width * height; + + results.mean = sum / total_values; + results.mean_squared = sum2 / total_values; + + results.root_mean_squared = sqrt(results.mean_squared); + + if (!results.root_mean_squared) + results.peak_snr = 1e+10f; + else + results.peak_snr = log10(255.0f / results.root_mean_squared) * 20.0f; +} + +// Simple exhaustive test. Tries compressing/decompressing image using all supported quality, subsampling, and Huffman optimization settings. +static int exhausive_compression_test(const char *pSrc_filename, bool use_jpgd) +{ + int status = EXIT_SUCCESS; + + // Load the source image. 
+ const int req_comps = 3; // request RGB image + int width = 0, height = 0, actual_comps = 0; + uint8 *pImage_data = stbi_load(pSrc_filename, &width, &height, &actual_comps, req_comps); + if (!pImage_data) + { + log_printf("Failed loading file \"%s\"!\n", pSrc_filename); + return EXIT_FAILURE; + } + + log_printf("Source file: \"%s\" Image resolution: %ix%i Actual comps: %i\n", pSrc_filename, width, height, actual_comps); + + int orig_buf_size = width * height * 3; // allocate a buffer that's hopefully big enough (this is way overkill for jpeg) + if (orig_buf_size < 1024) orig_buf_size = 1024; + void *pBuf = malloc(orig_buf_size); + + uint8 *pUncomp_image_data = NULL; + + double max_err = 0; + double lowest_psnr = 9e+9; + double threshold_psnr = 9e+9; + double threshold_max_err = 0.0f; + + image_compare_results prev_results; + + for (uint quality_factor = 1; quality_factor <= 100; quality_factor++) + { + for (uint subsampling = 0; subsampling <= jpge::H2V2; subsampling++) + { + for (uint optimize_huffman_tables = 0; optimize_huffman_tables <= 1; optimize_huffman_tables++) + { + // Fill in the compression parameter structure. 
+ jpge::params params; + params.m_quality = quality_factor; + params.m_subsampling = static_cast(subsampling); + params.m_two_pass_flag = (optimize_huffman_tables != 0); + + int comp_size = orig_buf_size; + if (!jpge::compress_image_to_jpeg_file_in_memory(pBuf, comp_size, width, height, req_comps, pImage_data, params)) + { + status = EXIT_FAILURE; + goto failure; + } + + int uncomp_width = 0, uncomp_height = 0, uncomp_actual_comps = 0, uncomp_req_comps = 3; + free(pUncomp_image_data); + if (use_jpgd) + pUncomp_image_data = jpgd::decompress_jpeg_image_from_memory((const stbi_uc*)pBuf, comp_size, &uncomp_width, &uncomp_height, &uncomp_actual_comps, uncomp_req_comps); + else + pUncomp_image_data = stbi_load_from_memory((const stbi_uc*)pBuf, comp_size, &uncomp_width, &uncomp_height, &uncomp_actual_comps, uncomp_req_comps); + if (!pUncomp_image_data) + { + status = EXIT_FAILURE; + goto failure; + } + + if ((uncomp_width != width) || (uncomp_height != height)) + { + status = EXIT_FAILURE; + goto failure; + } + + image_compare_results results; + image_compare(results, width, height, pImage_data, req_comps, pUncomp_image_data, uncomp_req_comps, (params.m_subsampling == jpge::Y_ONLY) || (actual_comps == 1) || (uncomp_actual_comps == 1)); + //log_printf("Q: %3u, S: %u, O: %u, CompSize: %7u, Error Max: %3.3f, Mean: %3.3f, Mean^2: %5.3f, RMSE: %3.3f, PSNR: %3.3f\n", quality_factor, subsampling, optimize_huffman_tables, comp_size, results.max_err, results.mean, results.mean_squared, results.root_mean_squared, results.peak_snr); + log_printf("%3u, %u, %u, %7u, %3.3f, %3.3f, %5.3f, %3.3f, %3.3f\n", quality_factor, subsampling, optimize_huffman_tables, comp_size, results.max_err, results.mean, results.mean_squared, results.root_mean_squared, results.peak_snr); + if (results.max_err > max_err) max_err = results.max_err; + if (results.peak_snr < lowest_psnr) lowest_psnr = results.peak_snr; + + if (quality_factor == 1) + { + if (results.peak_snr < threshold_psnr) + threshold_psnr = 
results.peak_snr; + if (results.max_err > threshold_max_err) + threshold_max_err = results.max_err; + } + else + { + // Couple empirically determined tests - worked OK on my test data set. + if ((results.peak_snr < (threshold_psnr - 3.0f)) || (results.peak_snr < 6.0f)) + { + status = EXIT_FAILURE; + goto failure; + } + if (optimize_huffman_tables) + { + if ((prev_results.max_err != results.max_err) || (prev_results.peak_snr != results.peak_snr)) + { + status = EXIT_FAILURE; + goto failure; + } + } + } + + prev_results = results; + } + } + } + + log_printf("Max error: %f Lowest PSNR: %f\n", max_err, lowest_psnr); + +failure: + free(pImage_data); + free(pBuf); + free(pUncomp_image_data); + + log_printf((status == EXIT_SUCCESS) ? "Success.\n" : "Exhaustive test failed!\n"); + return status; +} + +// Test JPEG file decompression using jpgd.h +static int test_jpgd(const char *pSrc_filename, const char *pDst_filename) +{ + // Load the source JPEG image. + const int req_comps = 3; // request RGB image + int width = 0, height = 0, actual_comps = 0; + + timer tm; + tm.start(); + + uint8 *pImage_data = jpgd::decompress_jpeg_image_from_file(pSrc_filename, &width, &height, &actual_comps, req_comps); + + tm.stop(); + + if (!pImage_data) + { + log_printf("Failed loading JPEG file \"%s\"!\n", pSrc_filename); + return EXIT_FAILURE; + } + + log_printf("Source JPEG file: \"%s\", image resolution: %ix%i, actual comps: %i\n", pSrc_filename, width, height, actual_comps); + + log_printf("Decompression time: %3.3fms\n", tm.get_elapsed_ms()); + + if (!stbi_write_tga(pDst_filename, width, height, req_comps, pImage_data)) + { + log_printf("Failed writing image to file \"%s\"!\n", pDst_filename); + free(pImage_data); + return EXIT_FAILURE; + } + log_printf("Wrote decompressed image to TGA file \"%s\"\n", pDst_filename); + + log_printf("Success.\n"); + + free(pImage_data); + return EXIT_SUCCESS; +} + +int main(int arg_c, char* ppArgs[]) +{ + printf("jpge/jpgd example app\n"); + + // Parse 
command line. + bool run_exhausive_test = false; + bool test_memory_compression = false; + bool optimize_huffman_tables = false; + int subsampling = -1; + char output_filename[256] = ""; + bool use_jpgd = true; + bool test_jpgd_decompression = false; + + int arg_index = 1; + while ((arg_index < arg_c) && (ppArgs[arg_index][0] == '-')) + { + switch (tolower(ppArgs[arg_index][1])) + { + case 'd': + test_jpgd_decompression = true; + break; + case 'g': + strcpy_s(s_log_filename, sizeof(s_log_filename), &ppArgs[arg_index][2]); + break; + case 'x': + run_exhausive_test = true; + break; + case 'm': + test_memory_compression = true; + break; + case 'o': + optimize_huffman_tables = true; + break; + case 'l': + if (strcasecmp(&ppArgs[arg_index][1], "luma") == 0) + subsampling = jpge::Y_ONLY; + else + { + log_printf("Unrecognized option: %s\n", ppArgs[arg_index]); + return EXIT_FAILURE; + } + break; + case 'h': + if (strcasecmp(&ppArgs[arg_index][1], "h1v1") == 0) + subsampling = jpge::H1V1; + else if (strcasecmp(&ppArgs[arg_index][1], "h2v1") == 0) + subsampling = jpge::H2V1; + else if (strcasecmp(&ppArgs[arg_index][1], "h2v2") == 0) + subsampling = jpge::H2V2; + else + { + log_printf("Unrecognized subsampling: %s\n", ppArgs[arg_index]); + return EXIT_FAILURE; + } + break; + case 'w': + { + strcpy_s(output_filename, sizeof(output_filename), &ppArgs[arg_index][2]); + break; + } + case 's': + { + use_jpgd = false; + break; + } + default: + log_printf("Unrecognized option: %s\n", ppArgs[arg_index]); + return EXIT_FAILURE; + } + arg_index++; + } + + if (run_exhausive_test) + { + if ((arg_c - arg_index) < 1) + { + log_printf("Not enough parameters (expected source file)\n"); + return print_usage(); + } + + const char* pSrc_filename = ppArgs[arg_index++]; + return exhausive_compression_test(pSrc_filename, use_jpgd); + } + else if (test_jpgd_decompression) + { + if ((arg_c - arg_index) < 2) + { + log_printf("Not enough parameters (expected source and destination files)\n"); + 
return print_usage(); + } + + const char* pSrc_filename = ppArgs[arg_index++]; + const char* pDst_filename = ppArgs[arg_index++]; + return test_jpgd(pSrc_filename, pDst_filename); + } + + // Test jpge + if ((arg_c - arg_index) < 3) + { + log_printf("Not enough parameters (expected source file, dest file, quality factor to follow options)\n"); + return print_usage(); + } + + const char* pSrc_filename = ppArgs[arg_index++]; + const char* pDst_filename = ppArgs[arg_index++]; + + int quality_factor = atoi(ppArgs[arg_index++]); + if ((quality_factor < 1) || (quality_factor > 100)) + { + log_printf("Quality factor must range from 1-100!\n"); + return EXIT_FAILURE; + } + + // Load the source image. + const int req_comps = 3; // request RGB image + int width = 0, height = 0, actual_comps = 0; + uint8 *pImage_data = stbi_load(pSrc_filename, &width, &height, &actual_comps, req_comps); + if (!pImage_data) + { + log_printf("Failed loading file \"%s\"!\n", pSrc_filename); + return EXIT_FAILURE; + } + + log_printf("Source file: \"%s\", image resolution: %ix%i, actual comps: %i\n", pSrc_filename, width, height, actual_comps); + + // Fill in the compression parameter structure. + jpge::params params; + params.m_quality = quality_factor; + params.m_subsampling = (subsampling < 0) ? ((actual_comps == 1) ? jpge::Y_ONLY : jpge::H2V2) : static_cast(subsampling); + params.m_two_pass_flag = optimize_huffman_tables; + + log_printf("Writing JPEG image to file: %s\n", pDst_filename); + + timer tm; + + // Now create the JPEG file. 
+ if (test_memory_compression) + { + int buf_size = width * height * 3; // allocate a buffer that's hopefully big enough (this is way overkill for jpeg) + if (buf_size < 1024) buf_size = 1024; + void *pBuf = malloc(buf_size); + + tm.start(); + if (!jpge::compress_image_to_jpeg_file_in_memory(pBuf, buf_size, width, height, req_comps, pImage_data, params)) + { + log_printf("Failed creating JPEG data!\n"); + return EXIT_FAILURE; + } + tm.stop(); + + FILE *pFile = fopen(pDst_filename, "wb"); + if (!pFile) + { + log_printf("Failed creating file \"%s\"!\n", pDst_filename); + return EXIT_FAILURE; + } + + if (fwrite(pBuf, buf_size, 1, pFile) != 1) + { + log_printf("Failed writing to output file!\n"); + return EXIT_FAILURE; + } + + if (fclose(pFile) == EOF) + { + log_printf("Failed writing to output file!\n"); + return EXIT_FAILURE; + } + } + else + { + tm.start(); + + if (!jpge::compress_image_to_jpeg_file(pDst_filename, width, height, req_comps, pImage_data, params)) + { + log_printf("Failed writing to output file!\n"); + return EXIT_FAILURE; + } + tm.stop(); + } + + double total_comp_time = tm.get_elapsed_ms(); + + const uint comp_file_size = get_file_size(pDst_filename); + const uint total_pixels = width * height; + log_printf("Compressed file size: %u, bits/pixel: %3.3f\n", comp_file_size, (comp_file_size * 8.0f) / total_pixels); + + // Now try loading the JPEG file using jpgd or stbi_image's JPEG decompressor. 
+ int uncomp_width = 0, uncomp_height = 0, uncomp_actual_comps = 0, uncomp_req_comps = 3; + + tm.start(); + uint8 *pUncomp_image_data; + if (use_jpgd) + pUncomp_image_data = jpgd::decompress_jpeg_image_from_file(pDst_filename, &uncomp_width, &uncomp_height, &uncomp_actual_comps, uncomp_req_comps); + else + pUncomp_image_data = stbi_load(pDst_filename, &uncomp_width, &uncomp_height, &uncomp_actual_comps, uncomp_req_comps); + + double total_uncomp_time = tm.get_elapsed_ms(); + + if (!pUncomp_image_data) + { + log_printf("Failed loading compressed image file \"%s\"!\n", pDst_filename); + return EXIT_FAILURE; + } + + log_printf("Compression time: %3.3fms, Decompression time: %3.3fms\n", total_comp_time, total_uncomp_time); + + // Write uncompressed image. + if (output_filename[0]) + stbi_write_tga(output_filename, uncomp_width, uncomp_height, uncomp_req_comps, pUncomp_image_data); + + if ((uncomp_width != width) || (uncomp_height != height)) + { + log_printf("Loaded JPEG file has a different resolution than the original file!\n"); + return EXIT_FAILURE; + } + + // Diff the original and compressed images. + image_compare_results results; + image_compare(results, width, height, pImage_data, req_comps, pUncomp_image_data, uncomp_req_comps, (params.m_subsampling == jpge::Y_ONLY) || (actual_comps == 1) || (uncomp_actual_comps == 1)); + log_printf("Error Max: %f, Mean: %f, Mean^2: %f, RMSE: %f, PSNR: %f\n", results.max_err, results.mean, results.mean_squared, results.root_mean_squared, results.peak_snr); + + log_printf("Success.\n"); + + return EXIT_SUCCESS; +} diff --git a/libs/jpeg/timer.cpp b/libs/jpeg/timer.cpp new file mode 100644 index 0000000..67fba46 --- /dev/null +++ b/libs/jpeg/timer.cpp @@ -0,0 +1,152 @@ +// File: timer.cpp - Simple high-precision timer class. 
Supports Win32, X360, and POSIX/Linux +#include +#include +#include +#include + +#include "timer.h" + +#if defined(WIN32) +#include +#elif defined(_XBOX) +#include +#endif + +unsigned long long timer::g_init_ticks; +unsigned long long timer::g_freq; +double timer::g_inv_freq; + +#if defined(WIN32) || defined(_XBOX) +inline void query_counter(timer_ticks *pTicks) +{ + QueryPerformanceCounter(reinterpret_cast(pTicks)); +} +inline void query_counter_frequency(timer_ticks *pTicks) +{ + QueryPerformanceFrequency(reinterpret_cast(pTicks)); +} +#elif defined(__GNUC__) +#include +inline void query_counter(timer_ticks *pTicks) +{ + struct timeval cur_time; + gettimeofday(&cur_time, NULL); + *pTicks = static_cast(cur_time.tv_sec)*1000000ULL + static_cast(cur_time.tv_usec); +} +inline void query_counter_frequency(timer_ticks *pTicks) +{ + *pTicks = 1000000; +} +#endif + +timer::timer() : + m_start_time(0), + m_stop_time(0), + m_started(false), + m_stopped(false) +{ + if (!g_inv_freq) + init(); +} + +timer::timer(timer_ticks start_ticks) +{ + if (!g_inv_freq) + init(); + + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; +} + +void timer::start(timer_ticks start_ticks) +{ + m_start_time = start_ticks; + + m_started = true; + m_stopped = false; +} + +void timer::start() +{ + query_counter(&m_start_time); + + m_started = true; + m_stopped = false; +} + +void timer::stop() +{ + assert(m_started); + + query_counter(&m_stop_time); + + m_stopped = true; +} + +double timer::get_elapsed_secs() const +{ + assert(m_started); + if (!m_started) + return 0; + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + query_counter(&stop_time); + + timer_ticks delta = stop_time - m_start_time; + return delta * g_inv_freq; +} + +timer_ticks timer::get_elapsed_us() const +{ + assert(m_started); + if (!m_started) + return 0; + + timer_ticks stop_time = m_stop_time; + if (!m_stopped) + query_counter(&stop_time); + + timer_ticks delta = stop_time - m_start_time; + return 
(delta * 1000000ULL + (g_freq >> 1U)) / g_freq; +} + +void timer::init() +{ + if (!g_inv_freq) + { + query_counter_frequency(&g_freq); + g_inv_freq = 1.0f / g_freq; + + query_counter(&g_init_ticks); + } +} + +timer_ticks timer::get_init_ticks() +{ + if (!g_inv_freq) + init(); + + return g_init_ticks; +} + +timer_ticks timer::get_ticks() +{ + if (!g_inv_freq) + init(); + + timer_ticks ticks; + query_counter(&ticks); + return ticks - g_init_ticks; +} + +double timer::ticks_to_secs(timer_ticks ticks) +{ + if (!g_inv_freq) + init(); + + return ticks * g_inv_freq; +} + diff --git a/libs/jpeg/timer.h b/libs/jpeg/timer.h new file mode 100644 index 0000000..ae0f58b --- /dev/null +++ b/libs/jpeg/timer.h @@ -0,0 +1,40 @@ +// File: timer.h +#pragma once + +typedef unsigned long long timer_ticks; + +class timer +{ +public: + timer(); + timer(timer_ticks start_ticks); + + void start(); + void start(timer_ticks start_ticks); + + void stop(); + + double get_elapsed_secs() const; + inline double get_elapsed_ms() const { return get_elapsed_secs() * 1000.0f; } + timer_ticks get_elapsed_us() const; + + static void init(); + static inline timer_ticks get_ticks_per_sec() { return g_freq; } + static timer_ticks get_init_ticks(); + static timer_ticks get_ticks(); + static double ticks_to_secs(timer_ticks ticks); + static inline double ticks_to_ms(timer_ticks ticks) { return ticks_to_secs(ticks) * 1000.0f; } + static inline double get_secs() { return ticks_to_secs(get_ticks()); } + static inline double get_ms() { return ticks_to_ms(get_ticks()); } + +private: + static timer_ticks g_init_ticks; + static timer_ticks g_freq; + static double g_inv_freq; + + timer_ticks m_start_time; + timer_ticks m_stop_time; + + bool m_started : 1; + bool m_stopped : 1; +};