From 9ad341902da34e462b73e8bca85e39179f210e6c Mon Sep 17 00:00:00 2001 From: omar Date: Tue, 8 Jan 2019 17:37:22 +0100 Subject: [PATCH] ImDrawList: Optimized some of the functions for performance of debug builds where non-inline function call cost are non-negligible. --- docs/CHANGELOG.txt | 2 + imgui.h | 8 ++-- imgui_draw.cpp | 116 +++++++++++++++++++++++++-------------------- 3 files changed, 70 insertions(+), 56 deletions(-) diff --git a/docs/CHANGELOG.txt b/docs/CHANGELOG.txt index 053c8ea2..66ed9545 100644 --- a/docs/CHANGELOG.txt +++ b/docs/CHANGELOG.txt @@ -72,6 +72,8 @@ Other Changes: Missing calls to End(), past the assert, should not lead to crashes or to the fallback Debug window appearing on screen. Those changes makes it easier to integrate dear imgui with a scripting language allowing, given asserts are redirected into e.g. an error log and stopping the script execution. +- ImDrawList: Optimized some of the functions for performance of debug builds where non-inline function call cost are non-negligible. + (Our test UI scene on VS2015 Debug Win64 with /RTC1 went ~5.9 ms -> ~4.9 ms. In Release same scene stays at ~0.3 ms.) - IO: Added BackendPlatformUserData, BackendRendererUserData, BackendLanguageUserData void* for storage use by back-ends. - IO: Renamed InputCharacters[], marked internal as was always intended. Please don't access directly, and use AddInputCharacter() instead! - IO: AddInputCharacter() goes into a queue which can receive as many characters as needed during the frame. This is useful diff --git a/imgui.h b/imgui.h index 82c0a62e..4298b3b8 100644 --- a/imgui.h +++ b/imgui.h @@ -1847,11 +1847,11 @@ struct ImDrawList IMGUI_API void AddBezierCurve(const ImVec2& pos0, const ImVec2& cp0, const ImVec2& cp1, const ImVec2& pos1, ImU32 col, float thickness, int num_segments = 0); // Stateful path API, add points then finish with PathFillConvex() or PathStroke() - inline void PathClear() { _Path.resize(0); } + inline void PathClear() { _Path.Size = 0; } inline void PathLineTo(const ImVec2& pos) { _Path.push_back(pos); } - inline void PathLineToMergeDuplicate(const ImVec2& pos) { if (_Path.Size == 0 || memcmp(&_Path[_Path.Size-1], &pos, 8) != 0) _Path.push_back(pos); } - inline void PathFillConvex(ImU32 col) { AddConvexPolyFilled(_Path.Data, _Path.Size, col); PathClear(); } // Note: Anti-aliased filling requires points to be in clockwise order. - inline void PathStroke(ImU32 col, bool closed, float thickness = 1.0f) { AddPolyline(_Path.Data, _Path.Size, col, closed, thickness); PathClear(); } + inline void PathLineToMergeDuplicate(const ImVec2& pos) { if (_Path.Size == 0 || memcmp(&_Path.Data[_Path.Size-1], &pos, 8) != 0) _Path.push_back(pos); } + inline void PathFillConvex(ImU32 col) { AddConvexPolyFilled(_Path.Data, _Path.Size, col); _Path.Size = 0; } // Note: Anti-aliased filling requires points to be in clockwise order. + inline void PathStroke(ImU32 col, bool closed, float thickness = 1.0f) { AddPolyline(_Path.Data, _Path.Size, col, closed, thickness); _Path.Size = 0; } IMGUI_API void PathArcTo(const ImVec2& centre, float radius, float a_min, float a_max, int num_segments = 10); IMGUI_API void PathArcToFast(const ImVec2& centre, float radius, int a_min_of_12, int a_max_of_12); // Use precomputed angles for a 12 steps circle IMGUI_API void PathBezierCurveTo(const ImVec2& p1, const ImVec2& p2, const ImVec2& p3, int num_segments = 0); diff --git a/imgui_draw.cpp b/imgui_draw.cpp index 6853ce0a..9833590f 100644 --- a/imgui_draw.cpp +++ b/imgui_draw.cpp @@ -656,7 +656,13 @@ void ImDrawList::PrimQuadUV(const ImVec2& a, const ImVec2& b, const ImVec2& c, c _IdxWritePtr += 6; } +// On AddPolyline() and AddConvexPolyFilled() we intentionally avoid using ImVec2 and superflous function calls to optimize debug/non-inlined builds. +// Those macros expects l-values. +#define IM_NORMALIZE2F_OVER_ZERO(VX,VY) { float d2 = VX*VX + VY*VY; if (d2 > 0.0f) { float inv_len = 1.0f / ImSqrt(d2); VX *= inv_len; VY *= inv_len; } } +#define IM_NORMALIZE2F_OVER_EPSILON_CLAMP(VX,VY,EPS,INVLENMAX) { float d2 = VX*VX + VY*VY; if (d2 > EPS) { float inv_len = 1.0f / ImSqrt(d2); if (inv_len > INVLENMAX) inv_len = INVLENMAX; VX *= inv_len; VY *= inv_len; } } + // TODO: Thickness anti-aliased lines cap are missing their AA fringe. +// We avoid using the ImVec2 math operators here to reduce cost to a minimum for debug/non-inlined builds. void ImDrawList::AddPolyline(const ImVec2* points, const int points_count, ImU32 col, bool closed, float thickness) { if (points_count < 2) @@ -686,10 +692,11 @@ void ImDrawList::AddPolyline(const ImVec2* points, const int points_count, ImU32 for (int i1 = 0; i1 < count; i1++) { const int i2 = (i1+1) == points_count ? 0 : i1+1; - ImVec2 diff = points[i2] - points[i1]; - diff *= ImInvLength(diff, 1.0f); - temp_normals[i1].x = diff.y; - temp_normals[i1].y = -diff.x; + float dx = points[i2].x - points[i1].x; + float dy = points[i2].y - points[i1].y; + IM_NORMALIZE2F_OVER_ZERO(dx, dy); + temp_normals[i1].x = dy; + temp_normals[i1].y = -dx; } if (!closed) temp_normals[points_count-1] = temp_normals[points_count-2]; @@ -712,17 +719,18 @@ void ImDrawList::AddPolyline(const ImVec2* points, const int points_count, ImU32 unsigned int idx2 = (i1+1) == points_count ? _VtxCurrentIdx : idx1+3; // Average normals - ImVec2 dm = (temp_normals[i1] + temp_normals[i2]) * 0.5f; - float dmr2 = dm.x*dm.x + dm.y*dm.y; - if (dmr2 > 0.000001f) - { - float scale = 1.0f / dmr2; - if (scale > 100.0f) scale = 100.0f; - dm *= scale; - } - dm *= AA_SIZE; - temp_points[i2*2+0] = points[i2] + dm; - temp_points[i2*2+1] = points[i2] - dm; + float dm_x = (temp_normals[i1].x + temp_normals[i2].x) * 0.5f; + float dm_y = (temp_normals[i1].y + temp_normals[i2].y) * 0.5f; + IM_NORMALIZE2F_OVER_EPSILON_CLAMP(dm_x, dm_y, 0.000001f, 100.0f) + dm_x *= AA_SIZE; + dm_y *= AA_SIZE; + + // Add temporary vertexes + ImVec2* out_vtx = &temp_points[i2*2]; + out_vtx[0].x = points[i2].x + dm_x; + out_vtx[0].y = points[i2].y + dm_y; + out_vtx[1].x = points[i2].x - dm_x; + out_vtx[1].y = points[i2].y - dm_y; // Add indexes _IdxWritePtr[0] = (ImDrawIdx)(idx2+0); _IdxWritePtr[1] = (ImDrawIdx)(idx1+0); _IdxWritePtr[2] = (ImDrawIdx)(idx1+2); @@ -766,20 +774,24 @@ void ImDrawList::AddPolyline(const ImVec2* points, const int points_count, ImU32 unsigned int idx2 = (i1+1) == points_count ? _VtxCurrentIdx : idx1+4; // Average normals - ImVec2 dm = (temp_normals[i1] + temp_normals[i2]) * 0.5f; - float dmr2 = dm.x*dm.x + dm.y*dm.y; - if (dmr2 > 0.000001f) - { - float scale = 1.0f / dmr2; - if (scale > 100.0f) scale = 100.0f; - dm *= scale; - } - ImVec2 dm_out = dm * (half_inner_thickness + AA_SIZE); - ImVec2 dm_in = dm * half_inner_thickness; - temp_points[i2*4+0] = points[i2] + dm_out; - temp_points[i2*4+1] = points[i2] + dm_in; - temp_points[i2*4+2] = points[i2] - dm_in; - temp_points[i2*4+3] = points[i2] - dm_out; + float dm_x = (temp_normals[i1].x + temp_normals[i2].x) * 0.5f; + float dm_y = (temp_normals[i1].y + temp_normals[i2].y) * 0.5f; + IM_NORMALIZE2F_OVER_EPSILON_CLAMP(dm_x, dm_y, 0.000001f, 100.0f); + float dm_out_x = dm_x * (half_inner_thickness + AA_SIZE); + float dm_out_y = dm_y * (half_inner_thickness + AA_SIZE); + float dm_in_x = dm_x * half_inner_thickness; + float dm_in_y = dm_y * half_inner_thickness; + + // Add temporary vertexes + ImVec2* out_vtx = &temp_points[i2*4]; + out_vtx[0].x = points[i2].x + dm_out_x; + out_vtx[0].y = points[i2].y + dm_out_y; + out_vtx[1].x = points[i2].x + dm_in_x; + out_vtx[1].y = points[i2].y + dm_in_y; + out_vtx[2].x = points[i2].x - dm_in_x; + out_vtx[2].y = points[i2].y - dm_in_y; + out_vtx[3].x = points[i2].x - dm_out_x; + out_vtx[3].y = points[i2].y - dm_out_y; // Add indexes _IdxWritePtr[0] = (ImDrawIdx)(idx2+1); _IdxWritePtr[1] = (ImDrawIdx)(idx1+1); _IdxWritePtr[2] = (ImDrawIdx)(idx1+2); @@ -817,11 +829,13 @@ void ImDrawList::AddPolyline(const ImVec2* points, const int points_count, ImU32 const int i2 = (i1+1) == points_count ? 0 : i1+1; const ImVec2& p1 = points[i1]; const ImVec2& p2 = points[i2]; - ImVec2 diff = p2 - p1; - diff *= ImInvLength(diff, 1.0f); - const float dx = diff.x * (thickness * 0.5f); - const float dy = diff.y * (thickness * 0.5f); + float dx = p2.x - p1.x; + float dy = p2.y - p1.y; + IM_NORMALIZE2F_OVER_ZERO(dx, dy); + dx *= (thickness * 0.5f); + dy *= (thickness * 0.5f); + _VtxWritePtr[0].pos.x = p1.x + dy; _VtxWritePtr[0].pos.y = p1.y - dx; _VtxWritePtr[0].uv = uv; _VtxWritePtr[0].col = col; _VtxWritePtr[1].pos.x = p2.x + dy; _VtxWritePtr[1].pos.y = p2.y - dx; _VtxWritePtr[1].uv = uv; _VtxWritePtr[1].col = col; _VtxWritePtr[2].pos.x = p2.x - dy; _VtxWritePtr[2].pos.y = p2.y + dx; _VtxWritePtr[2].uv = uv; _VtxWritePtr[2].col = col; @@ -836,6 +850,7 @@ void ImDrawList::AddPolyline(const ImVec2* points, const int points_count, ImU32 } } +// We intentionally avoid using ImVec2 and its math operators here to reduce cost to a minimum for debug/non-inlined builds. void ImDrawList::AddConvexPolyFilled(const ImVec2* points, const int points_count, ImU32 col) { if (points_count < 3) @@ -867,10 +882,11 @@ void ImDrawList::AddConvexPolyFilled(const ImVec2* points, const int points_coun { const ImVec2& p0 = points[i0]; const ImVec2& p1 = points[i1]; - ImVec2 diff = p1 - p0; - diff *= ImInvLength(diff, 1.0f); - temp_normals[i0].x = diff.y; - temp_normals[i0].y = -diff.x; + float dx = p1.x - p0.x; + float dy = p1.y - p0.y; + IM_NORMALIZE2F_OVER_ZERO(dx, dy); + temp_normals[i0].x = dy; + temp_normals[i0].y = -dx; } for (int i0 = points_count-1, i1 = 0; i1 < points_count; i0 = i1++) @@ -878,19 +894,15 @@ void ImDrawList::AddConvexPolyFilled(const ImVec2* points, const int points_coun // Average normals const ImVec2& n0 = temp_normals[i0]; const ImVec2& n1 = temp_normals[i1]; - ImVec2 dm = (n0 + n1) * 0.5f; - float dmr2 = dm.x*dm.x + dm.y*dm.y; - if (dmr2 > 0.000001f) - { - float scale = 1.0f / dmr2; - if (scale > 100.0f) scale = 100.0f; - dm *= scale; - } - dm *= AA_SIZE * 0.5f; + float dm_x = (n0.x + n1.x) * 0.5f; + float dm_y = (n0.y + n1.y) * 0.5f; + IM_NORMALIZE2F_OVER_EPSILON_CLAMP(dm_x, dm_y, 0.000001f, 100.0f); + dm_x *= AA_SIZE * 0.5f; + dm_y *= AA_SIZE * 0.5f; // Add vertices - _VtxWritePtr[0].pos = (points[i1] - dm); _VtxWritePtr[0].uv = uv; _VtxWritePtr[0].col = col; // Inner - _VtxWritePtr[1].pos = (points[i1] + dm); _VtxWritePtr[1].uv = uv; _VtxWritePtr[1].col = col_trans; // Outer + _VtxWritePtr[0].pos.x = (points[i1].x - dm_x); _VtxWritePtr[0].pos.y = (points[i1].y - dm_y); _VtxWritePtr[0].uv = uv; _VtxWritePtr[0].col = col; // Inner + _VtxWritePtr[1].pos.x = (points[i1].x + dm_x); _VtxWritePtr[1].pos.y = (points[i1].y + dm_y); _VtxWritePtr[1].uv = uv; _VtxWritePtr[1].col = col_trans; // Outer _VtxWritePtr += 2; // Add indexes for fringes @@ -2422,7 +2434,7 @@ const ImFontGlyph* ImFont::FindGlyph(ImWchar c) const { if (c >= IndexLookup.Size) return FallbackGlyph; - const ImWchar i = IndexLookup[c]; + const ImWchar i = IndexLookup.Data[c]; if (i == (ImWchar)-1) return FallbackGlyph; return &Glyphs.Data[i]; @@ -2432,7 +2444,7 @@ const ImFontGlyph* ImFont::FindGlyphNoFallback(ImWchar c) const { if (c >= IndexLookup.Size) return NULL; - const ImWchar i = IndexLookup[c]; + const ImWchar i = IndexLookup.Data[c]; if (i == (ImWchar)-1) return NULL; return &Glyphs.Data[i]; @@ -2492,7 +2504,7 @@ const char* ImFont::CalcWordWrapPositionA(float scale, const char* text, const c } } - const float char_width = ((int)c < IndexAdvanceX.Size ? IndexAdvanceX[(int)c] : FallbackAdvanceX); + const float char_width = ((int)c < IndexAdvanceX.Size ? IndexAdvanceX.Data[c] : FallbackAdvanceX); if (ImCharIsBlankW(c)) { if (inside_word) @@ -2609,7 +2621,7 @@ ImVec2 ImFont::CalcTextSizeA(float size, float max_width, float wrap_width, cons continue; } - const float char_width = ((int)c < IndexAdvanceX.Size ? IndexAdvanceX[(int)c] : FallbackAdvanceX) * scale; + const float char_width = ((int)c < IndexAdvanceX.Size ? IndexAdvanceX.Data[c] : FallbackAdvanceX) * scale; if (line_width + char_width >= max_width) { s = prev_s;