From 840e906a4ddbbb32b8a5989e8a0ce10c8c374819 Mon Sep 17 00:00:00 2001
From: B Stack <bgstack15@gmail.com>
Date: Tue, 2 Mar 2021 17:23:41 -0500
Subject: add upstream 11.7

---
 xBRZ/src/xbrz.cpp     | 60 ++++++++++++++++++++++++++++++++-------------------
 xBRZ/src/xbrz.h       | 33 ++++++++++++++--------------
 xBRZ/src/xbrz_tools.h | 33 ++++++++++++++++++++++++----
 3 files changed, 83 insertions(+), 43 deletions(-)

(limited to 'xBRZ')
diff --git a/xBRZ/src/xbrz.cpp b/xBRZ/src/xbrz.cpp
index 50660b84..6c015aa1 100644
--- a/xBRZ/src/xbrz.cpp
+++ b/xBRZ/src/xbrz.cpp
@@ -32,7 +32,10 @@ uint32_t gradientRGB(uint32_t pixFront, uint32_t pixBack) //blend front color wi
 {
     static_assert(0 < M && M < N && N <= 1000);
 
-    auto calcColor = [](unsigned char colFront, unsigned char colBack) -> unsigned char { return (colFront * M + colBack * (N - M)) / N; };
+    auto calcColor = [](unsigned char colFront, unsigned char colBack)
+    {
+        return static_cast<unsigned char>(uintDivRound(colFront * M + colBack * (N - M), N));
+    };
 
     return makePixel(calcColor(getRed  (pixFront), getRed  (pixBack)),
                      calcColor(getGreen(pixFront), getGreen(pixBack)),
@@ -53,10 +56,10 @@ uint32_t gradientARGB(uint32_t pixFront, uint32_t pixBack) //find intermediate c
 
     auto calcColor = [=](unsigned char colFront, unsigned char colBack)
     {
-        return static_cast<unsigned char>((colFront * weightFront + colBack * weightBack) / weightSum);
+        return static_cast<unsigned char>(uintDivRound(colFront * weightFront + colBack * weightBack, weightSum));
     };
 
-    return makePixel(static_cast<unsigned char>(weightSum / N),
+    return makePixel(static_cast<unsigned char>(uintDivRound(weightSum, N)),
                      calcColor(getRed  (pixFront), getRed  (pixBack)),
                      calcColor(getGreen(pixFront), getGreen(pixBack)),
                      calcColor(getBlue (pixFront), getBlue (pixBack)));
@@ -154,7 +157,7 @@ double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
 {
     //https://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
     //YCbCr conversion is a matrix multiplication => take advantage of linearity by subtracting first!
-    const int r_diff = static_cast<int>(getRed  (pix1)) - getRed  (pix2); //we may delay division by 255 to after matrix multiplication
+    const int r_diff = static_cast<int>(getRed  (pix1)) - getRed  (pix2); //defer division by 255 to after matrix multiplication
     const int g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2); //
     const int b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2); //substraction for int is noticeable faster than for double!
 
@@ -1094,23 +1097,23 @@ struct ColorDistanceARGB
     {
         const double a1 = getAlpha(pix1) / 255.0 ;
         const double a2 = getAlpha(pix2) / 255.0 ;
-        /*
-        Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1]
+
+        /*  Requirements for a color distance handling alpha channel: with a1, a2 in [0, 1]
 
             1. if a1 = a2, distance should be: a1 * distYCbCr()
             2. if a1 = 0,  distance should be: a2 * distYCbCr(black, white) = a2 * 255
             3. if a1 = 1,  ??? maybe: 255 * (1 - a2) + a2 * distYCbCr()
-        */
 
-        //return std::min(a1, a2) * distYCbCrBuffered(pix1, pix2) + 255 * abs(a1 - a2);
+            std::min(a1, a2) * distYCbCrBuffered(pix1, pix2) + 255 * abs(a1 - a2);
+
+            alternative? std::sqrt(a1 * a2 * square(distYCbCrBuffered(pix1, pix2)) + square(255 * (a1 - a2)));   */
+
         //=> following code is 15% faster:
         const double d = distYCbCrBuffered(pix1, pix2);
         if (a1 < a2)
             return a1 * d + 255 * (a2 - a1);
         else
             return a2 * d + 255 * (a1 - a2);
-
-        //alternative? return std::sqrt(a1 * a2 * square(distYCbCrBuffered(pix1, pix2)) + square(255 * (a1 - a2)));
     }
 };
 
@@ -1163,7 +1166,7 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth
     switch (colFmt)
     {
         //*INDENT-OFF*
-        case ColorFormat::RGB:
+        case ColorFormat::rgb:
             switch (factor)
             {
                 case 2: return scaleImage<Scaler2x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
@@ -1174,7 +1177,7 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth
             }
             break;
 
-        case ColorFormat::ARGB:
+        case ColorFormat::argb:
             switch (factor)
             {
                 case 2: return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
@@ -1185,7 +1188,7 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth
             }
             break;
 
-        case ColorFormat::ARGB_UNBUFFERED:
+        case ColorFormat::argbUnbuffered:
             switch (factor)
             {
                 case 2: return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
@@ -1205,11 +1208,11 @@ bool xbrz::equalColorTest(uint32_t col1, uint32_t col2, ColorFormat colFmt, doub
 {
     switch (colFmt)
     {
-        case ColorFormat::RGB:
+        case ColorFormat::rgb:
             return ColorDistanceRGB::dist(col1, col2, luminanceWeight) < equalColorTolerance;
-        case ColorFormat::ARGB:
+        case ColorFormat::argb:
             return ColorDistanceARGB::dist(col1, col2, luminanceWeight) < equalColorTolerance;
-        case ColorFormat::ARGB_UNBUFFERED:
+        case ColorFormat::argbUnbuffered:
             return ColorDistanceUnbufferedARGB::dist(col1, col2, luminanceWeight) < equalColorTolerance;
     }
     assert(false);
@@ -1223,13 +1226,26 @@ void xbrz::bilinearScale(const uint32_t* src, int srcWidth, int srcHeight,
     const auto imgReader = [src, srcWidth](int x, int y, BytePixel& pix)
     {
         static_assert(sizeof(pix) == sizeof(uint32_t));
-        std::memcpy(pix, src + y * srcWidth + x, sizeof(pix));
+        const uint32_t pixSrc = src[y * srcWidth + x];
+
+        const unsigned char a = getAlpha(pixSrc);
+        pix[0] = a;
+        pix[1] = xbrz::premultiply(getRed  (pixSrc), a); //r
+        pix[2] = xbrz::premultiply(getGreen(pixSrc), a); //g
+        pix[3] = xbrz::premultiply(getBlue (pixSrc), a); //b
     };
 
-    const auto imgWriter = [trg](const xbrz::BytePixel& pix) mutable { std::memcpy(trg++, pix, sizeof(pix)); };
+    const auto imgWriter = [trg](const xbrz::BytePixel& pix) mutable
+    {
+        const unsigned char a = pix[0];
+        * trg++ = makePixel(a,
+                            xbrz::demultiply(pix[1], a),  //r
+                            xbrz::demultiply(pix[2], a),  //g
+                            xbrz::demultiply(pix[3], a)); //b
+    };
 
-    bilinearScale(imgReader, srcWidth, srcHeight,
-                  imgWriter, trgWidth, trgHeight, 0, trgHeight);
+    bilinearScaleSimple(imgReader, srcWidth, srcHeight,
+                        imgWriter, trgWidth, trgHeight, 0, trgHeight);
 }
 
 
@@ -1262,8 +1278,8 @@ void bilinearScaleCpu(const uint32_t* src, int srcWidth, int srcHeight,
         tg.run([=]
     {
         const int iLast = std::min(i + TASK_GRANULARITY, trgHeight);
-        xbrz::bilinearScale(src, srcWidth, srcHeight, srcWidth * sizeof(uint32_t),
-                            trg, trgWidth, trgHeight, trgWidth * sizeof(uint32_t),
+        xbrz::bilinearScaleSimple(src, srcWidth, srcHeight, srcWidth * sizeof(uint32_t),
+                                  trg, trgWidth, trgHeight, trgWidth * sizeof(uint32_t),
         i, iLast, [](uint32_t pix) { return pix; });
     });
     tg.wait();
diff --git a/xBRZ/src/xbrz.h b/xBRZ/src/xbrz.h
index c0778cf1..b3a496ba 100644
--- a/xBRZ/src/xbrz.h
+++ b/xBRZ/src/xbrz.h
@@ -26,26 +26,24 @@
 
 namespace xbrz
 {
-/*
--------------------------------------------------------------------------
-| xBRZ: "Scale by rules" - high quality image upscaling filter by Zenju |
--------------------------------------------------------------------------
-using a modified approach of xBR:
-http://board.byuu.org/viewtopic.php?f=10&t=2248
-- new rule set preserving small image features
-- highly optimized for performance
-- support alpha channel
-- support multithreading
-- support 64-bit architectures
-- support processing image slices
-- support scaling up to 6xBRZ
-*/
+/*  -------------------------------------------------------------------------
+    | xBRZ: "Scale by rules" - high quality image upscaling filter by Zenju |
+    -------------------------------------------------------------------------
+    using a modified approach of xBR:
+    http://board.byuu.org/viewtopic.php?f=10&t=2248
+    - new rule set preserving small image features
+    - highly optimized for performance
+    - support alpha channel
+    - support multithreading
+    - support 64-bit architectures
+    - support processing image slices
+    - support scaling up to 6xBRZ                                             */
 
 enum class ColorFormat //from high bits -> low bits, 8 bit per channel
 {
-    RGB,  //8 bit for each red, green, blue, upper 8 bits unused
-    ARGB, //including alpha channel, BGRA byte order on little-endian machines
-    ARGB_UNBUFFERED, //like ARGB, but without the one-time buffer creation overhead (ca. 100 - 300 ms) at the expense of a slightly slower scaling time
+    rgb,  //8 bit for each red, green, blue, upper 8 bits unused
+    argb, //including alpha channel, BGRA byte order on little-endian machines
+    argbUnbuffered, //like argb, but without the one-time buffer creation overhead (ca. 100 - 300 ms) at the expense of a slightly slower scaling time
 };
 
 const int SCALE_FACTOR_MAX = 6;
@@ -66,6 +64,7 @@ void scale(size_t factor, //valid range: 2 - SCALE_FACTOR_MAX
            const ScalerCfg& cfg = ScalerCfg(),
            int yFirst = 0, int yLast = std::numeric_limits<int>::max()); //slice of source image
 
+//BGRA byte order
 void bilinearScale(const uint32_t* src, int srcWidth, int srcHeight,
                    /**/  uint32_t* trg, int trgWidth, int trgHeight);
 
diff --git a/xBRZ/src/xbrz_tools.h b/xBRZ/src/xbrz_tools.h
index 98164678..d6e48c0c 100644
--- a/xBRZ/src/xbrz_tools.h
+++ b/xBRZ/src/xbrz_tools.h
@@ -87,10 +87,35 @@ void nearestNeighborScale(PixReader srcReader /* (int x, int y, BytePixel& pix)
 }
 
 
+inline
+unsigned int uintDivRound(unsigned int num, unsigned int den) //unsigned integer division rounded to nearest (exact halves round up)
+{
+    assert(den != 0); //precondition: a zero denominator is a caller bug
+    return (num + den / 2) / den; //adding den / 2 before the truncating division rounds to nearest; NOTE(review): num + den / 2 can wrap around for num near UINT_MAX
+}
+
+
+inline
+unsigned char premultiply(unsigned char c, unsigned char alpha) //scale color channel c by alpha / 255, rounded to nearest
+{
+    return static_cast<unsigned char>(uintDivRound(static_cast<unsigned int>(c) * alpha, 255)); //quotient never exceeds c => narrowing cast is lossless
+    //premultiply/demultiply using int div round is more accurate than int div floor/ceil pair
+}
+
+
+inline
+unsigned char demultiply(unsigned char c, unsigned char alpha) //approximate inverse of premultiply(): rescale c by 255 / alpha, rounded to nearest
+{
+    return static_cast<unsigned char>(alpha == 0 ? 0 : //alpha == 0: return 0 instead of dividing by zero
+                                      std::clamp(uintDivRound(static_cast<unsigned int>(c) * 255, alpha), 0U, 255U)); //clamp needed: quotient exceeds 255 whenever c > alpha
+}
+
+
+//caveat: treats alpha channel like regular color! => caller needs to pre/de-multiply alpha!
 template <class PixReader, class PixWriter>
-void bilinearScale(PixReader srcReader /* (int x, int y, BytePixel& pix) */, int srcWidth, int srcHeight,
-                   PixWriter trgWriter /* (const BytePixel& pix)         */, int trgWidth, int trgHeight,
-                   int yFirst, int yLast)
+void bilinearScaleSimple(PixReader srcReader /* (int x, int y, BytePixel& pix) */, int srcWidth, int srcHeight,
+                         PixWriter trgWriter /* (const BytePixel& pix)         */, int trgWidth, int trgHeight,
+                         int yFirst, int yLast)
 {
     yFirst = std::max(yFirst, 0);
     yLast  = std::min(yLast, trgHeight);
@@ -121,7 +146,7 @@ void bilinearScale(PixReader srcReader /* (int x, int y, BytePixel& pix) */, int
         const double xx1 = x / scaleX - x1;
         const double x2x = 1 - xx1;
 
-        buf[x] = { x1, x2, xx1, x2x };
+        buf[x] = {x1, x2, xx1, x2x};
     }
 
     for (int y = yFirst; y < yLast; ++y)
-- 
cgit