summaryrefslogtreecommitdiff
path: root/xBRZ
diff options
context:
space:
mode:
Diffstat (limited to 'xBRZ')
-rw-r--r--xBRZ/src/xbrz.cpp387
-rw-r--r--xBRZ/src/xbrz.h1
-rw-r--r--xBRZ/src/xbrz_tools.h10
3 files changed, 237 insertions, 161 deletions
diff --git a/xBRZ/src/xbrz.cpp b/xBRZ/src/xbrz.cpp
index 0bf9db17..5228073f 100644
--- a/xBRZ/src/xbrz.cpp
+++ b/xBRZ/src/xbrz.cpp
@@ -246,24 +246,32 @@ struct BlendResult
};
+struct Kernel_3x3 //3x3 pixel neighborhood a..i in row-major order; declared ahead of Kernel_4x4 because that struct's layout is built around it
+{
+ uint32_t
+ a, b, c, //member order is load-bearing: Kernel_4x4 starts with the same nine uint32_t values in the same order,
+ d, e, f, //which is what lets scaleImage() reinterpret_cast a Kernel_4x4 to a Kernel_3x3 instead of copying
+ g, h, i;
+};
+
struct Kernel_4x4 //kernel for preprocessing step
{
uint32_t
- /**/a, b, c, d,
- /**/e, f, g, h,
- /**/i, j, k, l,
- /**/m, n, o, p;
+ a, b, c, //
+ e, f, g, // support reinterpret_cast from Kernel_4x4 => Kernel_3x3
+ i, j, k, //
+ m, n, o, //bottom row M, N, O: stored after the nine values above, so it lies outside the Kernel_3x3 view
+ d, h, l, p; //rightmost column D, H, L, P is stored last (not row-major!) so the nine leading members line up with Kernel_3x3's a..i
};
-/*
-input kernel area naming convention:
+/* input kernel area naming convention:
-----------------
| A | B | C | D |
-----|---|---|---|
-| E | F | G | H | //evaluate the four corners between F, G, J, K
-----|---|---|---| //input pixel is at position F
+|---|---|---|---|
+| E | F | G | H | evaluate the four corners between F, G, J, K
+|---|---|---|---| input pixel is at position F
| I | J | K | L |
-----|---|---|---|
+|---|---|---|---|
| M | N | O | P |
-----------------
*/
@@ -306,14 +314,6 @@ BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg)
return result;
}
-struct Kernel_3x3
-{
- uint32_t
- /**/a, b, c,
- /**/d, e, f,
- /**/g, h, i;
-};
-
#define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }
//we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token
DEF_GETTER(a) DEF_GETTER(b) DEF_GETTER(c)
@@ -346,12 +346,16 @@ inline BlendType getTopR (unsigned char b) { return static_cast<BlendType>(0x3
inline BlendType getBottomR(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 4)); }
inline BlendType getBottomL(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 6)); }
-inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing!
-inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); }
-inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); }
-inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); }
+inline void clearAddTopL(unsigned char& b, BlendType bt) { b = static_cast<unsigned char>(bt); } //plain assignment, not |=: overwrites the whole byte, i.e. also discards any TopR/BottomR/BottomL bits previously stored in b
+inline void addTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); } //buffer is assumed to be initialized before preprocessing!
+inline void addBottomR (unsigned char& b, BlendType bt) { b |= (bt << 4); } //e.g. via clearAddTopL()
+inline void addBottomL (unsigned char& b, BlendType bt) { b |= (bt << 6); } //
-inline bool blendingNeeded(unsigned char b) { return b != 0; }
+inline bool blendingNeeded(unsigned char b) //b: packed blend info for the four corners of one pixel; returns true if any bit is set
+{
+ static_assert(BLEND_NONE == 0); //"b != 0" only means "some corner is not BLEND_NONE" as long as BLEND_NONE is encoded as zero
+ return b != 0;
+}
template <RotationDegree rotDeg> inline
unsigned char rotateBlendInfo(unsigned char b) { return b; }
@@ -360,13 +364,12 @@ template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { ret
template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; }
-/*
-input kernel area naming convention:
+/* input kernel area naming convention:
-------------
| A | B | C |
-----|---|---|
-| D | E | F | //input pixel is at position E
-----|---|---|
+|---|---|---|
+| D | E | F | input pixel is at position E
+|---|---|---|
| G | H | I |
-------------
*/
@@ -456,7 +459,72 @@ void blendPixel(const Kernel_3x3& ker,
}
-template <class Scaler, class ColorDistance> //scaler policy: see "Scaler2x" reference implementation
+class OobReaderTransparent //out-of-bounds policy (selected for the ARGB scalers in xbrz::scale): pixels outside the source image are read as 0
+{
+public:
+ OobReaderTransparent(const uint32_t* src, int srcWidth, int srcHeight, int y) : //precomputes the row pointers for rows y - 1 ... y + 2 of "src"
+ s_m1(0 <= y - 1 && y - 1 < srcHeight ? src + srcWidth * (y - 1) : nullptr), //nullptr marks a row outside [0, srcHeight)
+ s_0 (0 <= y && y < srcHeight ? src + srcWidth * y : nullptr),
+ s_p1(0 <= y + 1 && y + 1 < srcHeight ? src + srcWidth * (y + 1) : nullptr),
+ s_p2(0 <= y + 2 && y + 2 < srcHeight ? src + srcWidth * (y + 2) : nullptr),
+ srcWidth_(srcWidth) {}
+
+ void readDhlp(Kernel_4x4& ker, int x) const //(x, y) is at kernel position F; x may be negative: scaleImage() passes -4 ... -1 to prime the kernel; only d, h, l, p are written
+ {
+ [[likely]] if (const int x_p2 = x + 2; 0 <= x_p2 && x_p2 < srcWidth_) //NOTE(review): the attribute precedes the whole if statement; hinting the true branch would be spelled "if (...) [[likely]]" - confirm intent
+ {
+ ker.d = s_m1 ? s_m1[x_p2] : 0; //column x + 2 of rows y - 1, y, y + 1, y + 2; a row that is out of bounds reads as 0
+ ker.h = s_0 ? s_0 [x_p2] : 0;
+ ker.l = s_p1 ? s_p1[x_p2] : 0;
+ ker.p = s_p2 ? s_p2[x_p2] : 0;
+ }
+ else //column x + 2 is outside [0, srcWidth_): the whole kernel column reads as 0
+ {
+ ker.d = 0;
+ ker.h = 0;
+ ker.l = 0;
+ ker.p = 0;
+ }
+ }
+
+private:
+ const uint32_t* const s_m1; //row y - 1, or nullptr if out of bounds
+ const uint32_t* const s_0; //row y, or nullptr if out of bounds
+ const uint32_t* const s_p1; //row y + 1, or nullptr if out of bounds
+ const uint32_t* const s_p2; //row y + 2, or nullptr if out of bounds
+ const int srcWidth_; //row length in pixels; also used as the row stride in the constructor
+};
+
+
+class OobReaderDuplicate //out-of-bounds policy (selected for the RGB scalers in xbrz::scale): coordinates are clamped, i.e. the nearest border pixel is repeated
+{
+public:
+ OobReaderDuplicate(const uint32_t* src, int srcWidth, int srcHeight, int y) : //precomputes the row pointers for rows y - 1 ... y + 2, each clamped to [0, srcHeight - 1]
+ s_m1(src + srcWidth * std::clamp(y - 1, 0, srcHeight - 1)), //NOTE(review): std::clamp requires lo <= hi, so this relies on srcHeight >= 1 - confirm the caller guarantees it
+ s_0 (src + srcWidth * std::clamp(y, 0, srcHeight - 1)),
+ s_p1(src + srcWidth * std::clamp(y + 1, 0, srcHeight - 1)),
+ s_p2(src + srcWidth * std::clamp(y + 2, 0, srcHeight - 1)),
+ srcWidth_(srcWidth) {}
+
+ void readDhlp(Kernel_4x4& ker, int x) const //(x, y) is at kernel position F; x may be negative: scaleImage() passes -4 ... -1 to prime the kernel; only d, h, l, p are written
+ {
+ const int x_p2 = std::clamp(x + 2, 0, srcWidth_ - 1); //column x + 2, clamped into the image; NOTE(review): likewise relies on srcWidth_ >= 1
+ ker.d = s_m1[x_p2];
+ ker.h = s_0 [x_p2];
+ ker.l = s_p1[x_p2];
+ ker.p = s_p2[x_p2];
+ }
+
+private:
+ const uint32_t* const s_m1; //row y - 1 (clamped), never null
+ const uint32_t* const s_0; //row y (clamped)
+ const uint32_t* const s_p1; //row y + 1 (clamped)
+ const uint32_t* const s_p2; //row y + 2 (clamped)
+ const int srcWidth_; //row length in pixels; also used as the row stride in the constructor
+};
+
+
+template <class Scaler, class ColorDistance, class OobReader> //scaler policy: see "Scaler2x" reference implementation
void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
{
yFirst = std::max(yFirst, 0);
@@ -466,64 +534,72 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
const int trgWidth = srcWidth * Scaler::scale;
- //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
- //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
- const int bufferSize = srcWidth;
- unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize;
- std::fill(preProcBuffer, preProcBuffer + bufferSize, '\0');
- static_assert(BLEND_NONE == 0);
+ //(ab)use space of "sizeof(uint32_t) * srcWidth * Scaler::scale" at the end of the image as temporary
+ //buffer for "on the fly preprocessing" without risk of accidental overwriting before accessing
+ unsigned char* const preProcBuf = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - srcWidth;
//initialize preprocessing buffer for first row of current stripe: detect upper left and right corner blending
//this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
- if (yFirst > 0)
{
- const int y = yFirst - 1;
+ const OobReader oobReader(src, srcWidth, srcHeight, yFirst - 1);
+
+ //initialize at position x = -1
+ Kernel_4x4 ker4 = {};
+ oobReader.readDhlp(ker4, -4); //hack: read a, e, i, m at x = -1
+ ker4.a = ker4.d;
+ ker4.e = ker4.h;
+ ker4.i = ker4.l;
+ ker4.m = ker4.p;
+
+ oobReader.readDhlp(ker4, -3);
+ ker4.b = ker4.d;
+ ker4.f = ker4.h;
+ ker4.j = ker4.l;
+ ker4.n = ker4.p;
+
+ oobReader.readDhlp(ker4, -2);
+ ker4.c = ker4.d;
+ ker4.g = ker4.h;
+ ker4.k = ker4.l;
+ ker4.o = ker4.p;
+
+ oobReader.readDhlp(ker4, -1);
- const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
- const uint32_t* s_0 = src + srcWidth * y; //center line
- const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
- const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
+ {
+ const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
+ clearAddTopL(preProcBuf[0], res.blend_k); //set 1st known corner for (0, yFirst)
+ }
for (int x = 0; x < srcWidth; ++x)
{
- const int x_m1 = std::max(x - 1, 0);
- const int x_p1 = std::min(x + 1, srcWidth - 1);
- const int x_p2 = std::min(x + 2, srcWidth - 1);
-
- Kernel_4x4 ker = {}; //perf: initialization is negligible
- ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
- ker.b = s_m1[x];
- ker.c = s_m1[x_p1];
- ker.d = s_m1[x_p2];
-
- ker.e = s_0[x_m1];
- ker.f = s_0[x];
- ker.g = s_0[x_p1];
- ker.h = s_0[x_p2];
-
- ker.i = s_p1[x_m1];
- ker.j = s_p1[x];
- ker.k = s_p1[x_p1];
- ker.l = s_p1[x_p2];
-
- ker.m = s_p2[x_m1];
- ker.n = s_p2[x];
- ker.o = s_p2[x_p1];
- ker.p = s_p2[x_p2];
-
- const BlendResult res = preProcessCorners<ColorDistance>(ker, cfg);
- /*
- preprocessing blend result:
- ---------
- | F | G | //evalute corner between F, G, J, K
- ----|---| //input pixel is at position F
- | J | K |
- ---------
- */
- setTopR(preProcBuffer[x], res.blend_j);
+ ker4.a = ker4.b; //shift previous kernel to the left
+ ker4.e = ker4.f; // -----------------
+ ker4.i = ker4.j; // | A | B | C | D |
+ ker4.m = ker4.n; // |---|---|---|---|
+ /**/ // | E | F | G | H | (x, yFirst - 1) is at position F
+ ker4.b = ker4.c; // |---|---|---|---|
+ ker4.f = ker4.g; // | I | J | K | L |
+ ker4.j = ker4.k; // |---|---|---|---|
+ ker4.n = ker4.o; // | M | N | O | P |
+ /**/ // -----------------
+ ker4.c = ker4.d;
+ ker4.g = ker4.h;
+ ker4.k = ker4.l;
+ ker4.o = ker4.p;
+
+ oobReader.readDhlp(ker4, x);
+
+ /* preprocessing blend result:
+ ---------
+ | F | G | evaluate corner between F, G, J, K
+ |---+---| current input pixel is at position F
+ | J | K |
+ --------- */
+ const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
+ addTopR(preProcBuf[x], res.blend_j); //set 2nd known corner for (x, yFirst)
- if (x + 1 < bufferSize)
- setTopL(preProcBuffer[x + 1], res.blend_k);
+ if (x + 1 < srcWidth)
+ clearAddTopL(preProcBuf[x + 1], res.blend_k); //set 1st known corner for (x + 1, yFirst)
}
}
//------------------------------------------------------------------------------------
@@ -532,88 +608,89 @@ void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight,
{
uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
- const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
- const uint32_t* s_0 = src + srcWidth * y; //center line
- const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
- const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
+ const OobReader oobReader(src, srcWidth, srcHeight, y);
- unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
+ //initialize at position x = -1
+ Kernel_4x4 ker4 = {};
+ oobReader.readDhlp(ker4, -4); //hack: read a, e, i, m at x = -1
+ ker4.a = ker4.d;
+ ker4.e = ker4.h;
+ ker4.i = ker4.l;
+ ker4.m = ker4.p;
- for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
- {
- //all those bounds checks have only insignificant impact on performance!
- const int x_m1 = std::max(x - 1, 0); //perf: prefer array indexing to additional pointers!
- const int x_p1 = std::min(x + 1, srcWidth - 1);
- const int x_p2 = std::min(x + 2, srcWidth - 1);
+ oobReader.readDhlp(ker4, -3);
+ ker4.b = ker4.d;
+ ker4.f = ker4.h;
+ ker4.j = ker4.l;
+ ker4.n = ker4.p;
- Kernel_4x4 ker4 = {}; //perf: initialization is negligible
+ oobReader.readDhlp(ker4, -2);
+ ker4.c = ker4.d;
+ ker4.g = ker4.h;
+ ker4.k = ker4.l;
+ ker4.o = ker4.p;
- ker4.a = s_m1[x_m1]; //read sequentially from memory as far as possible
- ker4.b = s_m1[x];
- ker4.c = s_m1[x_p1];
- ker4.d = s_m1[x_p2];
+ oobReader.readDhlp(ker4, -1);
- ker4.e = s_0[x_m1];
- ker4.f = s_0[x];
- ker4.g = s_0[x_p1];
- ker4.h = s_0[x_p2];
+ unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
+ {
+ const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
+ clearAddTopL(blend_xy1, res.blend_k); //set 1st known corner for (0, y + 1) and buffer for use on next column
- ker4.i = s_p1[x_m1];
- ker4.j = s_p1[x];
- ker4.k = s_p1[x_p1];
- ker4.l = s_p1[x_p2];
+ addBottomL(preProcBuf[0], res.blend_g); //set 3rd known corner for (0, y)
+ }
- ker4.m = s_p2[x_m1];
- ker4.n = s_p2[x];
- ker4.o = s_p2[x_p1];
- ker4.p = s_p2[x_p2];
+ for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
+ {
+ ker4.a = ker4.b; //shift previous kernel to the left
+ ker4.e = ker4.f; // -----------------
+ ker4.i = ker4.j; // | A | B | C | D |
+ ker4.m = ker4.n; // |---|---|---|---|
+ /**/ // | E | F | G | H | (x, y) is at position F
+ ker4.b = ker4.c; // |---|---|---|---|
+ ker4.f = ker4.g; // | I | J | K | L |
+ ker4.j = ker4.k; // |---|---|---|---|
+ ker4.n = ker4.o; // | M | N | O | P |
+ /**/ // -----------------
+ ker4.c = ker4.d;
+ ker4.g = ker4.h;
+ ker4.k = ker4.l;
+ ker4.o = ker4.p;
+
+ oobReader.readDhlp(ker4, x);
//evaluate the four corners on bottom-right of current pixel
- unsigned char blend_xy = 0; //for current (x, y) position
+ unsigned char blend_xy = preProcBuf[x]; //for current (x, y) position
{
+ /* preprocessing blend result:
+ ---------
+ | F | G | evaluate corner between F, G, J, K
+ |---+---| current input pixel is at position F
+ | J | K |
+ --------- */
const BlendResult res = preProcessCorners<ColorDistance>(ker4, cfg);
- /*
- preprocessing blend result:
- ---------
- | F | G | //evalute corner between F, G, J, K
- ----|---| //current input pixel is at position F
- | J | K |
- ---------
- */
- blend_xy = preProcBuffer[x];
- setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
+ addBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
- setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
- preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
+ addTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
+ preProcBuf[x] = blend_xy1; //store on current buffer position for use on next row
- blend_xy1 = 0;
- setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
+ [[likely]] if (x + 1 < srcWidth)
+ {
+ //blend_xy1 -> blend_x1y1
+ clearAddTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
- if (x + 1 < bufferSize) //set 3rd known corner for (x + 1, y)
- setBottomL(preProcBuffer[x + 1], res.blend_g);
+ addBottomL(preProcBuf[x + 1], res.blend_g); //set 3rd known corner for (x + 1, y)
+ }
}
//fill block of size scale * scale with the given color
fillBlock(out, trgWidth * sizeof(uint32_t), ker4.f, Scaler::scale, Scaler::scale);
- //place *after* preprocessing step, to not overwrite the results while processing the the last pixel!
+ //place *after* preprocessing step, to not overwrite the results while processing the last pixel!
- //blend four corners of current pixel
- if (blendingNeeded(blend_xy)) //good 5% perf-improvement
+ //blend all four corners of current pixel
+ if (blendingNeeded(blend_xy))
{
- Kernel_3x3 ker3 = {}; //perf: initialization is negligible
-
- ker3.a = ker4.a;
- ker3.b = ker4.b;
- ker3.c = ker4.c;
-
- ker3.d = ker4.e;
- ker3.e = ker4.f;
- ker3.f = ker4.g;
-
- ker3.g = ker4.i;
- ker3.h = ker4.j;
- ker3.i = ker4.k;
-
+ const auto& ker3 = reinterpret_cast<const Kernel_3x3&>(ker4); //"The Things We Do for Perf"
blendPixel<Scaler, ColorDistance, ROT_0 >(ker3, out, trgWidth, blend_xy, cfg);
blendPixel<Scaler, ColorDistance, ROT_90 >(ker3, out, trgWidth, blend_xy, cfg);
blendPixel<Scaler, ColorDistance, ROT_180>(ker3, out, trgWidth, blend_xy, cfg);
@@ -1076,15 +1153,15 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth
switch (factor)
{
case 2:
- return scaleImage<Scaler2x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler2x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 3:
- return scaleImage<Scaler3x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler3x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 4:
- return scaleImage<Scaler4x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler4x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 5:
- return scaleImage<Scaler5x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler5x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 6:
- return scaleImage<Scaler6x<ColorGradientRGB>, ColorDistanceRGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler6x<ColorGradientRGB>, ColorDistanceRGB, OobReaderDuplicate>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
}
break;
@@ -1092,15 +1169,15 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth
switch (factor)
{
case 2:
- return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 3:
- return scaleImage<Scaler3x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler3x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 4:
- return scaleImage<Scaler4x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler4x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 5:
- return scaleImage<Scaler5x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler5x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 6:
- return scaleImage<Scaler6x<ColorGradientARGB>, ColorDistanceARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler6x<ColorGradientARGB>, ColorDistanceARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
}
break;
@@ -1108,15 +1185,15 @@ void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth
switch (factor)
{
case 2:
- return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler2x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 3:
- return scaleImage<Scaler3x<ColorGradientARGB>, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler3x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 4:
- return scaleImage<Scaler4x<ColorGradientARGB>, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler4x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 5:
- return scaleImage<Scaler5x<ColorGradientARGB>, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler5x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
case 6:
- return scaleImage<Scaler6x<ColorGradientARGB>, ColorDistanceUnbufferedARGB>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
+ return scaleImage<Scaler6x<ColorGradientARGB>, ColorDistanceUnbufferedARGB, OobReaderTransparent>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
}
break;
}
diff --git a/xBRZ/src/xbrz.h b/xBRZ/src/xbrz.h
index f7f7169a..492fb43a 100644
--- a/xBRZ/src/xbrz.h
+++ b/xBRZ/src/xbrz.h
@@ -51,7 +51,6 @@ const int SCALE_FACTOR_MAX = 6;
/*
-> map source (srcWidth * srcHeight) to target (scale * width x scale * height) image, optionally processing a half-open slice of rows [yFirst, yLast) only
--> support for source/target pitch in bytes!
-> if your emulator changes only a few image slices during each cycle (e.g. DOSBox) then there's no need to run xBRZ on the complete image:
Just make sure you enlarge the source image slice by 2 rows on top and 2 on bottom (this is the additional range the xBRZ algorithm is using during analysis)
CAVEAT: If there are multiple changed slices, make sure they do not overlap after adding these additional rows in order to avoid a memory race condition
diff --git a/xBRZ/src/xbrz_tools.h b/xBRZ/src/xbrz_tools.h
index 15bea025..b8bb8aa0 100644
--- a/xBRZ/src/xbrz_tools.h
+++ b/xBRZ/src/xbrz_tools.h
@@ -56,7 +56,7 @@ Pix* byteAdvance(Pix* ptr, int bytes)
//fill block with the given color
template <class Pix> inline
-void fillBlock(Pix* trg, int pitch, Pix col, int blockWidth, int blockHeight)
+void fillBlock(Pix* trg, int pitch /*[bytes]*/, Pix col, int blockWidth, int blockHeight)
{
//for (int y = 0; y < blockHeight; ++y, trg = byteAdvance(trg, pitch))
// std::fill(trg, trg + blockWidth, col);
@@ -69,8 +69,8 @@ void fillBlock(Pix* trg, int pitch, Pix col, int blockWidth, int blockHeight)
//nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!)
template <class PixSrc, class PixTrg, class PixConverter>
-void nearestNeighborScale(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch,
- /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch,
+void nearestNeighborScale(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch /*[bytes]*/,
+ /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch /*[bytes]*/,
int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/)
{
static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*");
@@ -106,8 +106,8 @@ void nearestNeighborScale(const PixSrc* src, int srcWidth, int srcHeight, int sr
//nearest-neighbor (going over source image - fast for upscaling, since source is read only once
template <class PixSrc, class PixTrg, class PixConverter>
-void nearestNeighborScaleOverSource(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch,
- /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch,
+void nearestNeighborScaleOverSource(const PixSrc* src, int srcWidth, int srcHeight, int srcPitch /*[bytes]*/,
+ /**/ PixTrg* trg, int trgWidth, int trgHeight, int trgPitch /*[bytes]*/,
int yFirst, int yLast, PixConverter pixCvrt /*convert PixSrc to PixTrg*/)
{
static_assert(std::is_integral<PixSrc>::value, "PixSrc* is expected to be cast-able to char*");
bgstack15