Hi again,
Sorry, the first patch contained some unnecessary changes. This one is better.
On 09.01.2014 18:24, Prakash Punnoor wrote:
Hi,
The following patch yields a speed-up of roughly 1.4x (~210 ms to ~150 ms)
for a test image from my Nikon D5100, using GCC 4.8.2 on Linux on my AMD
Phenom II X4 840.
Unfortunately the line endings seem to be messed up, so sorry for that in
advance (applying dos2unix/unix2dos to the source file helps).
Some comments:
- BitPumpMSB doesn't need to be a member of NikonDecompressor, as it
is only instantiated when decompressing.
- Allocating BitPumpMSB on the stack seems to speed up decoding (I guess
because of better memory locality and/or because the compiler can do a better job).
- No need for dtor in Nikon decompressor anymore.
- BitPumpMSB: Allocating current_buffer inline (as a member array) also helps locality.
- BitPumpMSB has no need for a (virtual) dtor anymore.
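- current_buffer used to be 16-byte aligned via _aligned_malloc(16, 16); the
inline array no longer guarantees that, so aligning it using some compiler
attributes might help further.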
- Other decompressors might profit from similar code changes.
Cheers,
Prakash
_______________________________________________
Rawstudio-dev mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-dev
Index: BitPumpMSB.cpp
===================================================================
--- BitPumpMSB.cpp (revision 611)
+++ BitPumpMSB.cpp (working copy)
@@ -40,9 +40,6 @@
__inline void BitPumpMSB::init() {
mStuffed = 0;
- current_buffer = (uchar8*)_aligned_malloc(16, 16);
- if (!current_buffer)
- ThrowRDE("BitPumpMSB::init(): Unable to allocate memory");
memset(current_buffer,0,16);
fill();
}
@@ -122,11 +119,5 @@
fill();
}
-
-
-BitPumpMSB::~BitPumpMSB(void) {
- _aligned_free(current_buffer);
-}
-
} // namespace RawSpeed
Index: BitPumpMSB.h
===================================================================
--- BitPumpMSB.h (revision 611)
+++ BitPumpMSB.h (working copy)
@@ -126,11 +126,10 @@
return ret & 0xff;
}
- virtual ~BitPumpMSB(void);
protected:
void __inline init();
+ uchar8 current_buffer[16];
const uchar8* buffer;
- uchar8* current_buffer;
const uint32 size; // This if the end of buffer.
uint32 mLeft;
uint32 off; // Offset in bytes
Index: NikonDecompressor.cpp
===================================================================
--- NikonDecompressor.cpp (revision 611)
+++ NikonDecompressor.cpp (working copy)
@@ -1,5 +1,7 @@
#include "StdAfx.h"
#include "NikonDecompressor.h"
+#include "BitPumpMSB.h"
+
/*
RawSpeed - RAW file decoder.
@@ -29,15 +31,8 @@
for (uint32 i = 0; i < 0x8000 ; i++) {
curve[i] = i;
}
- bits = 0;
}
-NikonDecompressor::~NikonDecompressor(void) {
- if (bits)
- delete(bits);
- bits = 0;
-
-}
void NikonDecompressor::initTable(uint32 huffSelect) {
HuffmanTable *dctbl1 = &huff[0];
uint32 acc = 0;
@@ -104,7 +99,7 @@
curve[i] = top;
uint32 x, y;
- bits = new BitPumpMSB(mFile->getData(offset), size);
+ BitPumpMSB bits(mFile->getData(offset), size);
uchar8 *draw = mRaw->getData();
uint32 *dest;
uint32 pitch = mRaw->pitch;
@@ -118,15 +113,15 @@
initTable(huffSelect + 1);
}
dest = (uint32*) & draw[y*pitch]; // Adjust destination
- pUp1[y&1] += HuffDecodeNikon();
- pUp2[y&1] += HuffDecodeNikon();
+ pUp1[y&1] += HuffDecodeNikon(bits);
+ pUp2[y&1] += HuffDecodeNikon(bits);
pLeft1 = pUp1[y&1];
pLeft2 = pUp2[y&1];
dest[0] = curve[clampbits(pLeft1,15)] | ((uint32)curve[clampbits(pLeft2,15)] << 16);
for (x = 1; x < cw; x++) {
- bits->checkPos();
- pLeft1 += HuffDecodeNikon();
- pLeft2 += HuffDecodeNikon();
+ bits.checkPos();
+ pLeft1 += HuffDecodeNikon(bits);
+ pLeft2 += HuffDecodeNikon(bits);
dest[x] = curve[clampbits(pLeft1,15)] | ((uint32)curve[clampbits(pLeft2,15)] << 16);
}
}
@@ -148,7 +143,7 @@
*
*--------------------------------------------------------------
*/
-int NikonDecompressor::HuffDecodeNikon() {
+int NikonDecompressor::HuffDecodeNikon(BitPumpMSB& bits) {
int rv;
int l, temp;
int code, val ;
@@ -155,26 +150,26 @@
HuffmanTable *dctbl1 = &huff[0];
- bits->fill();
- code = bits->peekBitsNoFill(14);
+ bits.fill();
+ code = bits.peekBitsNoFill(14);
val = dctbl1->bigTable[code];
if ((val&0xff) != 0xff) {
- bits->skipBitsNoFill(val&0xff);
+ bits.skipBitsNoFill(val&0xff);
return val >> 8;
}
rv = 0;
- code = bits->peekByteNoFill();
+ code = bits.peekByteNoFill();
val = dctbl1->numbits[code];
l = val & 15;
if (l) {
- bits->skipBitsNoFill(l);
+ bits.skipBitsNoFill(l);
rv = val >> 4;
} else {
- bits->skipBits(8);
+ bits.skipBits(8);
l = 8;
while (code > dctbl1->maxcode[l]) {
- temp = bits->getBitNoFill();
+ temp = bits.getBitNoFill();
code = (code << 1) | temp;
l++;
}
@@ -196,7 +191,7 @@
*/
uint32 len = rv & 15;
uint32 shl = rv >> 4;
- int diff = ((bits->getBits(len - shl) << 1) + 1) << shl >> 1;
+ int diff = ((bits.getBits(len - shl) << 1) + 1) << shl >> 1;
if ((diff & (1 << (len - 1))) == 0)
diff -= (1 << len) - !shl;
return diff;
Index: NikonDecompressor.h
===================================================================
--- NikonDecompressor.h (revision 611)
+++ NikonDecompressor.h (working copy)
@@ -2,7 +2,6 @@
#define NIKON_DECOMPRESSOR_H
#include "LJpegDecompressor.h"
-#include "BitPumpMSB.h"
/*
RawSpeed - RAW file decoder.
@@ -27,6 +26,8 @@
namespace RawSpeed {
+class BitPumpMSB;
+
class NikonDecompressor :
public LJpegDecompressor
{
@@ -33,14 +34,12 @@
public:
NikonDecompressor(FileMap* file, RawImage img );
public:
- virtual ~NikonDecompressor(void);
void DecompressNikon(ByteStream *meta, uint32 w, uint32 h, uint32 bitsPS, uint32 offset, uint32 size);
bool uncorrectedRawValues;
private:
void initTable(uint32 huffSelect);
- int HuffDecodeNikon();
+ int HuffDecodeNikon(BitPumpMSB& bits);
ushort16 curve[0x8000];
- BitPumpMSB *bits;
};
static const uchar8 nikon_tree[][32] = {
_______________________________________________
Rawstudio-dev mailing list
[email protected]
http://rawstudio.org/cgi-bin/mailman/listinfo/rawstudio-dev