File : PdfIdentityEncoding.cpp
Method : ParseToUnicode
/***************************************************************************
 *   Copyright (C) 2010 by Dominik Seichter                                *
 *   domseich...@web.de                                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU Library General Public License as       *
 *   published by the Free Software Foundation; either version 2 of the    *
 *   License, or (at your option) any later version.                       *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU Library General Public     *
 *   License along with this program; if not, write to the                 *
 *   Free Software Foundation, Inc.,                                       *
 *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.             *
 *                                                                         *
 *   In addition, as a special exception, the copyright holders give       *
 *   permission to link the code of portions of this program with the      *
 *   OpenSSL library under certain conditions as described in each         *
 *   individual source file, and distribute linked combinations            *
 *   including the two.                                                    *
 *   You must obey the GNU General Public License in all respects          *
 *   for all of the code used other than OpenSSL.  If you modify           *
 *   file(s) with this exception, you may extend this exception to your    *
 *   version of the file(s), but you are not obligated to do so.  If you   *
 *   do not wish to do so, delete this exception statement from your       *
 *   version.  If you delete this exception statement from all source      *
 *   files in the program, then also delete it here.                       *
 ***************************************************************************/

#include "PdfIdentityEncoding.h"

#include "base/PdfDefinesPrivate.h"

#include "base/PdfDictionary.h"
#include "base/PdfLocale.h"
#include "base/PdfStream.h"
#include "base/PdfContentsTokenizer.h"

#include "PdfFont.h"

#include <sstream>
#include <iostream>
#include <stack>
#include <iomanip>
#include <string>

using namespace std;

namespace PoDoFo {

// Constructs an Identity-H encoding covering [nFirstChar, nLastChar].
//
// nFirstChar / nLastChar are forwarded to the PdfEncoding base class,
// bAutoDelete and pToUnicode are stored as given. The ToUnicode object
// (may be NULL) is parsed immediately at the end of construction.
PdfIdentityEncoding::PdfIdentityEncoding( int nFirstChar, int nLastChar, bool bAutoDelete, PdfObject *pToUnicode )
    : PdfEncoding( nFirstChar, nLastChar ),
      m_bAutoDelete( bAutoDelete ),
      m_pToUnicode( pToUnicode ),
      m_bToUnicodeIsLoaded( false )
{
    // The identifier is the fixed prefix followed by "<first>_<last>",
    // so encodings with different character ranges get different IDs.
    std::ostringstream idBuilder;
    idBuilder << "/Identity-H";
    idBuilder << nFirstChar;
    idBuilder << "_";
    idBuilder << nLastChar;
    m_id = PdfName( idBuilder.str() );

    // Fill the CID -> Unicode table (no-op when there is no ToUnicode stream).
    ParseToUnicode();
}

// Writes this encoding into a dictionary by adding the key "Encoding"
// with the name value "Identity-H".
void PdfIdentityEncoding::AddToDictionary( PdfDictionary & rDictionary ) const
{
    const PdfName identityName( "Identity-H" );
    rDictionary.AddKey( "Encoding", identityName );
}

// Returns the code for character index nIndex as a big-endian 16 bit value.
//
// Raises ePdfError_ValueOutOfRange when nIndex lies outside
// [GetFirstChar(), GetLastChar()]. On little-endian builds the two low
// bytes of nIndex are swapped; otherwise nIndex is returned unchanged
// (truncated to 16 bits).
pdf_utf16be PdfIdentityEncoding::GetCharCode( int nIndex ) const
{
    const bool bInRange = nIndex >= this->GetFirstChar() &&
                          nIndex <= this->GetLastChar();
    if( !bInRange )
    {
        PODOFO_RAISE_ERROR( ePdfError_ValueOutOfRange );
    }

#ifdef PODOFO_IS_LITTLE_ENDIAN
    // Swap the two low bytes so the in-memory layout is big endian.
    const int nHighToLow = (nIndex & 0xff00) >> 8;
    const int nLowToHigh = (nIndex & 0xff) << 8;
    return nHighToLow | nLowToHigh;
#else
    return static_cast<pdf_utf16be>(nIndex);
#endif // PODOFO_IS_LITTLE_ENDIAN
}

// Converts a string of 2-byte big-endian CIDs into a UTF-16BE PdfString
// using the table built from the ToUnicode CMap.
//
// rEncodedString is read as GetLength()/2 16 bit units; an odd trailing
// byte is ignored. CIDs without a table entry are converted to 0 (see
// GetUnicodeValue). The font argument is unused.
//
// Returns PdfString("\0") when no ToUnicode object was supplied or it
// has not been loaded. Raises ePdfError_OutOfMemory when the scratch
// buffer cannot be allocated.
PdfString PdfIdentityEncoding::ConvertToUnicode( const PdfString & rEncodedString, const PdfFont* ) const
{
    if( !m_pToUnicode || !m_bToUnicodeIsLoaded )
    {
        return(PdfString("\0"));
    }

    // Frees the scratch buffer on every exit path, including an
    // exception thrown while the result string is being constructed.
    struct ScratchGuard
    {
        explicit ScratchGuard( void* pMem ) : m_pMem( pMem ) {}
        ~ScratchGuard() { free( m_pMem ); }
        void* m_pMem;
    };

    const pdf_utf16be* pStr = reinterpret_cast<const pdf_utf16be*>(rEncodedString.GetString());
    const size_t lLen = rEncodedString.GetLength()/2;

    // malloc(0) is allowed to return NULL, which would be misreported as
    // out-of-memory for an empty input; always request at least one unit.
    const size_t lAllocUnits = lLen ? lLen : 1;
    pdf_utf16be* pszUtf16 = static_cast<pdf_utf16be*>(malloc(sizeof(pdf_utf16be)*lAllocUnits));
    if( !pszUtf16 )
    {
        PODOFO_RAISE_ERROR( ePdfError_OutOfMemory );
    }
    const ScratchGuard scratchGuard( pszUtf16 );

    for( size_t i = 0; i < lLen; i++ )
    {
        // The lookup table is keyed in host byte order, the string data
        // is big endian: swap on the way in and on the way out.
#ifdef PODOFO_IS_LITTLE_ENDIAN
        const pdf_utf16be lCID = static_cast<pdf_utf16be>((pStr[i] << 8) | (pStr[i] >> 8));
#else
        const pdf_utf16be lCID = pStr[i];
#endif // PODOFO_IS_LITTLE_ENDIAN

        const pdf_utf16be lUnicodeValue = this->GetUnicodeValue(lCID);

#ifdef PODOFO_IS_LITTLE_ENDIAN
        pszUtf16[i] = static_cast<pdf_utf16be>((lUnicodeValue << 8) | (lUnicodeValue >> 8));
#else
        pszUtf16[i] = lUnicodeValue;
#endif // PODOFO_IS_LITTLE_ENDIAN
    }

    return PdfString( pszUtf16, lLen );
}

// Converts a string to the 2-byte-per-character CID representation used
// by this encoding.
//
// rString is converted to UTF-16BE first. Every code unit is mapped to a
// CID through GetCIDValue(); when that yields 0 and pFont is not NULL,
// the glyph id reported by the font metrics is used instead. Each CID is
// written high byte first.
//
// Returns a buffer holding two bytes per UTF-16 code unit of the input.
PdfRefCountedBuffer PdfIdentityEncoding::ConvertToEncoding( const PdfString & rString, const PdfFont* pFont ) const
{
    // Get the string in UTF-16be format
    PdfString sStr = rString.ToUnicode();
    const pdf_utf16be* pStr = sStr.GetUnicode();

    // Raw output bytes; a plain string avoids the stream formatting
    // machinery and the repeated copies made by ostringstream::str().
    std::string encoded;

    // NOTE(review): GetUnicode() presumably can return NULL for an
    // invalid/empty string - the check keeps that case from being
    // dereferenced; confirm against PdfString.
    for( ; pStr && *pStr; ++pStr )
    {
        // The string data is big endian, the lookups work in host order.
#ifdef PODOFO_IS_LITTLE_ENDIAN
        const pdf_utf16be lUnicodeValue = static_cast<pdf_utf16be>((*pStr << 8) | (*pStr >> 8));
#else
        const pdf_utf16be lUnicodeValue = *pStr;
#endif // PODOFO_IS_LITTLE_ENDIAN

        pdf_utf16be lCID = this->GetCIDValue( lUnicodeValue );
        if( lCID == 0 && pFont )
        {
            // No ToUnicode entry: fall back to the font's own glyph id.
            lCID = static_cast<pdf_utf16be>(pFont->GetFontMetrics()->GetGlyphId( lUnicodeValue ));
        }

        encoded.push_back( static_cast<char>((lCID & 0xff00) >> 8) );
        encoded.push_back( static_cast<char>(lCID & 0x00ff) );
    }

    PdfRefCountedBuffer buffer( encoded.length() );
    if( !encoded.empty() )
    {
        // Skipped for empty output: memcpy must not be handed a
        // possibly-null destination even when the length is zero.
        memcpy( buffer.GetBuffer(), encoded.data(), encoded.length() );
    }
    return buffer;
}

// Looks up the Unicode value stored for the given CID.
//
// Returns 0 when the ToUnicode table has not been loaded or when it
// holds no entry for value.
pdf_utf16be PdfIdentityEncoding::GetUnicodeValue( pdf_utf16be  value ) const
{
    if( !m_bToUnicodeIsLoaded )
    {
        return 0;
    }

    std::map<pdf_utf16be, pdf_utf16be>::const_iterator itEntry = m_cMapEncoding.find( value );
    if( itEntry != m_cMapEncoding.end() )
    {
        return itEntry->second;
    }

    return 0;
}
    
// Reverse lookup: finds a CID whose table entry equals lUnicodeValue.
//
// The table is scanned in key order and the first matching key is
// returned. Returns 0 when the ToUnicode table has not been loaded or
// no entry matches.
pdf_utf16be PdfIdentityEncoding::GetCIDValue( pdf_utf16be lUnicodeValue ) const
{
    if( !m_bToUnicodeIsLoaded )
    {
        return 0;
    }

    // TODO: optimize
    typedef std::map<pdf_utf16be, pdf_utf16be>::const_iterator TCIEncodingMap;
    const TCIEncodingMap itEnd = m_cMapEncoding.end();
    TCIEncodingMap itCur = m_cMapEncoding.begin();
    while( itCur != itEnd )
    {
        if( itCur->second == lUnicodeValue )
        {
            return itCur->first;
        }
        ++itCur;
    }

    return 0;
}

void PdfIdentityEncoding::ParseToUnicode()
{
    if (m_pToUnicode && m_pToUnicode->HasStream())
    {
        stack<string> stkToken;
        pdf_uint16 loop = 0;
        char *streamBuffer;
        const char *streamToken = NULL;
        EPdfTokenType *streamTokenType = NULL;
        pdf_long streamBufferLen;
        bool in_beginbfrange = 0;
        bool in_beginbfchar = 0;
        pdf_uint16 range_entries = 0;
        pdf_uint16 char_entries = 0;
        pdf_uint16 inside_hex_string = 0;
        pdf_uint16 inside_array = 0;
        pdf_uint16 range_start;
        pdf_uint16 range_end;
        pdf_uint16 i = 0;
        pdf_utf16be firstvalue = 0;
        const PdfStream *CIDStreamdata = m_pToUnicode->GetStream ();
        CIDStreamdata->GetFilteredCopy (&streamBuffer, &streamBufferLen);
        
        PdfContentsTokenizer streamTokenizer (streamBuffer, streamBufferLen);
        while (streamTokenizer.GetNextToken (streamToken, streamTokenType))
        {
            stkToken.push (streamToken);
            
            if (strcmp (streamToken, ">") == 0)
            {
                if (inside_hex_string == 0)
                    PODOFO_RAISE_ERROR_INFO(ePdfError_InvalidStream, "Pdf 
Error, got > before <")
                else
                    inside_hex_string = 0;
                
               // i++;
                if (in_beginbfrange == 1)
                    i++;
            }
            
            if (strcmp (streamToken, "]") == 0)
            {
                if (inside_array == 0)
                    PODOFO_RAISE_ERROR_INFO(ePdfError_InvalidStream, "Pdf 
Error, got ] before [")
                else
                    inside_array = 0;
                
                //i++;
                if (in_beginbfrange == 1)
                    i++;
            }
            
            if (in_beginbfrange == 1)
            {
                if (loop < range_entries)
                {
                    if (inside_hex_string == 1)
                    {
                        pdf_utf16be num_value;
                        std::stringstream ss;
                        ss << std::hex << streamToken;
                        ss >> num_value;
                        if (i % 3 == 0)
                            range_start = num_value;
                        if (i % 3 == 1)
                        {
                            range_end = num_value;
                        }
                        if (i % 3 == 2)
                        {
                            //for (int k = range_start; k < range_end; k++)
                            for (int k = range_start; k <= range_end; k++)
                            {
                                m_cMapEncoding[k] = num_value;
                                num_value++;
                            }
                            
                            loop++;
                            
                        }
                    }
                }
            }
            
            if (in_beginbfchar == 1)
            {
                if (loop < char_entries)
                {
                    if (inside_hex_string == 1)
                    {
                        pdf_utf16be num_value;
                        std::stringstream ss;
                        ss << std::hex << streamToken;
                        ss >> num_value;
                        if (i % 2 == 0)
                        {
                            firstvalue = num_value;
                        }
                        if (i % 2 == 1)
                        {
                            m_cMapEncoding[firstvalue] = num_value;
                        }
                    }
                }
            }
            
            
            if (strcmp (streamToken, "<") == 0)
            {
                inside_hex_string = 1;
            }
            
            
            
            if (strcmp (streamToken, "[") == 0)
            {
                inside_array = 1;
            }
            
            
            if (strcmp (streamToken, "beginbfrange") == 0)
            {
                in_beginbfrange = 1;
                stkToken.pop ();
                std::stringstream ss;
                ss << std::hex << stkToken.top ();
                ss >> range_entries;
            }
            
            if (strcmp (streamToken, "endbfrange") == 0)
            {
                in_beginbfrange = 0;
                i = 0;
                loop = 0;
            }
            
            if (strcmp (streamToken, "beginbfchar") == 0)
            {
                in_beginbfchar = 1;
                stkToken.pop ();
                std::stringstream ss;
                ss << std::hex << stkToken.top ();
                ss >> char_entries;
            }
            
            if (strcmp (streamToken, "endbfchar") == 0)
            {
                in_beginbfchar = 0;
                i = 0;
            }
        }
        
        free(streamBuffer);
        
        m_bToUnicodeIsLoaded = true;
    }
}
    
}; /* namespace PoDoFo */
------------------------------------------------------------------------------
Download BIRT iHub F-Type - The Free Enterprise-Grade BIRT Server
from Actuate! Instantly Supercharge Your Business Reports and Dashboards
with Interactivity, Sharing, Native Excel Exports, App Integration & more
Get technology previously reserved for billion-dollar corporations, FREE
http://pubads.g.doubleclick.net/gampad/clk?id=164703151&iu=/4140/ostg.clktrk
_______________________________________________
Podofo-users mailing list
Podofo-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/podofo-users

Reply via email to