Hi,

 a lot  of people  sometimes need  order same  data in  same DB  by more
 different locales. For  example multi-language web application  with DB
 in UTF-8.  It's  problem in PostgreSQL, because  PostgreSQL require set
 LC_COLLATE by initdb.

 I  think possible  solution is  special function  used ORDER  BY clause
 which knows to switch by safe  way to wanted locales, convert string by
 strxfrm() and switch back to backend locales.

 Is this  function interesting  for PostgreSQL  contrib or  main tree? I
 think it's very useful for a lot of users.  I can prepare a patch.

 Note, the original idea and patch is from 
 Honza Pazdziora <[EMAIL PROTECTED]>.

 For example, the Czech alphabet has between 'h' and 'i' letter 'ch':

        # SHOW LC_COLLATE;
         lc_collate 
        ------------
            C

        # SELECT data FROM str ORDER BY nls_string(data,'en_US');
         data  
        -------
         aaaa
         cccc
         chccc
         dddd
         hhhh
         iiii
         zzzz

        # SELECT data FROM str ORDER BY nls_string(data,'cs_CZ');
         data  
        -------
         aaaa
         cccc
         dddd
         hhhh
         chccc
         iiii
         zzzz

 The function returns result encoded in unsigned octal:

        # SELECT nls_string('pg','en_US');
                nls_string        
        --------------------------
         033022001010010001002002


 Source:

static char *lc_collate_cache = NULL;

PG_FUNCTION_INFO_V1(nls_string);

Datum
nls_string(PG_FUNCTION_ARGS) 
{
        text *locale = PG_GETARG_TEXT_P(1);
        char *locale_str;
        int locale_len;
        
        text *txt = PG_GETARG_TEXT_P(0);
        char *txt_str;
        int txt_len;
        text *txt_out;
        char *txt_tmp;
        size_t size = 0;
        size_t rest = 0;
        int i;

        if ((VARSIZE(locale) - VARHDRSZ) <= 0 || (VARSIZE(txt) - VARHDRSZ) <= 0)
                PG_RETURN_NULL();
        
        /*
         * Save original locale setting
         */
        if (!lc_collate_cache)
        {
                if ((lc_collate_cache = setlocale(LC_COLLATE, NULL)))
                        /* cached independent on PostgreSQL mmgr */
                        lc_collate_cache = strdup(lc_collate_cache);
        }
        if (!lc_collate_cache)
                elog(ERROR, "invalid system LC_COLLATE setting");

        /*
         * Conversion to standard strings
         */
        locale_len = VARSIZE(locale) - VARHDRSZ;
        locale_str = palloc(locale_len + 1);
        memcpy(locale_str, VARDATA(locale), locale_len);
        *(locale_str + locale_len) = '\0';

        txt_len = VARSIZE(txt) - VARHDRSZ;
        txt_str = palloc(txt_len + 1);
        memcpy(txt_str, VARDATA(txt), txt_len);
        *(txt_str + txt_len) = '\0';

        /*
         * Set wanted locale
         */
        if (!setlocale(LC_COLLATE, locale_str))
        {
                setlocale(LC_COLLATE, lc_collate_cache);        /* paranoid? */
                elog(ERROR, "invalid LC_COLLATE setting: %s", locale_str);
        }
        
        pfree(locale_str);
        
        /*
         * Text transformation
         */
        size = txt_len * 2;
        txt_tmp = palloc(size);
        memset(txt_tmp, 0, size);

        rest = strxfrm(txt_tmp, txt_str, size) + 1;
        if (rest >= size) 
        {
                pfree(txt_tmp);
                txt_tmp = palloc(rest);
                memset(txt_tmp, 0, rest);
                rest = strxfrm(txt_tmp, txt_str, rest);
        }
        
        /*
         * Transformation to unsigned octal
         */
        txt_out = (text *) palloc(3 * rest + VARHDRSZ);
        memset(txt_out, 0, 3 * rest + VARHDRSZ);
        
        for (i = 0; i < rest; i++) 
        {
                sprintf(VARDATA(txt_out) + 3 * i, "%03o",
                        (int)(unsigned char)*(txt_tmp + i));
        }
        pfree(txt_tmp);

        VARATT_SIZEP(txt_out) = 3 * rest + VARHDRSZ;

        /*
         * Set original locale
         */
        if (!setlocale(LC_COLLATE, lc_collate_cache))
                elog(ERROR, "invalid LC_COLLATE setting: %s", lc_collate_cache);
        
        PG_RETURN_TEXT_P(txt_out);
}


-- 
 Karel Zak  <[EMAIL PROTECTED]>
 http://home.zf.jcu.cz/~zakkr/

---------------------------(end of broadcast)---------------------------
TIP 5: Have you checked our extensive FAQ?

               http://www.postgresql.org/docs/faqs/FAQ.html

Reply via email to