On 6/8/21 9:34 PM, Paul Procacci wrote:
Hopefully a pretty quick question....

Given the following:

my Buf $b .= new([72, 105, 0, 32, 97, 103, 97, 105, 110, 0]);
say $b.decode;

I would expect this to print 'Hi'.
Instead it prints 'Hi again'.

https://docs.raku.org/type/Buf#(Blob)_method_decode

The decode documentation for Buf only states that 'Applies an encoding to turn the blob into a Str (https://docs.raku.org/type/Str); the encoding will be UTF-8 by default.'

The zero (0) in that Buf should imply an end of string yet decode seems to want to decode the number of elements instead.

Furthermore, if I 'say $b.decode.chars;' it counts the initial null as part of the Str.
In my mind, that means Str doesn't really mean string.

So the question: how does one ACTUALLY decode what's in a buffer to a string so that it cleanly adheres to the semantics of NULL termination for strings?

Another question might be, should decode() follow null terminating semantics instead of number of elements in a given Buf.

Thanks,
Paul


Hi Paul,

This is a module I wrote myself to handle such.  Feel
free to carve it up to your liking.

-T

<NativeConvert.pm6>
# unit module NativeConvert;
# NativeConvert.pm6

#`{

   Utilities to convert back and forth from C types to Raku types
   for use with NativeCall.

   To use, place the following at the top of your program:

   use NativeCall;
   use NativeConstants;
   use NativeConvert :to-UTF8-c-str, :to-UTF16-c-str,
       :c-to-raku-str, :Raku-uint-to-Cuint32, :Cuint32-to-Raku-uint;

   Sub Headers:
      sub to-UTF8-c-str( Str $RakuStr ) returns CArray[uint8] is export( :to-UTF8-c-str )
      sub to-UTF16-c-str( Str $RakuStr ) returns CArray[uint16] is export( :to-UTF16-c-str )
      sub c-to-raku-str( BYTES $CStr ) returns Str is export( :c-to-raku-str )
      sub Raku-uint-to-Cuint32( uint32 $RakuInt ) returns CArray[BYTE] is export( :Raku-uint-to-Cuint32 )
      sub Cuint32-to-Raku-uint( BYTES $CInt ) returns uint32 is export( :Cuint32-to-Raku-uint )

}

use NativeCall;
use NativeConstants;


sub to-UTF8-c-str( Str $RakuStr ) returns CArray[uint8] is export( :to-UTF8-c-str ) {
   # Encodes a Raku string with the default encoding of Str.encode (UTF-8)
   # and returns the bytes as a C byte array with one nul byte appended.
   #
   # Note: C strings are always terminated with a nul.  WinAPI will
   #       malfunction without it.

   my @Utf8Bytes = $RakuStr.encode.list;

   # The trailing 0 is the nul terminator.
   return CArray[uint8].new( @Utf8Bytes, 0 );
}


sub to-UTF16-c-str( Str $RakuStr ) returns CArray[uint16] is export( :to-UTF16-c-str ) {
   # Converts a Raku string into a nul terminated UTF16 C string, one
   # UTF16 code unit per uint16 element.
   #
   # Note: C strings are always terminated with a nul.  WinAPI will
   #       malfunction without it.
   #
   # The string has to be encoded as 'utf16' explicitly.  A bare .encode
   # produces UTF-8 bytes, and widening each of those bytes to a uint16
   # only gives valid UTF16 for plain ASCII text.

   my @CodeUnits = $RakuStr.encode( 'utf16' ).list;

   # The trailing 0 is the nul terminator.
   return CArray[uint16].new( @CodeUnits, 0 );
}


sub c-to-raku-str( BYTES $CStr ) returns Str  is export( :c-to-raku-str ) {
   # Decodes a nul terminated UTF16 little endian C string, held as a byte
   # array, into a Raku string.
   #
   # $CStr is read two bytes (one UTF16 code unit) at a time, low byte
   # first.  Reading stops at the first 0x0000 code unit; anything after it
   # is ignored, as is a trailing odd byte.
   #
   # Returns the decoded Str.  A buffer with fewer than two bytes gives "".
   #
   # Note: C strings are always terminated with a nul.  If the buffer ends
   #       without one, the error message is written to STDERR and the
   #       program exits.

   my Str  $SubName    = &?ROUTINE.name;
   my Int  $CStrElems  = $CStr.elems;
   my Bool $Terminated = False;
   my      @Bytes;

   # Walk every complete byte pair, including the last one, so a buffer
   # that ends exactly on its terminator is accepted.
   loop (my $i = 0; $i + 1 < $CStrElems; $i += 2) {
      my $Low  = $CStr[ $i ];
      my $High = $CStr[ $i + 1 ];

      if  $Low == 0  &&  $High == 0  { $Terminated = True; last; }

      # Keep both bytes: dropping the high byte would corrupt every
      # character above U+00FF.
      @Bytes.push: $Low, $High;
   }

   if  !$Terminated  &&  $CStrElems >= 2  {
      my Str $Msg = "$SubName ERROR:" ~ " C Strings are required to be terminated with a nul\n\n" ~
                    "                     Returning an empty string\n" ~
                    "                     Cowardly existing\n";
      $*ERR.print( $Msg );
      exit;
   }

   # Decoding the collected bytes as a whole also handles surrogate pairs.
   return Blob.new( @Bytes ).decode( 'utf16le' );
}


sub Raku-uint-to-Cuint32( uint32 $RakuInt ) returns CArray[BYTE] is export( :Raku-uint-to-Cuint32 ) {
   # Converts a 4 byte, 32 bit Raku integer into a little endian WinApi C
   # integer.
   #
   # Returns a four element byte array: element 0 holds the least
   # significant byte of $RakuInt and element 3 the most significant.

   my BYTES $CInt    = CArray[BYTE].new();

   # Each declaration sits on its own line; when they share a line, the
   # first trailing comment swallows the declarations that follow it.
   # $Byte3 is the least significant byte, $Byte0 the most significant.
   my BYTE  $Byte3   =   $RakuInt +& 0x000000FF;
   my BYTE  $Byte2   = ( $RakuInt +& 0x0000FF00 ) +>  8;
   my BYTE  $Byte1   = ( $RakuInt +& 0x00FF0000 ) +> 16;
   my BYTE  $Byte0   = ( $RakuInt +& 0xFF000000 ) +> 24;

   $CInt[ 3 ] = $Byte0;
   $CInt[ 2 ] = $Byte1;
   $CInt[ 1 ] = $Byte2;
   $CInt[ 0 ] = $Byte3;

   return $CInt;
}


sub Cuint32-to-Raku-uint( BYTES $CInt ) returns uint32 is export( :Cuint32-to-Raku-uint ) {
   # Converts a 4 byte, 32 bit little endian WinApi integer to a Raku
   # integer.
   #
   # Reads elements 0 through 3 of $CInt; element 0 contributes the lowest
   # eight bits of the result and element 3 the highest eight.

   my uint32 $RakuInt = 0;

   for 0 .. 3 -> $Index {
      my BYTE $Byte = $CInt[ $Index ];

      # Move the byte to its position in the result: 0, 8, 16 or 24 bits up.
      $RakuInt += $Byte +< ( 8 * $Index );
   }

   return $RakuInt;
}
</NativeConvert.pm6>


<NativeConstants.pm6>
# unit module NativeConstants;
# NativeConstants.pm6

#`{

    Constants used across Windows libraries that use Native Call

    Reference to types and values:
          http://dsource.org/projects/tango/ticket/820
          https://www.originlab.com/doc/OriginC/ref/Registry-Registry
          https://github.com/jonasstrandstedt/MinGW/blob/master/MinGW/include/winreg.h#L14
          https://stackoverflow.com/questions/321413/lpcstr-lpctstr-and-lptstr
          https://docs.microsoft.com/en-us/windows/win32/sysinfo/structure-of-the-registry

    perl6 -I. -c NativeConstants.pm6

    Test one liner:
perl6 -I. -e "use NativeConstants; say FORMAT_MESSAGE_ALLOCATE_BUFFER.base(16);"

}


use NativeCall;

# https://docs.microsoft.com/en-us/windows/win32/winprog/windows-data-types
# Raku aliases for the Windows data type names used in NativeCall signatures.
constant BYTE     := uint8;
constant WCHAR    := uint16;
constant DWORD    := uint32;
constant REGSAM   := uint32;
constant WCHARS   := CArray[WCHAR];
constant BYTES    := CArray[BYTE];
# NOTE(review): a 32 bit alias only fits pointers in a 32 bit process --
#               confirm how CPOINTER is used before relying on it.
constant CPOINTER := uint32;

# INT is a signed 32 bit integer in the Windows data types list, so it is
# aliased to int32; only UINT is unsigned.
constant INT      = int32;
constant UINT     = uint32;
constant HANDLE   = Pointer[void];
constant LPWCTSTR = CArray[WCHAR];
constant NULL     = 0x0000;


# https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-formatmessage
# FORMAT_MESSAGE_* flag values, listed in ascending order (see the
# FormatMessage link above).
constant FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x0100;
constant FORMAT_MESSAGE_IGNORE_INSERTS  = 0x0200;
constant FORMAT_MESSAGE_FROM_STRING     = 0x0400;
constant FORMAT_MESSAGE_FROM_SYSTEM     = 0x1000;


# Language identifiers.
# https://docs.microsoft.com/en-us/windows/win32/intl/language-identifiers
# https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings
# ^^^^^^ use the values in the third column to the right, not the first columns on the left
# NOTE(review): these look like sublanguage ids rather than complete
#               language ids -- confirm against winnt.h before passing
#               them to an API that expects a LANGID.
constant LANG_USER_DEFAULT   = 0x0;
constant LANG_SYSTEM_DEFAULT = 0x2;


# https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
constant ERROR_SUCCESS = 0x0;   # Win-Api  0 = success

# https://docs.microsoft.com/en-us/windows/win32/sysinfo/registry-key-security-and-access-rights
# say KEY_ALL_ACCESS.value.base(16);
# 0xF003F
# Registry key access rights.  Every member sits on its own line: when
# several share a line, the first trailing comment swallows the members
# that follow it and they silently drop out of the enum.
enum WinRegAccessRights (
   # constant KEY_READ      = 0x20019;
   # constant KEY_SET_VALUE = 0x0002;
   KEY_ALL_ACCESS         => 0xF003F,  # Combines the STANDARD_RIGHTS_REQUIRED, KEY_QUERY_VALUE, KEY_SET_VALUE,
                                       # KEY_CREATE_SUB_KEY, KEY_ENUMERATE_SUB_KEYS, KEY_NOTIFY, and
                                       # KEY_CREATE_LINK access rights.
   KEY_CREATE_LINK        => 0x0020,   # Reserved for system use.
   KEY_CREATE_SUB_KEY     => 0x0004,   # Required to create a subkey of a registry key.
   KEY_ENUMERATE_SUB_KEYS => 0x0008,   # Required to enumerate the subkeys of a registry key.
   KEY_EXECUTE            => 0x20019,  # Equivalent to KEY_READ.
   KEY_NOTIFY             => 0x0010,   # Required to request change notifications for a registry key or for
                                       # subkeys of a registry key.
   KEY_QUERY_VALUE        => 0x0001,   # Required to query the values of a registry key.
   KEY_READ               => 0x20019,  # Combines the STANDARD_RIGHTS_READ, KEY_QUERY_VALUE,
                                       # KEY_ENUMERATE_SUB_KEYS, and KEY_NOTIFY values.
   KEY_SET_VALUE          => 0x0002,   # Required to create, delete, or set a registry value.
   KEY_WOW64_32KEY        => 0x0200,   # Indicates that an application on 64-bit Windows should operate on the
                                       # 32-bit registry view.  This flag is ignored by 32-bit Windows.  For more
                                       # information, see Accessing an Alternate Registry View.  This flag must
                                       # be combined using the OR operator with the other flags in this table
                                       # that either query or access registry values.
                                       # Windows 2000: This flag is not supported.
   KEY_WOW64_64KEY        => 0x0100,   # Indicates that an application on 64-bit Windows should operate on the
                                       # 64-bit registry view.  This flag is ignored by 32-bit Windows.  For more
                                       # information, see Accessing an Alternate Registry View.  This flag must
                                       # be combined using the OR operator with the other flags in this table
                                       # that either query or access registry values.
                                       # Windows 2000: This flag is not supported.
   KEY_WRITE              => 0x20006,  # Combines the STANDARD_RIGHTS_WRITE, KEY_SET_VALUE, and
                                       # KEY_CREATE_SUB_KEY access rights.
);

# say HKEY_CURRENT_USER.value.base(16);
# 0x80000001
# Predefined registry root key values, written as a comma separated pair
# list in ascending order.
enum WinRegHives (
   HKEY_CLASSES_ROOT     => 0x8000_0000,
   HKEY_CURRENT_USER     => 0x8000_0001,
   HKEY_LOCAL_MACHINE    => 0x8000_0002,
   HKEY_USERS            => 0x8000_0003,
   HKEY_PERFORMANCE_DATA => 0x8000_0004,
   HKEY_CURRENT_CONFIG   => 0x8000_0005,
   HKEY_DYN_DATA         => 0x8000_0006,
);

# Registry value type codes.  Every member sits on its own line: when two
# share a line, the first trailing comment swallows the second member and
# it silently drops out of the enum.
enum ValueNames (
   REG_NONE                       => 0,   # No value type
   REG_SZ                         => 1,   # Unicode nul terminated string
   REG_EXPAND_SZ                  => 2,   # Unicode nul terminated string = (with environment variable references)
   REG_BINARY                     => 3,   # Free form binary
   REG_DWORD                      => 4,   # 32-bit number
   REG_DWORD_LITTLE_ENDIAN        => 4,   # 32-bit number = (same as REG_DWORD)
   REG_DWORD_BIG_ENDIAN           => 5,   # 32-bit number
   REG_LINK                       => 6,   # Symbolic Link = (unicode)
   REG_MULTI_SZ                   => 7,   # Multiple Unicode strings
   REG_RESOURCE_LIST              => 8,   # Resource list in the resource map
   REG_FULL_RESOURCE_DESCRIPTOR   => 9,   # Resource list in the hardware description
   REG_RESOURCE_REQUIREMENTS_LIST => 10,
   REG_QWORD                      => 11,  # 64-bit number
   REG_QWORD_LITTLE_ENDIAN        => 11,  # same as REG_QWORD
);
</NativeConstants.pm6>

Reply via email to