On 6/8/21 9:34 PM, Paul Procacci wrote:
Hopefully a pretty quick question....
Given the following:
my Buf $b .= new([72, 105, 0, 32, 97, 103, 97, 105, 110, 0]);
say $b.decode;
I would expect this to print 'Hi'.
Instead it prints 'Hi again'.
https://docs.raku.org/type/Buf#(Blob)_method_decode
<https://docs.raku.org/type/Buf#(Blob)_method_decode>
The decode documentation for Buf only states that 'Applies an encoding
to turn the blob into a Str <https://docs.raku.org/type/Str>; the
encoding will be UTF-8 by default.'
The zero (0) in that Buf should imply an end of string yet decode seems
to want to decode the number of elements instead.
Furthermore, If I 'say $b.decode.chars;' it counts the initial null as
part of Str.
In my mind, that means Str doesn't really mean string.
So the question, how does one ACTUALLY decode what's in a buffer to a
string where it adheres to the semantics of NULL termination for strings
cleanly.
Another question might be, should decode() follow null terminating
semantics instead of number of elements in a given Buf.
Thanks,
Paul
Hi Paul,
This is a module I wrote myself to handle such cases. Feel
free to carve it up to your liking.
-T
<NativeConvert.pm6>
# unit module NativeConvert;
# NativeConvert.pm6
#`{
Utilities to convert back and forth from C types to Raku types
for use with NativeCall.
To use, place the following at the top of your program:
use NativeCall;
use NativeConstants;
use NativeConvert :to-UTF8-c-str, :to-UTF16-c-str,
:c-to-raku-str, :Raku-uint-to-Cuint32, :Cuint32-to-Raku-uint;
Sub Headers:
sub to-UTF8-c-str( Str $RakuStr ) returns CArray[uint8] is
export( :to-UTF8-c-str )
sub to-UTF16-c-str( Str $RakuStr ) returns CArray[uint16] is
export( :to-UTF16-c-str )
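sub c-to-raku-str( BYTES $CStr ) returns Str is export(
:c-to-raku-str )
sub Raku-uint-to-Cuint32( uint32 $RakuInt ) returns CArray[BYTE] is
export( :Raku-uint-to-Cuint32 )
sub Cuint32-to-Raku-uint( BYTES $CInt ) returns uint32 is export(
:Cuint32-to-Raku-uint )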
}
use NativeCall;
use NativeConstants;
sub to-UTF8-c-str( Str $RakuStr ) returns CArray[uint8] is export( :to-UTF8-c-str ) {
    # Converts a Raku string into a nul terminated UTF-8 C string.
    #
    # $RakuStr  the string to convert
    # Returns   a CArray[uint8] holding the UTF-8 bytes of $RakuStr
    #           followed by a single 0 byte
    #
    # Note: C Strings are always terminated with a nul.  WinAPI will
    #       malfunction without it.
    # Note: UTF-8 is a sequence of single bytes, so no byte order is involved.

    # .encode defaults to UTF-8; the trailing 0 is the C terminator.
    return CArray[uint8].new( $RakuStr.encode.list, 0 );
}
sub to-UTF16-c-str( Str $RakuStr ) returns CArray[uint16] is export( :to-UTF16-c-str ) {
    # Converts a Raku string into a nul terminated UTF-16 C string.
    #
    # $RakuStr  the string to convert
    # Returns   a CArray[uint16] holding one UTF-16 code unit per element,
    #           followed by a single 0 code unit
    #
    # Note: C Strings are always terminated with a nul.  WinAPI will
    #       malfunction without it.
    #
    # The string has to be encoded as UTF-16 explicitly.  A bare .encode
    # produces UTF-8, and storing each UTF-8 byte in its own 16 bit slot is
    # only correct by coincidence for plain ASCII text.
    return CArray[uint16].new( $RakuStr.encode( 'utf-16' ).list, 0 );   # add a nul to the end
}
sub c-to-raku-str( BYTES $CStr ) returns Str is export( :c-to-raku-str ) {
    # Converts a nul terminated UTF-16 little endian C string, held as a
    # byte array, into a Raku Str.
    #
    # $CStr    bytes of the C string: two bytes per UTF-16 code unit,
    #          low byte first
    # Returns  the text up to (not including) the first 0x0000 code unit;
    #          an empty Str when the buffer holds no complete code unit
    #
    # Note: C Strings are always terminated with a nul.  When no terminator
    #       is found the error is reported on STDERR and the program exits.
    my Str $SubName   = &?ROUTINE.name;
    my Int $CStrElems = $CStr.elems;
    my     @CodeUnits;

    # Fewer than two bytes cannot hold a code unit; nothing to decode.
    return "" if $CStrElems < 2;

    # Walk the buffer one code unit (two bytes) at a time, including the
    # final pair, so a terminator in the last position is recognised.
    loop ( my Int $i = 0; $i + 1 < $CStrElems; $i += 2 ) {
        # Little endian: low byte first, high byte second.
        my Int $CodeUnit = $CStr[ $i ] +| ( $CStr[ $i + 1 ] +< 8 );

        # 0x0000 is the terminator: decode what was collected and stop.
        return utf16.new( @CodeUnits ).decode( 'utf-16' ) if $CodeUnit == 0;

        @CodeUnits.push: $CodeUnit;
    }

    # Ran off the end of the buffer without meeting a terminator.
    # NOTE(review): the message mentions returning an empty string, but the
    # sub exits the program as it always has; confirm which one is wanted.
    note "$SubName ERROR:" ~ " C Strings are required to be terminated with a nul\n\n" ~
         " Returning an empty string\n" ~
         " Cowardly exiting\n";
    exit;
}
sub Raku-uint-to-Cuint32( uint32 $RakuInt ) returns CArray[BYTE] is export( :Raku-uint-to-Cuint32 ) {
    # Converts a 4 byte, 32 bit Raku integer into a little endian WinApi
    # C integer.
    #
    # $RakuInt  the value to convert
    # Returns   a 4 element byte array; element 0 is the least significant
    #           byte and element 3 the most significant byte
    my BYTES $CInt = CArray[BYTE].new();

    # The highest index is assigned first, as before, so the array reaches
    # its full length of 4 on the first store.
    $CInt[ 3 ] = ( $RakuInt +& 0xFF000000 ) +> 24;
    $CInt[ 2 ] = ( $RakuInt +& 0x00FF0000 ) +> 16;
    $CInt[ 1 ] = ( $RakuInt +& 0x0000FF00 ) +>  8;
    $CInt[ 0 ] =   $RakuInt +& 0x000000FF;

    # say $CInt[3].base(16) ~ ", " ~ $CInt[2].base(16) ~ ", " ~ $CInt[1].base(16) ~ ", " ~ $CInt[0].base(16) ~ "\n";
    return $CInt;
}
sub Cuint32-to-Raku-uint( BYTES $CInt ) returns uint32 is export( :Cuint32-to-Raku-uint ) {
    # Converts a 4 byte, 32 bit little endian WinApi integer to a Raku
    # integer.
    #
    # $CInt    byte array; elements 0 to 3 are read, element 0 being the
    #          least significant byte
    # Returns  the assembled 32 bit unsigned value

    # say $CInt[ 0 ].base(16) ~ ", " ~ $CInt[ 1 ].base(16) ~ ", " ~ $CInt[ 2 ].base(16) ~ ", " ~ $CInt[ 3 ].base(16);

    # Each byte occupies its own 8 bit lane, so OR-ing the shifted bytes
    # gives the same result as adding them.
    my uint32 $RakuInt =   $CInt[ 0 ]
                      +| ( $CInt[ 1 ] +<  8 )
                      +| ( $CInt[ 2 ] +< 16 )
                      +| ( $CInt[ 3 ] +< 24 );
    return $RakuInt;
}
</NativeConvert.pm6>
<NativeConstants.pm6>
# unit module NativeConstants;
# NativeConstants.pm6
#`{
Constants used across Windows libraries that use Native Call
Reference to types and values:
http://dsource.org/projects/tango/ticket/820
https://www.originlab.com/doc/OriginC/ref/Registry-Registry
https://github.com/jonasstrandstedt/MinGW/blob/master/MinGW/include/winreg.h#L14
https://stackoverflow.com/questions/321413/lpcstr-lpctstr-and-lptstr
https://docs.microsoft.com/en-us/windows/win32/sysinfo/structure-of-the-registry
perl6 -I. -c NativeConstants.pm6
Test one liner:
perl6 -I. -e "use NativeConstants; say
FORMAT_MESSAGE_ALLOCATE_BUFFER.base(16);"
}
use NativeCall;
# Raku native types standing in for the Windows API type names.
# https://docs.microsoft.com/en-us/windows/win32/winprog/windows-data-types
constant BYTE     := uint8;
constant WCHAR    := uint16;
constant DWORD    := uint32;
constant REGSAM   := uint32;
constant WCHARS   := CArray[WCHAR];
constant BYTES    := CArray[BYTE];
constant CPOINTER := uint32;         # NOTE(review): 32 bits wide; confirm this is intended on 64-bit targets
constant INT       = uint32;         # NOTE(review): the Windows data types page lists INT as signed; confirm
constant UINT      = uint32;
constant HANDLE    = Pointer[void];
constant LPWCTSTR  = CArray[WCHAR];
constant NULL      = 0x0000;

# Flags for FormatMessage.
# https://docs.microsoft.com/en-us/windows/win32/api/winbase/nf-winbase-formatmessage
constant FORMAT_MESSAGE_ALLOCATE_BUFFER = 0x00000100;
constant FORMAT_MESSAGE_FROM_SYSTEM     = 0x00001000;
constant FORMAT_MESSAGE_IGNORE_INSERTS  = 0x00000200;
constant FORMAT_MESSAGE_FROM_STRING     = 0x00000400;

# Language identifiers.
# https://docs.microsoft.com/en-us/windows/win32/intl/language-identifiers
# https://docs.microsoft.com/en-us/windows/win32/intl/language-identifier-constants-and-strings
#    ^^^^^^ use the values in the third column to the right, not the
#    first columns on the left
constant LANG_USER_DEFAULT   = 0x0000;
constant LANG_SYSTEM_DEFAULT = 0x0002;

# https://docs.microsoft.com/en-us/windows/win32/debug/system-error-codes--0-499-
constant ERROR_SUCCESS = 0;          # Win-Api 0 = success
# Registry key access rights.
# https://docs.microsoft.com/en-us/windows/win32/sysinfo/registry-key-security-and-access-rights
# say KEY_ALL_ACCESS.value.base(16);
# 0xF003F
enum WinRegAccessRights (
    # constant KEY_READ      = 0x20019;
    # constant KEY_SET_VALUE = 0x0002;

    # Combines the STANDARD_RIGHTS_REQUIRED, KEY_QUERY_VALUE, KEY_SET_VALUE,
    # KEY_CREATE_SUB_KEY, KEY_ENUMERATE_SUB_KEYS, KEY_NOTIFY, and
    # KEY_CREATE_LINK access rights.
    KEY_ALL_ACCESS         => 0xF003F,

    # Reserved for system use.
    KEY_CREATE_LINK        => 0x0020,

    # Required to create a subkey of a registry key.
    KEY_CREATE_SUB_KEY     => 0x0004,

    # Required to enumerate the subkeys of a registry key.
    KEY_ENUMERATE_SUB_KEYS => 0x0008,

    # Equivalent to KEY_READ.
    KEY_EXECUTE            => 0x20019,

    # Required to request change notifications for a registry key or for
    # subkeys of a registry key.
    KEY_NOTIFY             => 0x0010,

    # Required to query the values of a registry key.
    KEY_QUERY_VALUE        => 0x0001,

    # Combines the STANDARD_RIGHTS_READ, KEY_QUERY_VALUE,
    # KEY_ENUMERATE_SUB_KEYS, and KEY_NOTIFY values.
    KEY_READ               => 0x20019,

    # Required to create, delete, or set a registry value.
    KEY_SET_VALUE          => 0x0002,

    # Indicates that an application on 64-bit Windows should operate on the
    # 32-bit registry view.  This flag is ignored by 32-bit Windows.  For
    # more information, see Accessing an Alternate Registry View.  This flag
    # must be combined using the OR operator with the other flags in this
    # table that either query or access registry values.
    # Windows 2000: This flag is not supported.
    KEY_WOW64_32KEY        => 0x0200,

    # Indicates that an application on 64-bit Windows should operate on the
    # 64-bit registry view.  This flag is ignored by 32-bit Windows.  For
    # more information, see Accessing an Alternate Registry View.  This flag
    # must be combined using the OR operator with the other flags in this
    # table that either query or access registry values.
    # Windows 2000: This flag is not supported.
    KEY_WOW64_64KEY        => 0x0100,

    # Combines the STANDARD_RIGHTS_WRITE, KEY_SET_VALUE, and
    # KEY_CREATE_SUB_KEY access rights.
    KEY_WRITE              => 0x20006,
);
# Predefined registry root key (hive) handle values.
# Example: say HKEY_CURRENT_USER.value.base(16);
#          prints 80000001, i.e. 0x80000001
enum WinRegHives (
    HKEY_CLASSES_ROOT     => 0x80000000,
    HKEY_CURRENT_USER     => 0x80000001,
    HKEY_LOCAL_MACHINE    => 0x80000002,
    HKEY_USERS            => 0x80000003,
    HKEY_PERFORMANCE_DATA => 0x80000004,
    HKEY_CURRENT_CONFIG   => 0x80000005,
    HKEY_DYN_DATA         => 0x80000006,
);
# Registry value type codes.
enum ValueNames (
    REG_NONE                       => 0,    # No value type
    REG_SZ                         => 1,    # Unicode nul terminated string
    REG_EXPAND_SZ                  => 2,    # Unicode nul terminated string = (with environment variable references)
    REG_BINARY                     => 3,    # Free form binary
    REG_DWORD                      => 4,    # 32-bit number
    REG_DWORD_LITTLE_ENDIAN        => 4,    # 32-bit number = (same as REG_DWORD)
    REG_DWORD_BIG_ENDIAN           => 5,    # 32-bit number
    REG_LINK                       => 6,    # Symbolic Link = (unicode)
    REG_MULTI_SZ                   => 7,    # Multiple Unicode strings
    REG_RESOURCE_LIST              => 8,    # Resource list in the resource map
    REG_FULL_RESOURCE_DESCRIPTOR   => 9,    # Resource list in the hardware description
    REG_RESOURCE_REQUIREMENTS_LIST => 10,
    REG_QWORD                      => 11,   # 64-bit number
    REG_QWORD_LITTLE_ENDIAN        => 11,   # same as REG_QWORD
);
</NativeConstants.pm6>