Sometimes you want to write a patch just for the subject line. :) Here's the litany of lies:
* $^O on DOS is "dos" not "MSDOS" so its likely File::Basename never worked right on DOS. I added DOS to its list of recognized types and noted in the docs that MSDOS is for bug compat. * The docs said dirname() only acted like dirname(1) on Unix and DOS. This is a lie. It acts like dirname(1) everywhere but VMS (where it makes no sense to do so because there's no ambiguity between file and directory) and Amiga (which I suspect to simply be an oversight but I left it). * Epoc is another recognized OS. Other doc changes: * Document and test that you can call fileparse_set_fstype() as a getter. * Simplify the "what's case-insensitive" docs. Its just all non-Unix. Code changes: * Normalize $Fileparse_fstype in fileparse_set_fstype() so the code doesn't have to do regexes all over the place. This also lets us have a nice list of recognized types so the docs and code don't fall out of sync. * Don't bother returning $Fileparse_igncase when fileparse_set_fstype() is called in list context. It was never documented that it did so, its never used internally and its only useful internally. * Simplify the VMS "unix style" special case in fileparse(). * The test was using 'no_plan' which will cause t/TEST to puke. I think that wraps up File::Basename. -- Michael G Schwern [EMAIL PROTECTED] http://www.pobox.com/~schwern Reality is that which, when you stop believing in it, doesn't go away. -- Phillip K. Dick
--- lib/File/Basename.pm 2005/07/06 15:46:49 1.1 +++ lib/File/Basename.pm 2005/07/06 16:05:44 @@ -80,10 +80,9 @@ # On Unix returns ("baz", "/foo/bar", ".txt") fileparse("/foo/bar/baz", qr/\.[^.]*/); -If type is one of "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "MSWin32" -or "RISCOS" (see C<fileparse_set_fstype()>) then the pattern matching -for suffix removal is performed case-insensitively, since those -systems are not case-sensitive when opening existing files. +If type is non-Unix (see C<fileparse_set_fstype()>) then the pattern +matching for suffix removal is performed case-insensitively, since +those systems are not case-sensitive when opening existing files. You are guaranteed that C<$directories . $filename . $suffix> will denote the same location as the original $path. @@ -93,41 +92,49 @@ sub fileparse { my($fullname,@suffices) = @_; + unless (defined $fullname) { require Carp; Carp::croak("fileparse(): need a valid pathname"); } - my($fstype,$igncase) = ($Fileparse_fstype, $Fileparse_igncase); - my($dirpath,$tail,$suffix,$basename); + + my $orig_type = ''; + my($type,$igncase) = ($Fileparse_fstype, $Fileparse_igncase); + my($taint) = substr($fullname,0,0); # Is $fullname tainted? - if ($fstype =~ /^VMS/i) { - if ($fullname =~ m#/#) { $fstype = '' } # We're doing Unix emulation - else { - ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s); - $dirpath ||= ''; # should always be defined - } + if ($type eq "VMS" and $fullname =~ m{/} ) { + # We're doing Unix emulation + $orig_type = $type; + $type = 'Unix'; } - if ($fstype =~ /^MS(DOS|Win32)|epoc/i) { + + my($dirpath, $basename); + + if (grep { $type eq $_ } qw(MSDOS DOS MSWin32 Epoc)) { ($dirpath,$basename) = ($fullname =~ /^((?:.*[:\\\/])?)(.*)/s); $dirpath .= '.\\' unless $dirpath =~ /[\\\/]\z/; } - elsif ($fstype =~ /^os2/i) { + elsif ($type eq "OS2") { ($dirpath,$basename) = ($fullname =~ m#^((?:.*[:\\/])?)(.*)#s); $dirpath = './' unless $dirpath; # Can't be 0 $dirpath .= '/' unless $dirpath =~ m#[\\/]\z#; } - elsif ($fstype =~ /^MacOS/si) { + elsif ($type eq "MacOS") { ($dirpath,$basename) = ($fullname =~ /^(.*:)?(.*)/s); $dirpath = ':' unless $dirpath; } - elsif ($fstype =~ /^AmigaOS/i) { + elsif ($type eq "AmigaOS") { ($dirpath,$basename) = ($fullname =~ /(.*[:\/])?(.*)/s); $dirpath = './' unless $dirpath; } - elsif ($fstype !~ /^VMS/i) { # default to Unix + elsif ($type eq 'VMS' ) { + ($dirpath,$basename) = ($fullname =~ /^(.*[:>\]])?(.*)/s); + $dirpath ||= ''; # should always be defined + } + else { # Default to Unix semantics. ($dirpath,$basename) = ($fullname =~ m#^(.*/)?(.*)#s); - if ($^O eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) { + if ($orig_type eq 'VMS' and $fullname =~ m:^(/[^/]+/000000(/|$))(.*):) { # dev:[000000] is top of VMS tree, similar to Unix '/' # so strip it off and treat the rest as "normal" my $devspec = $1; @@ -138,7 +145,9 @@ } $dirpath = './' unless $dirpath; } + + my($tail, $suffix); if (@suffices) { $tail = ''; foreach $suffix (@suffices) { @@ -191,15 +200,14 @@ expect. To be safe, if you want the directory name of a path use C<fileparse()>. - # On all but Unix and MSDOS +Only on VMS (where there is no ambiguity between the file and directory +portions of a path) and AmigaOS (possibly due to an implementation quirk in +this module) does dirname() work like fileparse($path) returning just the +$directories. + + # On VMS and AmigaOS my $directories = dirname($path); -On all system types but Unix and MSDOS this works just like -C<fileparse($path)> but returning just the $directories. - - # On Unix and MSDOS - my $path_one_level_up = dirname($path); - When using Unix or MSDOS syntax this emulates the C<dirname(1)> shell function which is subtly different from how C<fileparse()> works. It returns all but the last level of a file path even if the last level is clearly a directory. @@ -226,34 +234,36 @@ sub dirname { - my($fstype) = $Fileparse_fstype; + my $path = shift; + + my($type) = $Fileparse_fstype; - if( $fstype =~ /VMS/i and $_[0] =~ m{/} ) { + if( $type eq 'VMS' and $path =~ m{/} ) { # Parse as Unix local($File::Basename::Fileparse_fstype) = ''; - return dirname(@_); + return dirname($path); } - my($basename,$dirname) = fileparse($_[0]); + my($basename, $dirname) = fileparse($path); - if ($fstype =~ /VMS/i) { + if ($type eq 'VMS') { $dirname ||= $ENV{DEFAULT}; } - elsif ($fstype =~ /MacOS/i) { + elsif ($type eq 'MacOS') { if( !length($basename) && $dirname !~ /^[^:]+:\z/) { $dirname =~ s/([^:]):\z/$1/s; ($basename,$dirname) = fileparse $dirname; } $dirname .= ":" unless $dirname =~ /:\z/; } - elsif ($fstype =~ /MS(DOS|Win32)|os2/i) { + elsif (grep { $type eq $_ } qw(MSDOS DOS MSWin32 OS2)) { $dirname =~ s/([^:])[\\\/]*\z/$1/; unless( length($basename) ) { ($basename,$dirname) = fileparse $dirname; $dirname =~ s/([^:])[\\\/]*\z/$1/; } } - elsif ($fstype =~ /AmigaOS/i) { + elsif ($type eq 'AmigaOS') { if ( $dirname =~ /:\z/) { return $dirname } chop $dirname; $dirname =~ s#[^:/]+\z## unless length($basename); @@ -272,15 +282,17 @@ =item C<fileparse_set_fstype> - my $previous_fstype = fileparse_set_fstype($type); + my $type = fileparse_set_fstype(); + my $previous_type = fileparse_set_fstype($type); Normally File::Basename will assume a file path type native to your current operating system (ie. /foo/bar style on Unix, \foo\bar on Windows, etc...). With this function you can override that assumption. -Valid $types are "VMS", "MSDOS", "MacOS", "AmigaOS", "OS2", "RISCOS", -"MSWin32" and "Unix" (case-insensitive). If an unrecognized $type is -given Unix semantics will be assumed. +Valid $types are "MacOS", "VMS", "AmigaOS", "OS2", "RISCOS", +"MSWin32", "DOS" (also "MSDOS" for backwards bug compatibility), +"Epoc" and "Unix" (all case-insensitive). If an unrecognized $type is +given "Unix" will be assumed. If you've selected VMS syntax, and the file specification you pass to one of these routines contains a "/", they assume you are using Unix @@ -291,14 +303,25 @@ =cut +my @Ignore_Case = qw(MacOS VMS AmigaOS OS2 RISCOS MSWin32 MSDOS DOS Epoc); +my @Types = (@Ignore_Case, qw(Unix)); sub fileparse_set_fstype { - my @old = ($Fileparse_fstype, $Fileparse_igncase); - if (@_) { - $Fileparse_fstype = $_[0]; - $Fileparse_igncase = ($_[0] =~ /^(?:MacOS|VMS|AmigaOS|os2|RISCOS|MSWin32|MSDOS)/i); - } - wantarray ? @old : $old[0]; + my $old = $Fileparse_fstype; + + if (@_) { + my $new_type = shift; + + $Fileparse_fstype = 'Unix'; # default + foreach my $type (@Types) { + $Fileparse_fstype = $type if $new_type =~ /^$type/i; + } + + $Fileparse_igncase = + (grep $Fileparse_fstype eq $_, @Ignore_Case) ? 1 : 0; + } + + return $old; } --- lib/File/Basename.t 2005/07/06 15:46:56 1.1 +++ lib/File/Basename.t 2005/07/06 16:08:59 @@ -5,7 +5,7 @@ @INC = '../lib'; } -use Test::More 'no_plan'; +use Test::More tests => 53; BEGIN { use_ok 'File::Basename' } @@ -15,6 +15,7 @@ ### Testing Unix { ok length fileparse_set_fstype('unix'), 'set fstype to unix'; + is( fileparse_set_fstype(), 'Unix', 'get fstype' ); my($base,$path,$type) = fileparse('/virgil/aeneid/draft.book7', qr'\.book\d+'); @@ -31,7 +32,7 @@ ### Testing VMS { - is(fileparse_set_fstype('VMS'), 'unix', 'set fstype to VMS'); + is(fileparse_set_fstype('VMS'), 'Unix', 'set fstype to VMS'); my($base,$path,$type) = fileparse('virgil:[aeneid]draft.book7', qr{\.book\d+}); @@ -52,9 +53,9 @@ } -### Testing MSDOS +### Testing DOS { - is(fileparse_set_fstype('MSDOS'), 'VMS', 'set fstype to MSDOS'); + is(fileparse_set_fstype('DOS'), 'VMS', 'set fstype to DOS'); my($base,$path,$type) = fileparse('C:\\virgil\\aeneid\\draft.book7', '\.book\d+'); @@ -67,8 +68,13 @@ is(dirname('A:\\'), 'A:\\'); is(dirname('arma\\'), '.'); - # Yes "/" is a legal path separator under MSDOS + # Yes "/" is a legal path separator under DOS is(basename("lib/File/Basename.pm"), "Basename.pm"); + + # $^O for DOS is "dos" not "MSDOS" but "MSDOS" is left in for + # backward bug compat. + is(fileparse_set_fstype('MSDOS'), 'DOS'); + is( dirname("\\foo\\bar\\baz"), "\\foo\\bar" ); } @@ -101,7 +107,7 @@ ### extra tests for a few specific bugs { - fileparse_set_fstype 'MSDOS'; + fileparse_set_fstype 'DOS'; # perl5.003_18 gives C:/perl/.\ is((fileparse 'C:/perl/lib')[1], 'C:/perl/'); # perl5.003_18 gives C:\perl\