lør, 07 03 2009 kl. 22:57 +0100, skrev Søren Hauberg:
> > Two ways of deletion is supported. If, e.g, 3 dimensional vector is
> > observed where one of the values are NA, then
> > 
> >   1) the entire observation is deleted, or
> > 
> >   2) only the actual NA is deleted.

The attached version of 'cov' implements this behaviour. I guess it shows a
fairly general way of dealing with NAs for things that are a little bit
more complex than 'mean'. It comes with a performance hit, but only if
the user actively chooses to delete NAs. I don't think this would be
too bad.

Søren
## Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005,
##               2006, 2007 Kurt Hornik
##
## This file is part of Octave.
##
## Octave is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## Octave is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
## General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with Octave; see the file COPYING.  If not, see
## <http://www.gnu.org/licenses/>.

## -*- texinfo -*-
## @deftypefn {Function File} {} cov (@var{x}, @var{y})
## Compute covariance.
##
## If each row of @var{x} and @var{y} is an observation and each column is
## a variable, the (@var{i}, @var{j})-th entry of
## @code{cov (@var{x}, @var{y})} is the covariance between the @var{i}-th
## variable in @var{x} and the @var{j}-th variable in @var{y}.
## @iftex
## @tex
## $$
## \sigma_{ij} = {1 \over N-1} \sum_{k=1}^N (x_{ki} - \bar{x}_i)(y_{kj} - \bar{y}_j)
## $$
## where $\bar{x}_i$ and $\bar{y}_j$ are the mean values of the $i$-th column of $x$ and the $j$-th column of $y$.
## @end tex
## @end iftex
## If called with one argument, compute @code{cov (@var{x}, @var{x})}.
## @end deftypefn

## Author: KH <[email protected]>
## Description: Compute covariances

function c = cov (x, y, method)

  ## Compute the covariance of X (and optionally Y), with optional
  ## deletion of missing (NA) values.
  ##
  ## Call forms:
  ##   cov (x)             covariance between the columns of x
  ##   cov (x, method)     same, with an explicit NA handling method
  ##   cov (x, y)          covariance between columns of x and columns of y
  ##   cov (x, y, method)  same, with an explicit NA handling method
  ##
  ## METHOD is a case-insensitive string:
  ##   "use all"       (default) use every observation as given.
  ##   "use complete"  delete every observation (row) that contains an NA
  ##                   in x (or in y, when given) before computing.
  ##   "use pairs"     compute each entry from the observations that are
  ##                   complete for that particular pair of variables.
  ##
  ## A row vector input is treated as a single variable (it is
  ## transposed to a column).

  if (nargin < 1 || nargin > 3)
    print_usage ();
  endif

  ## Octave has no default-value syntax in function signatures, so the
  ## default method is resolved from nargin here.
  if (nargin == 1)
    two_inputs = false;
    method = "use all";
  elseif (nargin == 2 && ischar (y))
    ## cov (x, method): the second argument is the method string.
    method = y;
    two_inputs = false;
  elseif (nargin == 2)
    two_inputs = true;
    method = "use all";
  else
    two_inputs = true;
  endif

  if (! ischar (method))
    error ("cov: method must be a string");
  endif

  if (rows (x) == 1)
    x = x';
  endif

  ## Normalise and validate y once, so every method sees the same layout.
  if (two_inputs)
    if (rows (y) == 1)
      y = y';
    endif
    if (rows (y) != rows (x))
      error ("cov: x and y must have the same number of observations");
    endif
  endif

  method = lower (method);
  switch (method)
    case {"use all", "use complete"}
      if (strcmp (method, "use complete"))
        ## Drop every observation that has an NA in any variable.
        if (two_inputs)
          incomplete = any (isna (x), 2) | any (isna (y), 2);
          x(incomplete, :) = [];
          y(incomplete, :) = [];
        else
          x(any (isna (x), 2), :) = [];
        endif
      endif

      n = rows (x);
      ## sum (..., 1) keeps summing over observations even if only a
      ## single row is left after deletion.
      x = x - ones (n, 1) * sum (x, 1) / n;
      if (two_inputs)
        y = y - ones (n, 1) * sum (y, 1) / n;
        c = conj (x' * y / (n - 1));
      else
        c = conj (x' * x / (n - 1));
      endif

    case "use pairs"
      if (two_inputs)
        ## Cross-covariance is neither square nor symmetric in general,
        ## so every (i, j) pair is computed on its own.
        nx = columns (x);
        ny = columns (y);
        c = zeros (nx, ny, class (x));
        for i = 1:nx
          for j = 1:ny
            c(i, j) = cov (x(:, i), y(:, j), "use complete");
          endfor
        endfor
      else
        dim = columns (x);
        c = zeros (dim, class (x));
        for i = 1:dim
          for j = 1:i
            s = cov (x(:, i), x(:, j), "use complete");
            c(i, j) = s;
            ## The mirrored entry is the conjugate (identical for real data).
            c(j, i) = conj (s);
          endfor
        endfor
      endif

    otherwise
      error ("cov: unknown method '%s'", method);
  endswitch

endfunction

## One- and two-argument calls on the same data must agree.
%!test
%! x = rand (10);
%! cx1 = cov (x);
%! cx2 = cov (x, x);
%! assert (size (cx1), [10, 10]);
%! assert (size (cx2), [10, 10]);
%! assert (norm (cx1 - cx2) < 1e1*eps);

## Calling with no arguments is a usage error.
%!error cov ();

## A non-string third argument is rejected.
%!error cov (1, 2, 3);

------------------------------------------------------------------------------
Open Source Business Conference (OSBC), March 24-25, 2009, San Francisco, CA
-OSBC tackles the biggest issue in open source: Open Sourcing the Enterprise
-Strategies to boost innovation and cut costs with open source participation
-Receive a $600 discount off the registration fee with the source code: SFAD
http://p.sf.net/sfu/XcvMzF8H
_______________________________________________
Octave-dev mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/octave-dev

Reply via email to