On 4/24/2015 11:23 AM, Dianne Skoll wrote:
On Fri, 24 Apr 2015 16:20:41 +0100
Paul Stead <paul.st...@zeninternet.co.uk> wrote:

I've had thoughts of an extension which calculates the number of IP
addresses specified in an SPF record, then calculating the % of
world-wide addresses this SPF declares... I don't seem to be able to
bend the Perl SPF module to spit out any numbers etc so seems it would
have to be coded separately
Someone sent me off-list some Perl that does that.  I haven't looked closely
at it.  If that person is on this list, maybe he'll send it on-list?

Regards,

Dianne.
I suppose it's safe enough to post publicly. Be aware that it's just a proof of concept and not tested thoroughly enough to guarantee it's correct, performant, or even if it terminates in all cases.

Theoretically, it does the following
    detect +all and ?all (both of which specify to deliver without marking)
    detect coverage of the IPv4 and v6 address spaces (by /16)
    detect when followed records exceed a max depth
    detect when an SPF record loops on itself
    detect uninterpolated exists
    detect syntax errors in exists macros

It also stores IP coverage as a bitmask, so it should measure somewhere around 16k - 20k of memory consumption as well. Script is attached, anyone can feel free to adapt it for SA.
use strict;
use warnings;

use Net::DNS;
use Net::IP;

# fetch spf record for domain
my $argument_domain = $ARGV[0];

print &check_domain($argument_domain) ."\n";

# returns one of "not useless", "useless - $reason", "gave up - $reason", 
"invalid - $reason"
# for SPF syntax, see http://www.openspf.org/SPF_Record_Syntax
# for macro syntax, see http://www.openspf.org/RFC_4408#macros
sub check_domain {
  my ($domain, %params) = @_;
  my $dns = Net::DNS::Resolver->new;
  my $query = $dns->search($domain, 'TXT') or die "Error performing TXT query 
for $domain! ". $dns->errorstring;

  if (not defined $params{'domains_seen'}) {
    $params{'domains_seen'} = [];
  }

  if (grep {$_ eq $domain} @{$params{'domains_seen'}}) {
    return "invalid - detected domain loop beginning with $domain";
  }

  push(@{$params{'domains_seen'}}, $domain);

  $params{'iteration'} ||= 1;
  $params{'max_iterations'} ||= 40;

  # build array of /16s for ip range masking
  # an spf record is useless if it allows at least one ip address in every /16
  # this is a messy heuristic to avoid resource exhaustion, especially with ipv6
  # array is 2 ** 16 flags stored as 32-bit bitmasks (each mask holding 2 ** 5 
flags)
  if (not defined $params{'ipv4_coverage'}) {
    $params{'ipv4_coverage'} = [];
    $#{$params{'ipv4_coverage'}} = 2 ** (16 - 5) - 1;
  }
  if (not defined $params{'ipv6_coverage'}) {
    $params{'ipv6_coverage'} = [];
    $#{$params{'ipv6_coverage'}} = 2 ** (16 - 5) - 1;
  }

  if ($params{'iteration'} > $params{'max_iterations'}) {
    return "gave up - max dns query iteration limit ($params{'max_iterations'}) 
reached";
  }

  foreach my $result ($query->answer) {
    next unless $result->type eq 'TXT';
    my $spf_line = $result->txtdata;

    if ($spf_line =~ /^v=spf[12]/i) {
      # split into clauses
      my @clauses = split / /, $spf_line;

      # first, search for replace and operate on that instead
      foreach my $clause (@clauses) {
        if ($clause =~ /^redirect=(.*)$/) {
          my $domain = $1;
          if ($domain =~ /%[{_-]/) {
            return "gave up - macros in redirect modifier not supported 
($domain)";
          } elsif ($domain =~ /%[^{_%-]/) {
            return "invalid - syntax error in macro interpolation for $domain";
          } else {
            # format escaped percent literals
            $domain =~ s/%%/%/g;

            # return recursed result
            return &check_domain($domain, %params, 
iteration=>$params{'iteration'} + 1);
          }
        }
      }

      foreach my $clause (@clauses) {
        # for each clause that is pass or neutral

        # clauses default to +
        # + (pass) and ? (neutral) both specify to deliver mail
        # - (fail) and ~ (soft fail) specify to deliver or mark
        # we don't care about - and ~ results because they can't be used to 
falsely improve score
        next if $clause =~ /^[-~]/;

        # if ip address or range, add to ip coverage
        # track ipv4 and ipv6 separately by /16
        if ($clause =~ /^.?ip4:(.*)/) {
          my $address = $1;
          &mark_ip_ranges($params{'ipv4_coverage'}, $params{'ipv6_coverage'}, 
$address);
        } elsif ($clause =~ /^.?ip6:(.*)/) {
          my $address = $1;
          &mark_ip_ranges($params{'ipv4_coverage'}, $params{'ipv6_coverage'}, 
$address);
        } elsif ($clause =~ /^.?all/) {
          # if +all, rule is clearly useless
          return "useless - use of universal pass rule $clause";
        } elsif ($clause =~ /^.?exists:(.*)/) {
          my $exists_domain = $1;

          # if using an exists rule without macros, rule is clearly useless
          if ($exists_domain !~ /%{/) {
            return "useless - use of uninterpolated exists rule $clause";
          }
        } elsif ($clause =~ /^.?a(.*)/) {
          # if using a, perform lookup and add to ip coverage in like and kind 
to ipv4
          my ($a_domain, $mask);
          $a_domain = $1 || $domain;
          # perform lookup for AAAA as well and add to ip coverage in like and 
kind to ipv6
          ($a_domain, $mask) = &parse_domain($a_domain);
          my @ips = (&dns_lookup($a_domain, 'A'), &dns_lookup($a_domain, 
'AAAA'));
          foreach my $ip (@ips) {
            &mark_ip_ranges($params{'ipv4_coverage'}, $params{'ipv6_coverage'}, 
"$ip/$mask");
          }
        } elsif ($clause =~ /^.?mx(.*)/) {
          # if using mx, perform lookup and add to ip coverage
          my ($a_domain, $mask);
          $a_domain = $1 || $domain;
          ($a_domain, $mask) = &parse_domain($a_domain);
          my @ips = &dns_lookup($a_domain, 'MX');
          foreach my $ip (@ips) {
            &mark_ip_ranges($params{'ipv4_coverage'}, $params{'ipv6_coverage'}, 
"$ip/$mask");
          }
        } else {
        # if using ptr, might be complicated
        # if using include, recurse with depth limit 10
        }
      }
    }
  }

  for (my $i = 0; $i < scalar (@{$params{'ipv4_coverage'}}); $i++) {
    my $mask = $params{'ipv4_coverage'}->[$i] || 0;
    if ($mask != 0) {
      warn sprintf("%d - %x\n", $i, $mask);
    }
  }

  return "$domain - not useless";
}

sub dns_lookup {
  my ($domain, $type) = @_;

  my $dns = Net::DNS::Resolver->new;
  my $query = $dns->search($domain, $type);
  if ($query) {
    return $query->answer;
  } else {
    warn "Error performing $type query for $domain! ". $dns->errorstring;
  }
}

sub parse_domain {
  my ($domain) = @_;

  $domain =~ s/^://;
  my $mask = '';
  if ($domain =~ /\/(.*)$/) {
    $mask = $2;
    $domain =~ s/\/.*$//;
  }

  # if exact match, change the subnet to the most precise we can track
  if ($mask ne '') {
    $mask = 16;
  }

  return ($domain, $mask);
}

sub mark_ip_ranges {
  my ($ip_range4, $ip_range6, $ip_string) = @_;
  my ($ip_range, $ip, @ip_ranges, $range_index, $ip_base, $ip_mask, 
$ip_range_count, $i, $ip_interval, $shift_factor);

  # parse ip string into number and /x mask - consider 
http://search.cpan.org/~manu/Net-IP-1.26/IP.pm
  $ip = new Net::IP($ip_string) or return;

  $ip_mask = $ip->prefixlen;

  # if /x mask is more than 16, set to 16
  if ($ip_mask >= 16) {
    $ip_range_count = 1;
  } else {
    $ip_range_count = 2 ** (16 - $ip_mask);
  }

  # set the interval we iterate by to list the /16s, and the number of bits to 
shift by to index into the mask of /16s
  if ($ip->version() == 6) {
    $ip_interval = new Net::IP("1::0");
    $shift_factor = 128 - 16;
    $ip_range = $ip_range6;
  } else {
    $ip_interval = new Net::IP("0.1.0.0");
    $shift_factor = 32 - 16;
    $ip_range = $ip_range4;
  }

  # ranges = $ip_range_count consecutive ranges beginning with $ip->ip()
  @ip_ranges = ();
  for ($i = 0; $i < $ip_range_count; $i++) {
    $range_index = $ip->intip()->copy()->brsft($shift_factor);
    push(@ip_ranges, $range_index);
    $ip = $ip->binadd($ip_interval);
  }

  # for each range, mark ip range bit
  for $range_index (@ip_ranges) {
    &mark_ip_range_bit($ip_range, $range_index);
  }
}

sub mark_ip_range_bit {
  my ($ip_range, $index) = @_;

  my $range_index = int($index / 32);
  my $mask_index = $index % 32;

  my $mask_apply = 1 << $mask_index;

  $ip_range->[$range_index] ||= 0;
  $ip_range->[$range_index] |= $mask_apply;
}

sub is_ip_range_full {
  my ($ip_range);

  foreach my $mask (@$ip_range) {
    if ($mask != 0xffffffff) {
      return 0;
    }
  }

  return 1;
}

Reply via email to