> > > I updated the patch to cover semi and anti joins with eqjoinsel_semi().
> > > I think it is better than returning a constant.
> >
> > What you did there is utterly unacceptable from a modularity standpoint;
> > and considering that the values will be nowhere near right, the argument
> > that "it's better than returning a constant" seems pretty weak.  I think
> > you should just take that out again.
> 
> I will try to come up with a better, data type specific implementation
> in a week.

New version with semi join estimation function attached.
diff --git a/src/backend/utils/adt/network_selfuncs.c b/src/backend/utils/adt/network_selfuncs.c
index d0d806f..f6cb2f6 100644
--- a/src/backend/utils/adt/network_selfuncs.c
+++ b/src/backend/utils/adt/network_selfuncs.c
@@ -1,32 +1,819 @@
 /*-------------------------------------------------------------------------
  *
  * network_selfuncs.c
  *	  Functions for selectivity estimation of inet/cidr operators
  *
- * Currently these are just stubs, but we hope to do better soon.
+ * Estimates are based on null fraction, distinct value count, most common
+ * values, and histogram of inet/cidr datatypes.
  *
  * Portions Copyright (c) 1996-2014, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  *
  * IDENTIFICATION
  *	  src/backend/utils/adt/network_selfuncs.c
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
 
+#include <math.h>
+
+#include "access/htup_details.h"
+#include "catalog/pg_collation.h"
+#include "catalog/pg_operator.h"
+#include "catalog/pg_statistic.h"
+#include "utils/lsyscache.h"
 #include "utils/inet.h"
+#include "utils/selfuncs.h"
+
+
+/* Default selectivity constant for the inet overlap operator */
+#define DEFAULT_OVERLAP_SEL 0.01
+
+/* Default selectivity constant for the other operators */
+#define DEFAULT_INCLUSION_SEL 0.005
+
+/* Default selectivity for given operator */
+#define DEFAULT_SEL(operator) \
+	((operator) == OID_INET_OVERLAP_OP ? \
+			DEFAULT_OVERLAP_SEL : DEFAULT_INCLUSION_SEL)
 
+static Selectivity networkjoinsel_inner(Oid operator,
+					VariableStatData *vardata1, VariableStatData *vardata2);
+static Selectivity networkjoinsel_semi(Oid operator,
+					VariableStatData *vardata1, VariableStatData *vardata2);
+static short int inet_opr_order(Oid operator);
+static Selectivity inet_his_inclusion_selec(Datum *values, int nvalues,
+					Datum *constvalue, short int opr_order);
+static Selectivity inet_mcv_join_selec(Datum *mcv1_values,
+					float4 *mcv1_numbers, int mcv1_nvalues, Datum *mcv2_values,
+					float4 *mcv2_numbers, int mcv2_nvalues, Oid operator,
+					Selectivity *max_selec_p);
+static Selectivity inet_mcv_his_selec(Datum *mcv_values, float4 *mcv_numbers,
+					int mcv_nvalues, Datum *his_values, int his_nvalues,
+					short int opr_order, Selectivity *max_selec_p);
+static Selectivity inet_his_inclusion_join_selec(Datum *his1_values,
+					int his1_nvalues, Datum *his2_values, int his2_nvalues,
+					short int opr_order);
+static Selectivity inet_semi_join_selec(bool mcv2_exists, Datum *mcv2_values,
+					int mcv2_nvalues, bool his2_exists, Datum *his2_values,
+					int his2_nvalues, double his2_weight, Datum *constvalue,
+					FmgrInfo *proc, short int opr_order);
+static short int inet_inclusion_cmp(inet *left, inet *right,
+					short int opr_order);
+static short int inet_masklen_inclusion_cmp(inet *left, inet *right,
+					short int opr_order);
+static short int inet_his_match_divider(inet *boundary, inet *query,
+					short int opr_order);
 
+/*
+ * Selectivity estimation for the subnet inclusion operators
+ */
 Datum
 networksel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(0.001);
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	Oid			operator = PG_GETARG_OID(1);
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+	int			varRelid = PG_GETARG_INT32(3),
+				his_nvalues;
+	VariableStatData vardata;
+	Node	   *other;
+	bool		varonleft;
+	Selectivity selec,
+				max_mcv_selec;
+	Datum		constvalue,
+			   *his_values;
+	Form_pg_statistic stats;
+	double		nullfrac;
+	FmgrInfo	proc;
+
+	/*
+	 * If expression is not (variable op something) or (something op
+	 * variable), then punt and return a default estimate.
+	 */
+	if (!get_restriction_variable(root, args, varRelid,
+								  &vardata, &other, &varonleft))
+		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+
+	/*
+	 * Can't do anything useful if the something is not a constant, either.
+	 */
+	if (!IsA(other, Const))
+	{
+		ReleaseVariableStats(vardata);
+		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+	}
+
+	/* All of the subnet inclusion operators are strict. */
+	if (((Const *) other)->constisnull)
+	{
+		ReleaseVariableStats(vardata);
+		PG_RETURN_FLOAT8(0.0);
+	}
+
+	if (!HeapTupleIsValid(vardata.statsTuple))
+	{
+		ReleaseVariableStats(vardata);
+		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
+	}
+
+	constvalue = ((Const *) other)->constvalue;
+	stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
+	nullfrac = stats ? stats->stanullfrac : 0.0;
+
+	fmgr_info(get_opcode(operator), &proc);
+	selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
+							&max_mcv_selec);
+
+	if (get_attstatsslot(vardata.statsTuple,
+						 vardata.atttype, vardata.atttypmod,
+						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
+						 NULL,
+						 &his_values, &his_nvalues,
+						 NULL, NULL))
+	{
+		selec += (1.0 - nullfrac - max_mcv_selec) *
+			inet_his_inclusion_selec(his_values, his_nvalues, &constvalue,
+									 varonleft ? inet_opr_order(operator) :
+												 inet_opr_order(operator) * -1);
+
+		free_attstatsslot(vardata.atttype, his_values, his_nvalues, NULL, 0);
+	}
+	else if (max_mcv_selec == 0.0)
+		selec = (1.0 - nullfrac) * DEFAULT_SEL(operator);
+
+	/* Result should be in range, but make sure... */
+	CLAMP_PROBABILITY(selec);
+
+	ReleaseVariableStats(vardata);
+	PG_RETURN_FLOAT8(selec);
 }
 
+/*
+ * Join selectivity estimation for the subnet inclusion operators
+ *
+ * This function has the same structure of eqjoinsel() on selfuncs.c.
+ */
 Datum
 networkjoinsel(PG_FUNCTION_ARGS)
 {
-	PG_RETURN_FLOAT8(0.001);
+	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
+	Oid			operator = PG_GETARG_OID(1);
+	List	   *args = (List *) PG_GETARG_POINTER(2);
+#ifdef NOT_USED
+	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
+#endif
+	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
+	double		selec;
+	VariableStatData vardata1;
+	VariableStatData vardata2;
+	bool		join_is_reversed;
+
+	get_join_variables(root, args, sjinfo,
+					   &vardata1, &vardata2, &join_is_reversed);
+
+	switch (sjinfo->jointype)
+	{
+		case JOIN_INNER:
+		case JOIN_LEFT:
+		case JOIN_FULL:
+
+			/*
+			 * Selectivity for left join is not exactly same as inner join,
+			 * but is neglected.
+			 */
+			if (!join_is_reversed)
+				selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
+			else
+				selec = networkjoinsel_inner(get_commutator(operator),
+											 &vardata2, &vardata1);
+			break;
+		case JOIN_SEMI:
+		case JOIN_ANTI:
+
+			if (!join_is_reversed)
+				selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
+			else
+				selec = networkjoinsel_semi(get_commutator(operator),
+											&vardata2, &vardata1);
+			break;
+		default:
+			/* other values not expected here */
+			elog(ERROR, "unrecognized join type: %d",
+				 (int) sjinfo->jointype);
+			selec = 0;			/* keep compiler quiet */
+			break;
+	}
+
+	ReleaseVariableStats(vardata1);
+	ReleaseVariableStats(vardata2);
+
+	CLAMP_PROBABILITY(selec);
+
+	PG_RETURN_FLOAT8((float8) selec);
+}
+
+/*
+ * Inner join selectivity estimation for the subnet inclusion operators
+ *
+ * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
+ * selectivity for join using the subnet inclusion operators.  Unlike the
+ * join selectivity function for the equality operator, eqjoinsel_inner(),
+ * one to one matching of the values is not enough.   Network inclusion
+ * operators are likely to match many to many.   It requires to loop the MVC
+ * and histogram lists to the end.  Also, MCV vs histogram selectivity is
+ * not neglected as in eqjoinsel_inner().
+ */
+static Selectivity
+networkjoinsel_inner(Oid operator,
+					 VariableStatData *vardata1, VariableStatData *vardata2)
+{
+	Form_pg_statistic stats;
+	double		nullfrac1 = 0.0,
+				nullfrac2 = 0.0;
+	Selectivity selec = 0.0,
+				mcv1_max_selec = 0.0,
+				mcv2_max_selec = 0.0;
+	bool		mcv1_exists = false,
+				mcv2_exists = false,
+				his1_exists = false,
+				his2_exists = false;
+	short int	opr_order;
+	int			mcv1_nvalues,
+				mcv2_nvalues,
+				mcv1_nnumbers,
+				mcv2_nnumbers,
+				his1_nvalues,
+				his2_nvalues;
+	Datum	   *mcv1_values,
+			   *mcv2_values,
+			   *his1_values,
+			   *his2_values;
+	float4	   *mcv1_numbers,
+			   *mcv2_numbers;
+
+	if (HeapTupleIsValid(vardata1->statsTuple))
+	{
+		if ((stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple)))
+			nullfrac1 = stats->stanullfrac;
+
+		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
+									   vardata1->atttype, vardata1->atttypmod,
+									   STATISTIC_KIND_MCV, InvalidOid,
+									   NULL,
+									   &mcv1_values, &mcv1_nvalues,
+									   &mcv1_numbers, &mcv1_nnumbers);
+		his1_exists = get_attstatsslot(vardata1->statsTuple,
+									   vardata1->atttype, vardata1->atttypmod,
+									   STATISTIC_KIND_HISTOGRAM, InvalidOid,
+									   NULL,
+									   &his1_values, &his1_nvalues,
+									   NULL, NULL);
+	}
+
+	if (HeapTupleIsValid(vardata2->statsTuple))
+	{
+		if ((stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple)))
+			nullfrac2 = stats->stanullfrac;
+
+		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
+									   vardata2->atttype, vardata2->atttypmod,
+									   STATISTIC_KIND_MCV, InvalidOid,
+									   NULL,
+									   &mcv2_values, &mcv2_nvalues,
+									   &mcv2_numbers, &mcv2_nnumbers);
+		his2_exists = get_attstatsslot(vardata2->statsTuple,
+									   vardata2->atttype, vardata2->atttypmod,
+									   STATISTIC_KIND_HISTOGRAM, InvalidOid,
+									   NULL,
+									   &his2_values, &his2_nvalues,
+									   NULL, NULL);
+	}
+
+	opr_order = inet_opr_order(operator);
+
+	if (mcv1_exists && mcv2_exists)
+		selec += inet_mcv_join_selec(mcv1_values, mcv1_numbers, mcv1_nvalues,
+									 mcv2_values, mcv2_numbers, mcv2_nvalues,
+									 operator, &mcv1_max_selec);
+	if (mcv2_exists && his1_exists)
+		selec += (1.0 - nullfrac1 - mcv1_max_selec) *
+			inet_mcv_his_selec(mcv2_values, mcv2_numbers, mcv2_nvalues,
+							   his1_values, his1_nvalues, opr_order,
+							   &mcv2_max_selec);
+	if (mcv1_exists && his2_exists)
+		selec += (1.0 - nullfrac2 - mcv2_max_selec) *
+			inet_mcv_his_selec(mcv1_values, mcv1_numbers, mcv1_nvalues,
+							   his2_values, his2_nvalues, opr_order,
+							   &mcv1_max_selec);
+	if (his1_exists && his2_exists)
+		selec += (1.0 - nullfrac1 - mcv1_max_selec) *
+			(1.0 - nullfrac2 - mcv2_max_selec) *
+			inet_his_inclusion_join_selec(his1_values, his1_nvalues,
+										  his2_values, his2_nvalues,
+										  opr_order);
+
+	if ((!mcv1_exists && !his1_exists) || (!mcv2_exists && !his2_exists))
+		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
+
+	if (mcv1_exists)
+		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
+						  mcv1_numbers, mcv1_nnumbers);
+	if (mcv2_exists)
+		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
+						  mcv2_numbers, mcv2_nnumbers);
+	if (his1_exists)
+		free_attstatsslot(vardata1->atttype, his1_values, his1_nvalues,
+						  NULL, 0);
+	if (his2_exists)
+		free_attstatsslot(vardata2->atttype, his2_values, his2_nvalues,
+						  NULL, 0);
+
+	return selec;
+}
+
+/*
+ * Semi join selectivity estimation for the subnet inclusion operators
+ *
+ * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram
+ * vs histogram selectivity for semi join using the subnet inclusion
+ * operators.
+ */
+static Selectivity
+networkjoinsel_semi(Oid operator,
+					VariableStatData *vardata1, VariableStatData *vardata2)
+{
+	Form_pg_statistic stats;
+	Selectivity selec = 0.0,
+				mcv1_max_selec = 0.0,
+				mcv2_max_selec = 0.0;
+	double		nullfrac1 = 0.0,
+				nullfrac2 = 0.0,
+				his2_weight = 0.0;
+	bool		mcv1_exists = false,
+				mcv2_exists = false,
+				his1_exists = false,
+				his2_exists = false;
+	short int	opr_order = 0;
+	FmgrInfo	proc;
+	int			i,
+				mcv1_nvalues,
+				mcv2_nvalues,
+				mcv1_nnumbers,
+				mcv2_nnumbers,
+				his1_nvalues,
+				his2_nvalues;
+	Datum	   *mcv1_values,
+			   *mcv2_values,
+			   *his1_values,
+			   *his2_values;
+	float4	   *mcv1_numbers,
+			   *mcv2_numbers;
+
+	if (HeapTupleIsValid(vardata1->statsTuple))
+	{
+		if ((stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple)))
+			nullfrac1 = stats->stanullfrac;
+
+		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
+									   vardata1->atttype, vardata1->atttypmod,
+									   STATISTIC_KIND_MCV, InvalidOid,
+									   NULL,
+									   &mcv1_values, &mcv1_nvalues,
+									   &mcv1_numbers, &mcv1_nnumbers);
+		his1_exists = get_attstatsslot(vardata1->statsTuple,
+									   vardata1->atttype, vardata1->atttypmod,
+									   STATISTIC_KIND_HISTOGRAM, InvalidOid,
+									   NULL,
+									   &his1_values, &his1_nvalues,
+									   NULL, NULL);
+	}
+
+	if (HeapTupleIsValid(vardata2->statsTuple))
+	{
+
+		if ((stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple)))
+			nullfrac2 = stats->stanullfrac;
+
+		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
+									   vardata2->atttype, vardata2->atttypmod,
+									   STATISTIC_KIND_MCV, InvalidOid,
+									   NULL,
+									   &mcv2_values, &mcv2_nvalues,
+									   &mcv2_numbers, &mcv2_nnumbers);
+		his2_exists = get_attstatsslot(vardata2->statsTuple,
+									   vardata2->atttype, vardata2->atttypmod,
+									   STATISTIC_KIND_HISTOGRAM, InvalidOid,
+									   NULL,
+									   &his2_values, &his2_nvalues,
+									   NULL, NULL);
+	}
+
+	if (mcv2_exists)
+	{
+		fmgr_info(get_opcode(operator), &proc);
+
+		for (i = 0; i < mcv2_nvalues; i++)
+			 mcv2_max_selec += mcv2_numbers[i];
+	}
+
+	if (his2_exists)
+	{
+		his2_weight = (1.0 - nullfrac2 - mcv2_max_selec) * vardata2->rel->rows;
+
+		/* Match the histogram on the right hand side using the commutator. */
+		opr_order = inet_opr_order(operator) * -1;
+	}
+
+	if (mcv1_exists && (mcv2_exists || his2_exists))
+		for (i = 0; i < mcv1_nvalues; i++)
+			selec += mcv1_numbers[i] *
+				inet_semi_join_selec(mcv2_exists, mcv2_values, mcv2_nvalues,
+									 his2_exists, his2_values, his2_nvalues,
+									 his2_weight, &mcv1_values[i],
+									 &proc, opr_order);
+
+	if (his1_exists && (mcv2_exists || his2_exists))
+	{
+		double		his1_total_selec = 0.0;
+
+		/*
+		 * The first and the last element of the first histogram will not be
+		 * used, on the grounds that they are outliers and hence not very
+		 * representative.
+		 */
+		for (i = 1; i < his1_nvalues - 1; i++)
+			his1_total_selec +=
+				inet_semi_join_selec(mcv2_exists, mcv2_values, mcv2_nvalues,
+									 his2_exists, his2_values, his2_nvalues,
+									 his2_weight, &his1_values[i],
+									 &proc, opr_order);
+		selec += ((1.0 - nullfrac1 - mcv1_max_selec) / (his1_nvalues - 2)) *
+			his1_total_selec;
+	}
+
+	if ((!mcv1_exists && !his1_exists) || (!mcv2_exists && !his2_exists))
+		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);
+
+	if (mcv1_exists)
+		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
+						  mcv1_numbers, mcv1_nnumbers);
+	if (mcv2_exists)
+		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
+						  mcv2_numbers, mcv2_nnumbers);
+	if (his1_exists)
+		free_attstatsslot(vardata1->atttype, his1_values, his1_nvalues,
+						  NULL, 0);
+	if (his2_exists)
+		free_attstatsslot(vardata2->atttype, his2_values, his2_nvalues,
+						  NULL, 0);
+
+	return selec;
+}
+
+/*
+ * Practical comparable numbers for the subnet inclusion operators
+ */
+static short int
+inet_opr_order(Oid operator)
+{
+	switch (operator)
+	{
+		case OID_INET_SUP_OP:
+			return -2;
+		case OID_INET_SUPEQ_OP:
+			return -1;
+		case OID_INET_OVERLAP_OP:
+			return 0;
+		case OID_INET_SUBEQ_OP:
+			return 1;
+		case OID_INET_SUB_OP:
+			return 2;
+		default:
+			elog(ERROR, "unknown operator for inet inclusion selectivity");
+	}
+}
+
+/*
+ * Inet histogram inclusion selectivity estimation
+ *
+ * Calculates histogram selectivity for the subnet inclusion operators of
+ * the inet type.  The return value is between 0 and 1.  It should be
+ * corrected with the MVC selectivity and null fraction.  If the constant
+ * is less than the first element or greater than the last element of
+ * the histogram the return value will be 0.
+ *
+ * The histogram is originally for the basic comparison operators.  Only
+ * the common bits of the network part and the length of the network part
+ * (masklen) are appropriate for the subnet inclusion operators.  Fortunately,
+ * basic comparison fits in this situation.  Even so, the length of the
+ * network part would not really be significant in the histogram.  This would
+ * lead to big mistakes for data sets with uneven masklen distribution.
+ * To avoid this problem, comparison with the left and the right side of the
+ * buckets used together.
+ *
+ * Histogram bucket matches are calculated in two forms.  If the constant
+ * matches both sides the bucket is considered as fully matched.  If the
+ * constant matches only the right side the bucket, it is not considered
+ * as matched unless it is the last bucket, because it will match the next
+ * bucked.  If all of these buckets would be considered as matched, it would
+ * lead to unfair multiple matches for some constants.
+ *
+ * The second form is to match the bucket partially.  We try to calculate
+ * dividers for both of the boundaries.  If the address family of the boundary
+ * does not match the constant or comparison of the length of the network
+ * parts is not true by the operator, the divider for the boundary would not
+ * taken into account.  If both of the dividers can be calculated the greater
+ * one will be used to minimize the mistake in the buckets which have
+ * disparate masklens.
+ *
+ * The divider on the partial bucket match is imagined as the distance
+ * between the decisive bits and the common bits of the addresses.  It will
+ * be used as power of two as it is the natural scale for the IP network
+ * inclusion.  The partial bucket match divider calculation is an empirical
+ * formula and subject to change with more experiment.
+ *
+ * For partial match with buckets which have different address families
+ * on the left and right sides only the boundary with the same address
+ * family is taken into consideration.  This can cause more mistakes for these
+ * buckets if the masklens of their boundaries are also disparate.  It can
+ * only be the case for one bucket, if there are addresses with different
+ * families on the column.  It seems as a better option than not considering
+ * these buckets.
+ */
+static Selectivity
+inet_his_inclusion_selec(Datum *values, int nvalues, Datum *constvalue,
+						 short int opr_order)
+{
+	inet	   *query,
+			   *left,
+			   *right;
+	float		match = 0.0;
+	int			i;
+	short int	left_order,
+				right_order,
+				left_divider,
+				right_divider;
+
+	query = DatumGetInetPP(*constvalue);
+	left = DatumGetInetPP(values[0]);
+	left_order = inet_inclusion_cmp(left, query, opr_order);
+
+	for (i = 1; i < nvalues; i++)
+	{
+		right = DatumGetInetP(values[i]);
+		right_order = inet_inclusion_cmp(right, query, opr_order);
+
+		if (left_order == 0 && right_order == 0)
+		{
+			/* Full bucket match. */
+
+			match += 1.0;
+		}
+		else if ((left_order <= 0 && right_order > 0) ||
+				 (left_order >= 0 && right_order < 0) ||
+				 (right_order == 0 && i == nvalues - 1))
+		{
+			/* Partial bucket match. */
+
+			left_divider = inet_his_match_divider(left, query, opr_order);
+			right_divider = inet_his_match_divider(right, query, opr_order);
+
+			if (left_divider >= 0 || right_divider >= 0)
+				match += 1.0 / pow(2, Max(left_divider, right_divider));
+		}
+
+		/* Shift the variables. */
+		left = right;
+		left_order = right_order;
+	}
+
+	/* There are nvalues - 1 buckets. */
+	return match / (nvalues - 1);
+}
+
+/*
+ * Inet MCV inner join selectivity estimation
+ *
+ * The original function of the operator used in this function, like the
+ * mcv_selectivity() on selfuncs.c.  Actually, this function has nothing
+ * to do with the network data types except its name and location.
+ *
+ * The function result is the selectivity, and the fraction of the total
+ * population of the left hand side MCV are returned into *max_selec_p.
+ */
+static Selectivity
+inet_mcv_join_selec(Datum *mcv1_values, float4 *mcv1_numbers, int mcv1_nvalues,
+					Datum *mcv2_values, float4 *mcv2_numbers, int mcv2_nvalues,
+					Oid operator, Selectivity *max_selec_p)
+{
+	Selectivity selec = 0.0,
+				max_selec = 0.0;
+	FmgrInfo	proc;
+	int			i,
+				j;
+
+	fmgr_info(get_opcode(operator), &proc);
+
+	for (i = 0; i < mcv1_nvalues; i++)
+	{
+		for (j = 0; j < mcv2_nvalues; j++)
+			if (DatumGetBool(FunctionCall2Coll(&proc, DEFAULT_COLLATION_OID,
+											   mcv1_values[i], mcv2_values[j])))
+				selec += mcv1_numbers[i] * mcv2_numbers[j];
+
+		max_selec += mcv1_numbers[i];
+	}
+
+	*max_selec_p = max_selec;
+	return selec;
+}
+
+/*
+ * Inet MCV vs histogram inclusion join selectivity estimation
+ *
+ * The function result is the selectivity, and the fraction of the total
+ * population of the MCV is returned into *max_selec_p.
+ */
+static Selectivity
+inet_mcv_his_selec(Datum *mcv_values, float4 *mcv_numbers, int mcv_nvalues,
+				   Datum *his_values, int his_nvalues, short int opr_order,
+				   Selectivity *max_selec_p)
+{
+	Selectivity selec = 0.0,
+				max_selec = 0.0;
+	int			i;
+
+	for (i = 0; i < mcv_nvalues; i++)
+	{
+		selec += mcv_numbers[i] *
+			inet_his_inclusion_selec(his_values, his_nvalues, &mcv_values[i],
+									 opr_order);
+
+		max_selec += mcv_numbers[i];
+	}
+
+	*max_selec_p = max_selec;
+	return selec;
+}
+
+/*
+ * Inet histogram inclusion join selectivity estimation
+ *
+ * Values from the first histogram will be matched with the second.  The first
+ * and the last element of the first histogram will not be used, on
+ * the grounds that they are outliers and hence not very representative.
+ */
+static Selectivity
+inet_his_inclusion_join_selec(Datum *his1_values, int his1_nvalues,
+							  Datum *his2_values, int his2_nvalues,
+							  short int opr_order)
+{
+	float		match = 0.0;
+	int			i;
+
+	for (i = 1; i < his1_nvalues - 1; i++)
+		match += inet_his_inclusion_selec(his2_values, his2_nvalues,
+										  &his1_values[i], opr_order);
+
+	/* his1_nvalues - 2 values should be checked. */
+	return match / (his1_nvalues - 2);
+}
+
+/*
+ * Inet semi join selectivity estimation
+ */
+static Selectivity
+inet_semi_join_selec(bool mcv2_exists, Datum *mcv2_values, int mcv2_nvalues,
+					 bool his2_exists, Datum *his2_values, int his2_nvalues,
+					 double his2_weight, Datum *constvalue,
+					 FmgrInfo *proc, short int opr_order)
+{
+	if (mcv2_exists)
+	{
+		int			i;
+
+		for (i = 0; i < mcv2_nvalues; i++)
+			if (DatumGetBool(FunctionCall2Coll(proc, DEFAULT_COLLATION_OID,
+											   *constvalue, mcv2_values[i])))
+				return 1.0;
+	}
+
+	/* Do not bother if histogram weight is smaller than 0.1. */
+	if (his2_exists && his2_weight > 0.1)
+	{
+		Selectivity	his_selec;
+
+		his_selec = inet_his_inclusion_selec(his2_values, his2_nvalues,
+											 constvalue, opr_order);
+
+		if (his_selec > 0)
+			return Min(1.0, his2_weight * his_selec);
+	}
+
+	return 0.0;
+}
+
+/*
+ * Comparison function for the subnet inclusion operators
+ *
+ * Comparison is compatible with the basic comparison function for the inet
+ * type.  See network_cmp_internal on network.c for the original.  Basic
+ * comparison operators are implemented with the network_cmp_internal
+ * function.  It is possible to implement the subnet inclusion operators with
+ * this function.
+ *
+ * Comparison is first on the common bits of the network part, then on
+ * the length of the network part (masklen) as the network_cmp_internal
+ * function.  Only the first part is on this function.  The second part is
+ * separated to another function for reusability.  The difference between
+ * the second part and the original network_cmp_internal is that the operator
+ * is used while comparing the lengths of the network parts.  See the second
+ * part on the inet_masklen_inclusion_cmp function below.
+ */
+static short int
+inet_inclusion_cmp(inet *left, inet *right, short int opr_order)
+{
+	if (ip_family(left) == ip_family(right))
+	{
+		short int	order;
+
+		order = bitncmp(ip_addr(left), ip_addr(right),
+						Min(ip_bits(left), ip_bits(right)));
+
+		if (order != 0)
+			return order;
+
+		return inet_masklen_inclusion_cmp(left, right, opr_order);
+	}
+
+	return ip_family(left) - ip_family(right);
+}
+
+/*
+ * Masklen comparison function for the subnet inclusion operators
+ *
+ * Compares the lengths of network parts of the inputs using the operator.
+ * If the comparison is okay for the operator the return value will be 0.
+ * Otherwise the return value will be less than or greater than 0 with
+ * respect to the operator.
+ */
+static short int
+inet_masklen_inclusion_cmp(inet *left, inet *right, short int opr_order)
+{
+	if (ip_family(left) == ip_family(right))
+	{
+		short int	order;
+
+		order = ip_bits(left) - ip_bits(right);
+
+		if ((order > 0 && opr_order >= 0) ||
+			(order == 0 && opr_order >= -1 && opr_order <= 1) ||
+			(order < 0 && opr_order <= 0))
+			return 0;
+
+		return opr_order;
+	}
+
+	return ip_family(left) - ip_family(right);
+}
+
+/*
+ * Inet histogram partial match divider calculation
+ *
+ * First the families and the lengths of the network parts are compared
+ * using the subnet inclusion operator.  The divider will be calculated
+ * using the masklens and the common bits of the addresses.  -1 will be
+ * returned if it cannot be calculated.
+ */
+static short int
+inet_his_match_divider(inet *boundary, inet *query, short int opr_order)
+{
+	if (inet_masklen_inclusion_cmp(boundary, query, opr_order) == 0)
+	{
+		short int	min_bits,
+					decisive_bits;
+
+		min_bits = Min(ip_bits(boundary), ip_bits(query));
+
+		/*
+		 * Set the decisive bits from the one which should contain the other
+		 * according to the operator.
+		 */
+		if (opr_order < 0)
+			decisive_bits = ip_bits(boundary);
+		else if (opr_order > 0)
+			decisive_bits = ip_bits(query);
+		else
+			decisive_bits = min_bits;
+
+		if (min_bits > 0)
+			return decisive_bits - bitncommon(ip_addr(boundary), ip_addr(query),
+											  min_bits);
+		return decisive_bits;
+	}
+
+	return -1;
 }
diff --git a/src/include/catalog/pg_operator.h b/src/include/catalog/pg_operator.h
index d7dcd1c..3b827fc 100644
--- a/src/include/catalog/pg_operator.h
+++ b/src/include/catalog/pg_operator.h
@@ -1135,32 +1135,33 @@ DESCR("not equal");
 DATA(insert OID = 1203 (  "<"	   PGNSP PGUID b f f 869 869	 16 1205 1206 network_lt scalarltsel scalarltjoinsel ));
 DESCR("less than");
 DATA(insert OID = 1204 (  "<="	   PGNSP PGUID b f f 869 869	 16 1206 1205 network_le scalarltsel scalarltjoinsel ));
 DESCR("less than or equal");
 DATA(insert OID = 1205 (  ">"	   PGNSP PGUID b f f 869 869	 16 1203 1204 network_gt scalargtsel scalargtjoinsel ));
 DESCR("greater than");
 DATA(insert OID = 1206 (  ">="	   PGNSP PGUID b f f 869 869	 16 1204 1203 network_ge scalargtsel scalargtjoinsel ));
 DESCR("greater than or equal");
 DATA(insert OID = 931  (  "<<"	   PGNSP PGUID b f f 869 869	 16 933		0 network_sub networksel networkjoinsel ));
 DESCR("is subnet");
-#define OID_INET_SUB_OP				  931
+#define OID_INET_SUB_OP			931
 DATA(insert OID = 932  (  "<<="    PGNSP PGUID b f f 869 869	 16 934		0 network_subeq networksel networkjoinsel ));
 DESCR("is subnet or equal");
-#define OID_INET_SUBEQ_OP				932
+#define OID_INET_SUBEQ_OP		932
 DATA(insert OID = 933  (  ">>"	   PGNSP PGUID b f f 869 869	 16 931		0 network_sup networksel networkjoinsel ));
 DESCR("is supernet");
-#define OID_INET_SUP_OP				  933
+#define OID_INET_SUP_OP			933
 DATA(insert OID = 934  (  ">>="    PGNSP PGUID b f f 869 869	 16 932		0 network_supeq networksel networkjoinsel ));
 DESCR("is supernet or equal");
-#define OID_INET_SUPEQ_OP				934
+#define OID_INET_SUPEQ_OP		934
 DATA(insert OID = 3552	(  "&&"    PGNSP PGUID b f f 869 869	 16 3552	0 network_overlap networksel networkjoinsel ));
 DESCR("overlaps (is subnet or supernet)");
+#define OID_INET_OVERLAP_OP		3552
 
 DATA(insert OID = 2634 (  "~"	   PGNSP PGUID l f f	  0 869 869 0 0 inetnot - - ));
 DESCR("bitwise not");
 DATA(insert OID = 2635 (  "&"	   PGNSP PGUID b f f	869 869 869 0 0 inetand - - ));
 DESCR("bitwise and");
 DATA(insert OID = 2636 (  "|"	   PGNSP PGUID b f f	869 869 869 0 0 inetor - - ));
 DESCR("bitwise or");
 DATA(insert OID = 2637 (  "+"	   PGNSP PGUID b f f	869  20 869 2638 0 inetpl - - ));
 DESCR("add");
 DATA(insert OID = 2638 (  "+"	   PGNSP PGUID b f f	 20 869 869 2637 0 int8pl_inet - - ));
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to