Resending to -hackers, as I realized this isn't a documentation issue and so is
not appropriate for, or apparently interesting to, readers of -doc.

Inspired by David's patch [0], find attached a patch fixing words duplicated
across line boundaries.

I should probably just call the algorithm proprietary, but if you really wanted 
to know, I've suffered again through sed's black/slashes.

time find . -name '*.c' -o -name '*.h' |xargs sed -srn '/\/\*/!d; :l; 
/\*\//!{N; b l}; s/\n[[:space:]]*\*/\n/g; 
/(\<[[:alpha:]]{1,})\>\n[[:space:]]*\<\1\>/!d; s//>>&<</; p'

Alternately:
time for f in `find . -name '*.c' -o -name '*.h'`; do x=`<"$f" sed -rn 
'/\/\*/!d; :l; /\*\//!{N; b l}; s/\n[[:space:]]*\*/\n/g; 
/(\<[[:alpha:]]{1,})\>\n[[:space:]]*\<\1\>/!d; s//>>&<</; p'`; [ -n "$x" ] && 
echo "$f:" && echo "$x"; done |less

[0] 
https://www.postgresql.org/message-id/flat/CAKJS1f8du35u5DprpykWvgNEScxapbWYJdHq%2Bz06Wj3Y2KFPbw%40mail.gmail.com

PS. Not unrelated:
http://3.bp.blogspot.com/-qgW9kcbSh-Q/T5olkOrTWVI/AAAAAAAAAB0/BQhmO5AW_QQ/s1600/4de3efb5846e117e579edc91d6dceb9c.jpg
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 030d0f4418..d5a568106c 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -211,7 +211,7 @@ freeGinBtreeStack(GinBtreeStack *stack)
 /*
  * Try to find parent for current stack position. Returns correct parent and
  * child's offset in stack->parent. The root page is never released, to
- * to prevent conflict with vacuum process.
+ * prevent conflict with vacuum process.
  */
 static void
 ginFindParents(GinBtree btree, GinBtreeStack *stack)
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 4aff6cf7f2..3f778093cb 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1737,7 +1737,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
  * possible that GXACTs that were valid at checkpoint start will no longer
  * exist if we wait a little bit. With typical checkpoint settings this
  * will be about 3 minutes for an online checkpoint, so as a result we
- * we expect that there will be no GXACTs that need to be copied to disk.
+ * expect that there will be no GXACTs that need to be copied to disk.
  *
  * If a GXACT remains valid across multiple checkpoints, it will already
  * be on disk so we don't bother to repeat that write.
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index 5c6de4989c..4a039b1190 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -422,7 +422,7 @@ ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOn
 /*
  * A file was restored from the archive under a temporary filename (path),
  * and now we want to keep it. Rename it under the permanent filename in
- * in pg_wal (xlogfname), replacing any existing file with the same name.
+ * pg_wal (xlogfname), replacing any existing file with the same name.
  */
 void
 KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 3e148f03d0..1262594058 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -2898,7 +2898,7 @@ analyze_mcv_list(int *mcv_counts,
 	 * significantly more common than the estimated selectivity they would
 	 * have if they weren't in the list.  All non-MCV values are assumed to be
 	 * equally common, after taking into account the frequencies of all the
-	 * the values in the MCV list and the number of nulls (c.f. eqsel()).
+	 * values in the MCV list and the number of nulls (c.f. eqsel()).
 	 *
 	 * Here sumcount tracks the total count of all but the last (least common)
 	 * value in the MCV list, allowing us to determine the effect of excluding
diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c
index 5ee46905d8..1ac7756f2a 100644
--- a/src/backend/commands/seclabel.c
+++ b/src/backend/commands/seclabel.c
@@ -321,7 +321,7 @@ SetSharedSecurityLabel(const ObjectAddress *object,
 /*
  * SetSecurityLabel attempts to set the security label for the specified
  * provider on the specified object to the given value.  NULL means that any
- * any existing label should be deleted.
+ * existing label should be deleted.
  */
 void
 SetSecurityLabel(const ObjectAddress *object,
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index 1b659a5870..65f2ba85fe 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -956,7 +956,7 @@ info_cb(const SSL *ssl, int type, int args)
  * precomputed.
  *
  * Since few sites will bother to create a parameter file, we also
- * also provide a fallback to the parameters provided by the
+ * provide a fallback to the parameters provided by the
  * OpenSSL project.
  *
  * These values can be static (once loaded or computed) since the
diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c
index 0dd55ac1ba..d241d9b3f9 100644
--- a/src/backend/partitioning/partprune.c
+++ b/src/backend/partitioning/partprune.c
@@ -2483,7 +2483,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
 
 	/*
 	 * If the query does not constrain all key columns, we'll need to scan the
-	 * the default partition, if any.
+	 * default partition, if any.
 	 */
 	if (nvalues < partnatts)
 		result->scan_default = partition_bound_has_default(boundinfo);
diff --git a/src/backend/storage/ipc/barrier.c b/src/backend/storage/ipc/barrier.c
index 00ab57c0f6..bcfe87b854 100644
--- a/src/backend/storage/ipc/barrier.c
+++ b/src/backend/storage/ipc/barrier.c
@@ -115,7 +115,7 @@ BarrierInit(Barrier *barrier, int participants)
  *
  * While waiting, pg_stat_activity shows a wait_event_class and wait_event
  * controlled by the wait_event_info passed in, which should be a value from
- * from one of the WaitEventXXX enums defined in pgstat.h.
+ * one of the WaitEventXXX enums defined in pgstat.h.
  *
  * Return true in one arbitrarily chosen participant.  Return false in all
  * others.  The return code can be used to elect one participant to execute a
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 017cc1a7b1..4f3d1f8872 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -1144,7 +1144,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
 					 * Is this a YMD or HMS specification, or a year number?
 					 * YMD and HMS are required to be six digits or more, so
 					 * if it is 5 digits, it is a year.  If it is six or more
-					 * more digits, we assume it is YMD or HMS unless no date
+					 * digits, we assume it is YMD or HMS unless no date
 					 * and no time values have been specified.  This forces 6+
 					 * digit years to be at the end of the string, or to use
 					 * the ISO date specification.

Reply via email to