Inspired by David's patch [0], please find attached a patch fixing words that
are duplicated across line boundaries.
I should probably just call the algorithm proprietary, but if you really wanted
to know, I've suffered again through sed's black/slashes.
time find . -name '*.c' -o -name '*.h' |xargs sed -srn '/\/\*/!d; :l;
/\*\//!{N; b l}; s/\n[[:space:]]*\*/\n/g;
/(\<[[:alpha:]]{1,})\>\n[[:space:]]*\<\1\>/!d; s//>>&<</; p'
Alternatively:
time for f in `find . -name '*.c' -o -name '*.h'`; do x=`<"$f" sed -rn
'/\/\*/!d; :l; /\*\//!{N; b l}; s/\n[[:space:]]*\*/\n/g;
/(\<[[:alpha:]]{1,})\>\n[[:space:]]*\<\1\>/!d; s//>>&<</; p'`; [ -n "$x" ] &&
echo "$f:" && echo "$x"; done |less
[0]
https://www.postgresql.org/message-id/flat/CAKJS1f8du35u5DprpykWvgNEScxapbWYJdHq%2Bz06Wj3Y2KFPbw%40mail.gmail.com
PS. Not unrelated:
http://3.bp.blogspot.com/-qgW9kcbSh-Q/T5olkOrTWVI/AAAAAAAAAB0/BQhmO5AW_QQ/s1600/4de3efb5846e117e579edc91d6dceb9c.jpg
diff --git a/src/backend/access/gin/ginbtree.c b/src/backend/access/gin/ginbtree.c
index 030d0f4418..d5a568106c 100644
--- a/src/backend/access/gin/ginbtree.c
+++ b/src/backend/access/gin/ginbtree.c
@@ -211,7 +211,7 @@ freeGinBtreeStack(GinBtreeStack *stack)
/*
* Try to find parent for current stack position. Returns correct parent and
* child's offset in stack->parent. The root page is never released, to
- * to prevent conflict with vacuum process.
+ * prevent conflict with vacuum process.
*/
static void
ginFindParents(GinBtree btree, GinBtreeStack *stack)
diff --git a/src/backend/access/transam/twophase.c b/src/backend/access/transam/twophase.c
index 4aff6cf7f2..3f778093cb 100644
--- a/src/backend/access/transam/twophase.c
+++ b/src/backend/access/transam/twophase.c
@@ -1737,7 +1737,7 @@ RecreateTwoPhaseFile(TransactionId xid, void *content, int len)
* possible that GXACTs that were valid at checkpoint start will no longer
* exist if we wait a little bit. With typical checkpoint settings this
* will be about 3 minutes for an online checkpoint, so as a result we
- * we expect that there will be no GXACTs that need to be copied to disk.
+ * expect that there will be no GXACTs that need to be copied to disk.
*
* If a GXACT remains valid across multiple checkpoints, it will already
* be on disk so we don't bother to repeat that write.
diff --git a/src/backend/access/transam/xlogarchive.c b/src/backend/access/transam/xlogarchive.c
index 5c6de4989c..4a039b1190 100644
--- a/src/backend/access/transam/xlogarchive.c
+++ b/src/backend/access/transam/xlogarchive.c
@@ -422,7 +422,7 @@ ExecuteRecoveryCommand(const char *command, const char *commandName, bool failOn
/*
* A file was restored from the archive under a temporary filename (path),
* and now we want to keep it. Rename it under the permanent filename in
- * in pg_wal (xlogfname), replacing any existing file with the same name.
+ * pg_wal (xlogfname), replacing any existing file with the same name.
*/
void
KeepFileRestoredFromArchive(const char *path, const char *xlogfname)
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 3e148f03d0..1262594058 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -2898,7 +2898,7 @@ analyze_mcv_list(int *mcv_counts,
* significantly more common than the estimated selectivity they would
* have if they weren't in the list. All non-MCV values are assumed to be
* equally common, after taking into account the frequencies of all the
- * the values in the MCV list and the number of nulls (c.f. eqsel()).
+ * values in the MCV list and the number of nulls (c.f. eqsel()).
*
* Here sumcount tracks the total count of all but the last (least common)
* value in the MCV list, allowing us to determine the effect of excluding
diff --git a/src/backend/commands/seclabel.c b/src/backend/commands/seclabel.c
index 5ee46905d8..1ac7756f2a 100644
--- a/src/backend/commands/seclabel.c
+++ b/src/backend/commands/seclabel.c
@@ -321,7 +321,7 @@ SetSharedSecurityLabel(const ObjectAddress *object,
/*
* SetSecurityLabel attempts to set the security label for the specified
* provider on the specified object to the given value. NULL means that any
- * any existing label should be deleted.
+ * existing label should be deleted.
*/
void
SetSecurityLabel(const ObjectAddress *object,
diff --git a/src/backend/libpq/be-secure-openssl.c b/src/backend/libpq/be-secure-openssl.c
index 1b659a5870..65f2ba85fe 100644
--- a/src/backend/libpq/be-secure-openssl.c
+++ b/src/backend/libpq/be-secure-openssl.c
@@ -956,7 +956,7 @@ info_cb(const SSL *ssl, int type, int args)
* precomputed.
*
* Since few sites will bother to create a parameter file, we also
- * also provide a fallback to the parameters provided by the
+ * provide a fallback to the parameters provided by the
* OpenSSL project.
*
* These values can be static (once loaded or computed) since the
diff --git a/src/backend/partitioning/partprune.c b/src/backend/partitioning/partprune.c
index 0dd55ac1ba..d241d9b3f9 100644
--- a/src/backend/partitioning/partprune.c
+++ b/src/backend/partitioning/partprune.c
@@ -2483,7 +2483,7 @@ get_matching_range_bounds(PartitionPruneContext *context,
/*
* If the query does not constrain all key columns, we'll need to scan the
- * the default partition, if any.
+ * default partition, if any.
*/
if (nvalues < partnatts)
result->scan_default = partition_bound_has_default(boundinfo);
diff --git a/src/backend/storage/ipc/barrier.c b/src/backend/storage/ipc/barrier.c
index 00ab57c0f6..bcfe87b854 100644
--- a/src/backend/storage/ipc/barrier.c
+++ b/src/backend/storage/ipc/barrier.c
@@ -115,7 +115,7 @@ BarrierInit(Barrier *barrier, int participants)
*
* While waiting, pg_stat_activity shows a wait_event_class and wait_event
* controlled by the wait_event_info passed in, which should be a value from
- * from one of the WaitEventXXX enums defined in pgstat.h.
+ * one of the WaitEventXXX enums defined in pgstat.h.
*
* Return true in one arbitrarily chosen participant. Return false in all
* others. The return code can be used to elect one participant to execute a
diff --git a/src/backend/utils/adt/datetime.c b/src/backend/utils/adt/datetime.c
index 017cc1a7b1..4f3d1f8872 100644
--- a/src/backend/utils/adt/datetime.c
+++ b/src/backend/utils/adt/datetime.c
@@ -1144,7 +1144,7 @@ DecodeDateTime(char **field, int *ftype, int nf,
* Is this a YMD or HMS specification, or a year number?
* YMD and HMS are required to be six digits or more, so
* if it is 5 digits, it is a year. If it is six or more
- * more digits, we assume it is YMD or HMS unless no date
+ * digits, we assume it is YMD or HMS unless no date
* and no time values have been specified. This forces 6+
* digit years to be at the end of the string, or to use
* the ISO date specification.