Expand palloc/pg_malloc API

2022-05-17 Thread Peter Eisentraut
This adds additional variants of palloc, pg_malloc, etc. that 
encapsulate common usage patterns and provide more type safety.


Examples:

-   result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
+   result = palloc_obj(IndexBuildResult);

-   collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) *
  collector->lentuples);
+   collector->tuples = palloc_array(IndexTuple, collector->lentuples);

One common point is that the new interfaces all have a return type that 
automatically matches what they are allocating, so you don't need any 
casts nor have to manually make sure the size matches the expected 
result.  Besides the additional safety, the notation is also more 
compact, as you can see above.


Inspired by the talloc library.

The interesting changes are in fe_memutils.h and palloc.h.  The rest of 
the patch is just randomly sprinkled examples to test/validate the new 
additions.From 9ba5753228e0fe9c1eca779e92795a29b23146af Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Tue, 17 May 2022 13:28:58 +0200
Subject: [PATCH] Expand palloc/pg_malloc API

This adds additional variants of palloc, pg_malloc, etc. that
encapsulate common usage patterns and provide more type safety.
Inspired by talloc library.
---
 src/include/common/fe_memutils.h | 46 ++
 src/include/utils/palloc.h   | 33 ++
 src/backend/access/brin/brin.c   | 14 
 src/backend/access/gin/ginfast.c | 17 --
 src/backend/commands/indexcmds.c | 44 
 src/backend/executor/nodeHash.c  | 57 +---
 src/bin/pg_dump/common.c | 24 +-
 src/bin/pg_dump/pg_backup_tar.c  | 10 +++---
 src/bin/psql/startup.c   |  6 ++--
 src/common/config_info.c |  2 +-
 src/common/controldata_utils.c   |  2 +-
 contrib/dblink/dblink.c  | 12 +++
 12 files changed, 163 insertions(+), 104 deletions(-)

diff --git a/src/include/common/fe_memutils.h b/src/include/common/fe_memutils.h
index 0a4c9126ea..993e0b07cd 100644
--- a/src/include/common/fe_memutils.h
+++ b/src/include/common/fe_memutils.h
@@ -29,6 +29,39 @@ extern void *pg_malloc_extended(size_t size, int flags);
 extern void *pg_realloc(void *pointer, size_t size);
 extern void pg_free(void *pointer);
 
+/*
+ * Variants with easier notation and more type safety
+ */
+
+/*
+ * Allocate space for one object of type "type"
+ */
+#define pg_malloc_obj(type) ((type *) pg_malloc(sizeof(type)))
+#define pg_malloc0_obj(type) ((type *) pg_malloc0(sizeof(type)))
+
+/*
+ * Allocate space for one object of what "ptr" points to
+ */
+#ifdef HAVE_TYPEOF
+#define pg_malloc_ptrtype(ptr) ((typeof(ptr)) pg_malloc(sizeof(*(ptr
+#define pg_malloc0_ptrtype(ptr) ((typeof(ptr)) pg_malloc0(sizeof(*(ptr
+#else
+#define pg_malloc_ptrtype(ptr) pg_malloc(sizeof(*(ptr)))
+#define pg_malloc0_ptrtype(ptr) pg_malloc0(sizeof(*(ptr)))
+#endif
+
+/*
+ * Allocate space for "count" objects of type "type"
+ */
+#define pg_malloc_array(type, count) ((type *) pg_malloc(sizeof(type) * 
(count)))
+#define pg_malloc0_array(type, count) ((type *) pg_malloc0(sizeof(type) * 
(count)))
+
+/*
+ * Change size of allocation pointed to by "pointer" to have space for "count"
+ * objects of type "type"
+ */
+#define pg_realloc_array(pointer, type, count) ((type *) pg_realloc(pointer, 
sizeof(type) * (count)))
+
 /* Equivalent functions, deliberately named the same as backend functions */
 extern char *pstrdup(const char *in);
 extern char *pnstrdup(const char *in, Size size);
@@ -38,6 +71,19 @@ extern void *palloc_extended(Size size, int flags);
 extern void *repalloc(void *pointer, Size size);
 extern void pfree(void *pointer);
 
+#define palloc_obj(type) ((type *) palloc(sizeof(type)))
+#define palloc0_obj(type) ((type *) palloc0(sizeof(type)))
+#ifdef HAVE_TYPEOF
+#define palloc_ptrtype(ptr) ((typeof(ptr)) palloc(sizeof(*(ptr
+#define palloc0_ptrtype(ptr) ((typeof(ptr)) palloc0(sizeof(*(ptr
+#else
+#define palloc_ptrtype(ptr) palloc(sizeof(*(ptr)))
+#define palloc0_ptrtype(ptr) palloc0(sizeof(*(ptr)))
+#endif
+#define palloc_array(type, count) ((type *) palloc(sizeof(type) * (count)))
+#define palloc0_array(type, count) ((type *) palloc0(sizeof(type) * (count)))
+#define repalloc_array(pointer, type, count) ((type *) repalloc(pointer, 
sizeof(type) * (count)))
+
 /* sprintf into a palloc'd buffer --- these are in psprintf.c */
 extern char *psprintf(const char *fmt,...) pg_attribute_printf(1, 2);
 extern size_t pvsnprintf(char *buf, size_t len, const char *fmt, va_list args) 
pg_attribute_printf(3, 0);
diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h
index 332575f518..cbc9c11ffa 100644
--- a/src/include/utils/palloc.h
+++ b/src/include/utils/palloc.h
@@ -80,6 +80,39 @@ extern void *palloc_extended(Size size, int fl

Re: Expand palloc/pg_malloc API

2022-10-11 Thread Peter Eisentraut

On 14.09.22 06:53, Tom Lane wrote:

I wrote:

It kind of feels that the argument order should be pointer, oldsize, size.
It feels even more strongly that people will get the ordering wrong,
whichever we choose.  Is there a way to make that more bulletproof?


Actually ... an even-more-terrifyingly-plausible misuse is that the
supplied oldsize is different from the actual previous allocation.
We should try to check that.  In MEMORY_CONTEXT_CHECKING builds
it should be possible to assert that oldsize == requested_size.
We don't have that data if !MEMORY_CONTEXT_CHECKING, but we could
at least assert that oldsize <= allocated chunk size.


I'm not very familiar with MEMORY_CONTEXT_CHECKING.  Where would one get 
these values?






Re: Expand palloc/pg_malloc API

2022-10-11 Thread Tom Lane
Peter Eisentraut  writes:
> On 14.09.22 06:53, Tom Lane wrote:
>> Actually ... an even-more-terrifyingly-plausible misuse is that the
>> supplied oldsize is different from the actual previous allocation.
>> We should try to check that.  In MEMORY_CONTEXT_CHECKING builds
>> it should be possible to assert that oldsize == requested_size.
>> We don't have that data if !MEMORY_CONTEXT_CHECKING, but we could
>> at least assert that oldsize <= allocated chunk size.

> I'm not very familiar with MEMORY_CONTEXT_CHECKING.  Where would one get 
> these values?

Hmm ... the individual allocators have that info, but mcxt.c doesn't
have access to it.  I guess we could invent an additional "method"
to return the requested size of a chunk, which is only available in
MEMORY_CONTEXT_CHECKING builds, or maybe in !MEMORY_CONTEXT_CHECKING
it returns the allocated size instead.

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-10-31 Thread Peter Eisentraut

On 11.10.22 18:04, Tom Lane wrote:

Peter Eisentraut  writes:

On 14.09.22 06:53, Tom Lane wrote:

Actually ... an even-more-terrifyingly-plausible misuse is that the
supplied oldsize is different from the actual previous allocation.
We should try to check that.  In MEMORY_CONTEXT_CHECKING builds
it should be possible to assert that oldsize == requested_size.
We don't have that data if !MEMORY_CONTEXT_CHECKING, but we could
at least assert that oldsize <= allocated chunk size.



I'm not very familiar with MEMORY_CONTEXT_CHECKING.  Where would one get
these values?


Hmm ... the individual allocators have that info, but mcxt.c doesn't
have access to it.  I guess we could invent an additional "method"
to return the requested size of a chunk, which is only available in
MEMORY_CONTEXT_CHECKING builds, or maybe in !MEMORY_CONTEXT_CHECKING
it returns the allocated size instead.


I'm not sure whether that amount of additional work would be useful 
relative to the size of this patch.  Is the patch as it stands now 
making the code less robust than what the code is doing now?


In the meantime, here is an updated patch with the argument order 
swapped and an additional assertion, as previously discussed.
From cc0069bf2b0b3d40dda462d441af5d8b07ef1f57 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Mon, 31 Oct 2022 09:17:57 +0100
Subject: [PATCH v2] Add repalloc0 and repalloc0_array

These zero out the space added by repalloc.  This is a common pattern
that is quite hairy to code by hand.

Discussion: 
https://www.postgresql.org/message-id/b66dfc89-9365-cb57-4e1f-b7d31813e...@enterprisedb.com
---
 src/backend/executor/nodeHash.c  |  8 ++-
 src/backend/libpq/be-fsstubs.c   |  9 +++-
 src/backend/optimizer/util/placeholder.c | 13 ---
 src/backend/optimizer/util/relnode.c | 29 +++-
 src/backend/parser/parse_param.c | 10 +++-
 src/backend/storage/lmgr/lwlock.c|  9 ++--
 src/backend/utils/adt/ruleutils.c|  9 ++--
 src/backend/utils/cache/typcache.c   | 10 ++--
 src/backend/utils/mmgr/mcxt.c| 18 +++
 src/include/utils/palloc.h   |  2 ++
 10 files changed, 46 insertions(+), 71 deletions(-)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 6622b202c229..2e6cce4802e5 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -940,12 +940,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
else
{
/* enlarge arrays and zero out added entries */
-   hashtable->innerBatchFile = 
repalloc_array(hashtable->innerBatchFile, BufFile *, nbatch);
-   hashtable->outerBatchFile = 
repalloc_array(hashtable->outerBatchFile, BufFile *, nbatch);
-   MemSet(hashtable->innerBatchFile + oldnbatch, 0,
-  (nbatch - oldnbatch) * sizeof(BufFile *));
-   MemSet(hashtable->outerBatchFile + oldnbatch, 0,
-  (nbatch - oldnbatch) * sizeof(BufFile *));
+   hashtable->innerBatchFile = 
repalloc0_array(hashtable->innerBatchFile, BufFile *, oldnbatch, nbatch);
+   hashtable->outerBatchFile = 
repalloc0_array(hashtable->outerBatchFile, BufFile *, oldnbatch, nbatch);
}
 
MemoryContextSwitchTo(oldcxt);
diff --git a/src/backend/libpq/be-fsstubs.c b/src/backend/libpq/be-fsstubs.c
index 3e5cada7eb53..106fdcdf817b 100644
--- a/src/backend/libpq/be-fsstubs.c
+++ b/src/backend/libpq/be-fsstubs.c
@@ -696,19 +696,16 @@ newLOfd(void)
newsize = 64;
cookies = (LargeObjectDesc **)
MemoryContextAllocZero(fscxt, newsize * 
sizeof(LargeObjectDesc *));
-   cookies_size = newsize;
}
else
{
/* Double size of array */
i = cookies_size;
newsize = cookies_size * 2;
-   cookies = (LargeObjectDesc **)
-   repalloc(cookies, newsize * sizeof(LargeObjectDesc *));
-   MemSet(cookies + cookies_size, 0,
-  (newsize - cookies_size) * sizeof(LargeObjectDesc 
*));
-   cookies_size = newsize;
+   cookies =
+   repalloc0_array(cookies, LargeObjectDesc *, 
cookies_size, newsize);
}
+   cookies_size = newsize;
 
return i;
 }
diff --git a/src/backend/optimizer/util/placeholder.c 
b/src/backend/optimizer/util/placeholder.c
index c7bfa293c9ff..c55027377fe7 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -133,16 +133,11 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar 
*phv)
while (phinfo->phid >= new_size)
new_size *= 2;
if (root->placeholder_array)
-   {
-   root->placeholder_array = (PlaceHolderInfo **)
-

Re: Expand palloc/pg_malloc API

2022-11-09 Thread Tom Lane
Peter Eisentraut  writes:
> On 11.10.22 18:04, Tom Lane wrote:
>> Hmm ... the individual allocators have that info, but mcxt.c doesn't
>> have access to it.  I guess we could invent an additional "method"
>> to return the requested size of a chunk, which is only available in
>> MEMORY_CONTEXT_CHECKING builds, or maybe in !MEMORY_CONTEXT_CHECKING
>> it returns the allocated size instead.

> I'm not sure whether that amount of additional work would be useful 
> relative to the size of this patch.  Is the patch as it stands now 
> making the code less robust than what the code is doing now?

No.  It's slightly annoying that the call sites still have to track
the old size of the allocation, but I guess that they have to have
that information in order to know that they need to repalloc in the
first place.  I agree that this patch does make things easier to
read and a bit less error-prone.

Also, I realized that what I proposed above doesn't really work
anyway for this purpose.  Consider

ptr = palloc(size);
... fill all "size" bytes ...
ptr = repalloc0(ptr, size, newsize);

where the initial size request isn't a power of 2.  If production builds
rely on the initial allocated size not requested size to decide where to
start zeroing, this would work (no uninitialized holes) in a debug build,
but leave some uninitialized bytes in a production build, which is
absolutely horrible.  So I guess we have to rely on the callers to
track their requests.

> In the meantime, here is an updated patch with the argument order 
> swapped and an additional assertion, as previously discussed.

I think it'd be worth expending an actual runtime test in repalloc0,
that is

if (unlikely(oldsize > size))
elog(ERROR, "invalid repalloc0 call: oldsize %zu, new size %zu",
 oldsize, size);

This seems cheap compared to the cost of the repalloc+memset, and the
consequences of not detecting the error seem pretty catastrophic ---
the memset would try to zero almost your whole address space.

No objections beyond that.  I've marked this RFC.

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-08-12 Thread Peter Eisentraut

On 26.07.22 23:32, Tom Lane wrote:

1. Do we really want distinct names for the frontend and backend
versions of the macros?  Particularly since you're layering the
frontend ones over pg_malloc, which has exit-on-OOM behavior?
I think we've found that notational discrepancies between frontend
and backend code are generally more a hindrance than a help,
so I'm inclined to drop the pg_malloc_xxx macros and just use
"palloc"-based names across the board.


This seems like a question that is independent of this patch.  Given 
that both pg_malloc() and palloc() do exist in fe_memutils, I think it 
would be confusing to only extend one part of that and not the other. 
The amount of code is ultimately not a lot.


If we wanted to get rid of pg_malloc() altogether, maybe we could talk 
about that.


(Personally, I have always been a bit suspicious about using the name 
palloc() without memory context semantics in frontend code, but I guess 
this is wide-spread now.)



3. Likewise, "palloc_obj" is perhaps less clear than it could be.
I find palloc_array just fine though.  Maybe palloc_object or
palloc_struct?  (If "_obj" can be traced to talloc, I'm not
seeing where.)


In talloc, the talloc() function itself allocates an object of a given 
type.  To allocate something of a specified size, you'd use 
talloc_size().  So those names won't map exactly.  I'm fine with 
palloc_object() if that is clearer.



One thought that comes to mind is that palloc_ptrtype is almost
surely going to be used in the style

myvariable = palloc_ptrtype(myvariable);

and if it's not that it's very likely wrong.  So maybe we should cut
out the middleman and write something like

#define palloc_instantiate(ptr) ((ptr) = (typeof(ptr)) palloc(sizeof(*(ptr
...
palloc_instantiate(myvariable);


Right, this is sort of what you'd want, really.  But it looks like 
strange C code, since you are modifying the variable even though you are 
passing it by value.


I think the _ptrtype variant isn't that useful anyway, so if it's 
confusing we can leave it out.





Re: Expand palloc/pg_malloc API

2022-08-12 Thread Peter Eisentraut



On 27.07.22 01:58, David G. Johnston wrote:
Admittedly I'm still getting my head around reading pointer-using code 
(I get the general concept but haven't had to code them)


- lockrelid = palloc(sizeof(*lockrelid));
+ lockrelid = palloc_ptrtype(lockrelid);

// This definitely seems like an odd idiom until I remembered about 
short-lived memory contexts and the lost pointers are soon destroyed there.


So lockrelid (no star) is a pointer that has an underlying reference 
that the macro (and the orignal code) resolves via the *


I cannot reason out whether the following would be equivalent to the above:

lockrelid = palloc_obj(*lockrelid);


I think that would also work.

Ultimately, it would be more idiomatic (in Postgres), to write this as

lockrelid = palloc(sizeof(LockRelId));

and thus

lockrelid = palloc_obj(LockRelId);




Re: Expand palloc/pg_malloc API

2022-08-28 Thread Peter Eisentraut

On 12.08.22 09:31, Peter Eisentraut wrote:
In talloc, the talloc() function itself allocates an object of a given 
type.  To allocate something of a specified size, you'd use 
talloc_size().  So those names won't map exactly.  I'm fine with 
palloc_object() if that is clearer.


I think the _ptrtype variant isn't that useful anyway, so if it's 
confusing we can leave it out.


I have updated this patch set to rename the _obj() functions to 
_object(), and I have dropped the _ptrtype() variants.


I have also split the patch to put the new API and the example uses into 
separate patches.From 250cc273063a6cd1cb7c7a73a59323844215260d Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Sun, 28 Aug 2022 19:13:34 +0200
Subject: [PATCH v2 1/2] Expand palloc/pg_malloc API for more type safety

This adds additional variants of palloc, pg_malloc, etc. that
encapsulate common usage patterns and provide more type safety.
Inspired by the talloc library.

Discussion: 
https://www.postgresql.org/message-id/flat/bb755632-2a43-d523-36f8-a1e7a389a...@enterprisedb.com
---
 src/include/common/fe_memutils.h | 28 
 src/include/utils/palloc.h   | 22 ++
 2 files changed, 50 insertions(+)

diff --git a/src/include/common/fe_memutils.h b/src/include/common/fe_memutils.h
index 0a4c9126ea..63f2b6a802 100644
--- a/src/include/common/fe_memutils.h
+++ b/src/include/common/fe_memutils.h
@@ -29,6 +29,28 @@ extern void *pg_malloc_extended(size_t size, int flags);
 extern void *pg_realloc(void *pointer, size_t size);
 extern void pg_free(void *pointer);
 
+/*
+ * Variants with easier notation and more type safety
+ */
+
+/*
+ * Allocate space for one object of type "type"
+ */
+#define pg_malloc_object(type) ((type *) pg_malloc(sizeof(type)))
+#define pg_malloc0_object(type) ((type *) pg_malloc0(sizeof(type)))
+
+/*
+ * Allocate space for "count" objects of type "type"
+ */
+#define pg_malloc_array(type, count) ((type *) pg_malloc(sizeof(type) * 
(count)))
+#define pg_malloc0_array(type, count) ((type *) pg_malloc0(sizeof(type) * 
(count)))
+
+/*
+ * Change size of allocation pointed to by "pointer" to have space for "count"
+ * objects of type "type"
+ */
+#define pg_realloc_array(pointer, type, count) ((type *) pg_realloc(pointer, 
sizeof(type) * (count)))
+
 /* Equivalent functions, deliberately named the same as backend functions */
 extern char *pstrdup(const char *in);
 extern char *pnstrdup(const char *in, Size size);
@@ -38,6 +60,12 @@ extern void *palloc_extended(Size size, int flags);
 extern void *repalloc(void *pointer, Size size);
 extern void pfree(void *pointer);
 
+#define palloc_object(type) ((type *) palloc(sizeof(type)))
+#define palloc0_object(type) ((type *) palloc0(sizeof(type)))
+#define palloc_array(type, count) ((type *) palloc(sizeof(type) * (count)))
+#define palloc0_array(type, count) ((type *) palloc0(sizeof(type) * (count)))
+#define repalloc_array(pointer, type, count) ((type *) repalloc(pointer, 
sizeof(type) * (count)))
+
 /* sprintf into a palloc'd buffer --- these are in psprintf.c */
 extern char *psprintf(const char *fmt,...) pg_attribute_printf(1, 2);
 extern size_t pvsnprintf(char *buf, size_t len, const char *fmt, va_list args) 
pg_attribute_printf(3, 0);
diff --git a/src/include/utils/palloc.h b/src/include/utils/palloc.h
index 332575f518..a0b62aa7b0 100644
--- a/src/include/utils/palloc.h
+++ b/src/include/utils/palloc.h
@@ -80,6 +80,28 @@ extern void *palloc_extended(Size size, int flags);
 extern pg_nodiscard void *repalloc(void *pointer, Size size);
 extern void pfree(void *pointer);
 
+/*
+ * Variants with easier notation and more type safety
+ */
+
+/*
+ * Allocate space for one object of type "type"
+ */
+#define palloc_object(type) ((type *) palloc(sizeof(type)))
+#define palloc0_object(type) ((type *) palloc0(sizeof(type)))
+
+/*
+ * Allocate space for "count" objects of type "type"
+ */
+#define palloc_array(type, count) ((type *) palloc(sizeof(type) * (count)))
+#define palloc0_array(type, count) ((type *) palloc0(sizeof(type) * (count)))
+
+/*
+ * Change size of allocation pointed to by "pointer" to have space for "count"
+ * objects of type "type"
+ */
+#define repalloc_array(pointer, type, count) ((type *) repalloc(pointer, 
sizeof(type) * (count)))
+
 /*
  * The result of palloc() is always word-aligned, so we can skip testing
  * alignment of the pointer when deciding which MemSet variant to use.
-- 
2.37.1

From d22fe38b8d5dfee16e2794daeaa3834c8f5aa05b Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Sun, 28 Aug 2022 19:17:49 +0200
Subject: [PATCH v2 2/2] Assorted examples of expanded type-safer
 palloc/pg_malloc API

---
 contrib/dblink/dblink.c  | 12 +++
 src/backend/access/brin/brin.c   | 14 
 src/backend/access/gin/ginfast.c | 17 --
 

Re: Expand palloc/pg_malloc API

2022-08-29 Thread Robert Haas
On Fri, Aug 12, 2022 at 3:31 AM Peter Eisentraut
 wrote:
> (Personally, I have always been a bit suspicious about using the name
> palloc() without memory context semantics in frontend code, but I guess
> this is wide-spread now.)

I think it would be a good thing to add memory context support to the
frontend. We could just put everything in a single context for
starters, and then frontend utilities that wanted to create other
contexts could do so.

There are difficulties, though. For instance, memory contexts are
nodes, and have a NodeTag. And I'm pretty sure we don't want frontend
code to know about all the backend node types. My suspicion is that
memory context types really shouldn't be node types, but right now,
they are. Whether that's the correct view or not, this kind of problem
means it's not a simple lift-and-shift to move the memory context code
into src/common. Someone would need to spend some time thinking about
how to engineer it.

-- 
Robert Haas
EDB: http://www.enterprisedb.com




Re: Expand palloc/pg_malloc API

2022-09-09 Thread Tom Lane
Peter Eisentraut  writes:
> I have updated this patch set to rename the _obj() functions to 
> _object(), and I have dropped the _ptrtype() variants.

> I have also split the patch to put the new API and the example uses into 
> separate patches.

This patch set seems fine to me, so I've marked it Ready for Committer.

I think serious consideration should be given to back-patching the
0001 part (that is, addition of the macros).  Otherwise we'll have
to remember not to use these macros in code intended for back-patch,
and that'll be mighty annoying once we are used to them.

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-09-09 Thread Tom Lane
Robert Haas  writes:
> On Fri, Aug 12, 2022 at 3:31 AM Peter Eisentraut
>  wrote:
>> (Personally, I have always been a bit suspicious about using the name
>> palloc() without memory context semantics in frontend code, but I guess
>> this is wide-spread now.)

> I think it would be a good thing to add memory context support to the
> frontend. We could just put everything in a single context for
> starters, and then frontend utilities that wanted to create other
> contexts could do so.

Perhaps, but I think we should have at least one immediate use-case
for multiple contexts in frontend.  Otherwise it's just useless extra
code.  The whole point of memory contexts in the backend is that we
have well-defined lifespans for certain types of allocations (executor
state, function results, etc); but it's not very clear to me that the
same concept will be helpful in any of our frontend programs.

> There are difficulties, though. For instance, memory contexts are
> nodes, and have a NodeTag. And I'm pretty sure we don't want frontend
> code to know about all the backend node types. My suspicion is that
> memory context types really shouldn't be node types, but right now,
> they are. Whether that's the correct view or not, this kind of problem
> means it's not a simple lift-and-shift to move the memory context code
> into src/common. Someone would need to spend some time thinking about
> how to engineer it.

I don't really think that's much of an issue.  We could replace the
nodetag fields with some sort of magic number and have just as much
wrong-pointer safety as in the backend.  What I do take issue with
is moving the code into src/common.  I think we'd be better off
just writing a distinct implementation for frontend.  For one thing,
it's not apparent to me that aset.c is a good allocator for frontend
(and the other two surely are not).

This is all pretty off-topic for Peter's patch, though.

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-09-11 Thread Peter Eisentraut

On 09.09.22 22:13, Tom Lane wrote:

Peter Eisentraut  writes:

I have updated this patch set to rename the _obj() functions to
_object(), and I have dropped the _ptrtype() variants.



I have also split the patch to put the new API and the example uses into
separate patches.


This patch set seems fine to me, so I've marked it Ready for Committer.


committed


I think serious consideration should be given to back-patching the
0001 part (that is, addition of the macros).  Otherwise we'll have
to remember not to use these macros in code intended for back-patch,
and that'll be mighty annoying once we are used to them.


Yes, the 0001 patch is kept separate so that we can do that when we feel 
the time is right.






Re: Expand palloc/pg_malloc API

2022-09-12 Thread Tom Lane
Peter Eisentraut  writes:
> On 09.09.22 22:13, Tom Lane wrote:
>> I think serious consideration should be given to back-patching the
>> 0001 part (that is, addition of the macros).  Otherwise we'll have
>> to remember not to use these macros in code intended for back-patch,
>> and that'll be mighty annoying once we are used to them.

> Yes, the 0001 patch is kept separate so that we can do that when we feel 
> the time is right.

I think the right time is now, or at least as soon as you're
satisfied that the buildfarm is happy.

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-09-13 Thread Peter Eisentraut

On 12.09.22 15:49, Tom Lane wrote:

Peter Eisentraut  writes:

On 09.09.22 22:13, Tom Lane wrote:

I think serious consideration should be given to back-patching the
0001 part (that is, addition of the macros).  Otherwise we'll have
to remember not to use these macros in code intended for back-patch,
and that'll be mighty annoying once we are used to them.



Yes, the 0001 patch is kept separate so that we can do that when we feel
the time is right.


I think the right time is now, or at least as soon as you're
satisfied that the buildfarm is happy.


This has been done.





Re: Expand palloc/pg_malloc API

2022-09-13 Thread Peter Eisentraut
I have another little idea that fits well here: repalloc0 and 
repalloc0_array.  These zero out the space added by repalloc.  This is a 
common pattern in the backend code that is quite hairy to code by hand. 
See attached patch.
From fa611ecf7c8a7b99d37c77da66b421d2f9ebfec3 Mon Sep 17 00:00:00 2001
From: Peter Eisentraut 
Date: Wed, 14 Sep 2022 06:05:46 +0200
Subject: [PATCH] Add repalloc0 and repalloc0_array

These zero out the space added by repalloc.  This is a common pattern
that is quite hairy to code by hand.
---
 src/backend/executor/nodeHash.c  |  8 ++-
 src/backend/libpq/be-fsstubs.c   |  9 +++-
 src/backend/optimizer/util/placeholder.c | 13 ---
 src/backend/optimizer/util/relnode.c | 29 +++-
 src/backend/parser/parse_param.c | 10 +++-
 src/backend/storage/lmgr/lwlock.c|  9 ++--
 src/backend/utils/adt/ruleutils.c|  9 ++--
 src/backend/utils/cache/typcache.c   | 10 ++--
 src/backend/utils/mmgr/mcxt.c| 15 
 src/include/utils/palloc.h   |  2 ++
 10 files changed, 43 insertions(+), 71 deletions(-)

diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 77dd1dae8b..674802a4ff 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -940,12 +940,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
else
{
/* enlarge arrays and zero out added entries */
-   hashtable->innerBatchFile = 
repalloc_array(hashtable->innerBatchFile, BufFile *, nbatch);
-   hashtable->outerBatchFile = 
repalloc_array(hashtable->outerBatchFile, BufFile *, nbatch);
-   MemSet(hashtable->innerBatchFile + oldnbatch, 0,
-  (nbatch - oldnbatch) * sizeof(BufFile *));
-   MemSet(hashtable->outerBatchFile + oldnbatch, 0,
-  (nbatch - oldnbatch) * sizeof(BufFile *));
+   hashtable->innerBatchFile = 
repalloc0_array(hashtable->innerBatchFile, BufFile *, nbatch, oldnbatch);
+   hashtable->outerBatchFile = 
repalloc0_array(hashtable->outerBatchFile, BufFile *, nbatch, oldnbatch);
}
 
MemoryContextSwitchTo(oldcxt);
diff --git a/src/backend/libpq/be-fsstubs.c b/src/backend/libpq/be-fsstubs.c
index 3e5cada7eb..bf901a9cdc 100644
--- a/src/backend/libpq/be-fsstubs.c
+++ b/src/backend/libpq/be-fsstubs.c
@@ -696,19 +696,16 @@ newLOfd(void)
newsize = 64;
cookies = (LargeObjectDesc **)
MemoryContextAllocZero(fscxt, newsize * 
sizeof(LargeObjectDesc *));
-   cookies_size = newsize;
}
else
{
/* Double size of array */
i = cookies_size;
newsize = cookies_size * 2;
-   cookies = (LargeObjectDesc **)
-   repalloc(cookies, newsize * sizeof(LargeObjectDesc *));
-   MemSet(cookies + cookies_size, 0,
-  (newsize - cookies_size) * sizeof(LargeObjectDesc 
*));
-   cookies_size = newsize;
+   cookies =
+   repalloc0_array(cookies, LargeObjectDesc *, newsize, 
cookies_size);
}
+   cookies_size = newsize;
 
return i;
 }
diff --git a/src/backend/optimizer/util/placeholder.c 
b/src/backend/optimizer/util/placeholder.c
index c7bfa293c9..e40b3a12fd 100644
--- a/src/backend/optimizer/util/placeholder.c
+++ b/src/backend/optimizer/util/placeholder.c
@@ -133,16 +133,11 @@ find_placeholder_info(PlannerInfo *root, PlaceHolderVar 
*phv)
while (phinfo->phid >= new_size)
new_size *= 2;
if (root->placeholder_array)
-   {
-   root->placeholder_array = (PlaceHolderInfo **)
-   repalloc(root->placeholder_array,
-sizeof(PlaceHolderInfo *) * 
new_size);
-   MemSet(root->placeholder_array + 
root->placeholder_array_size, 0,
-  sizeof(PlaceHolderInfo *) * (new_size - 
root->placeholder_array_size));
-   }
+   root->placeholder_array =
+   repalloc0_array(root->placeholder_array, 
PlaceHolderInfo *, new_size, root->placeholder_array_size);
else
-   root->placeholder_array = (PlaceHolderInfo **)
-   palloc0(new_size * sizeof(PlaceHolderInfo *));
+   root->placeholder_array =
+   palloc0_array(PlaceHolderInfo *, new_size);
root->placeholder_array_size = new_size;
}
root->placeholder_array[phinfo->phid] = phinfo;
diff --git a/src/backend/optimizer/util/relnode.c 
b/src/backend/optimizer/util/relnode.c
index edcdd0a360..3ea40ff1b6 100644
--- a/src/b

Re: Expand palloc/pg_malloc API

2022-09-13 Thread Tom Lane
Peter Eisentraut  writes:
> I have another little idea that fits well here: repalloc0 and 
> repalloc0_array.  These zero out the space added by repalloc.  This is a 
> common pattern in the backend code that is quite hairy to code by hand. 
> See attached patch.

+1 in general --- you've put your finger on something I felt was
missing, but couldn't quite identify.

However, I'm a bit bothered by the proposed API:

+extern pg_nodiscard void *repalloc0(void *pointer, Size size, Size oldsize);

It kind of feels that the argument order should be pointer, oldsize, size.
It feels even more strongly that people will get the ordering wrong,
whichever we choose.  Is there a way to make that more bulletproof?

The only thought that comes to mind offhand is that the only plausible
use-case is with size >= oldsize, so maybe an assertion or even a
runtime check would help to catch getting it backwards.  (I notice
that your proposed coding will fail rather catastrophically if the
caller gets it backwards.  An assertion failure would be better.)

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-09-13 Thread Tom Lane
I wrote:
> It kind of feels that the argument order should be pointer, oldsize, size.
> It feels even more strongly that people will get the ordering wrong,
> whichever we choose.  Is there a way to make that more bulletproof?

Actually ... an even-more-terrifyingly-plausible misuse is that the
supplied oldsize is different from the actual previous allocation.
We should try to check that.  In MEMORY_CONTEXT_CHECKING builds
it should be possible to assert that oldsize == requested_size.
We don't have that data if !MEMORY_CONTEXT_CHECKING, but we could
at least assert that oldsize <= allocated chunk size.

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-07-26 Thread Tom Lane
Peter Eisentraut  writes:
> On 17.05.22 20:43, Tom Lane wrote:
>> So I think Peter's got a good idea here (I might quibble with the details
>> of some of these macros).  But it's not really going to move the
>> safety goalposts very far unless we make a concerted effort to make
>> these be the style everywhere.  Are we willing to do that?  What
>> will it do to back-patching difficulty?  Dare I suggest back-patching
>> these changes?

> I think it could go like the castNode() introduction: first we adopt it 
> sporadically for new code, then we change over some larger pieces of 
> code, then we backpatch the API, then someone sends in a big patch to 
> change the rest.

OK, that seems like a reasonable plan.

I've now studied this a little more closely, and I think the
functionality is fine, but I have naming quibbles.

1. Do we really want distinct names for the frontend and backend
versions of the macros?  Particularly since you're layering the
frontend ones over pg_malloc, which has exit-on-OOM behavior?
I think we've found that notational discrepancies between frontend
and backend code are generally more a hindrance than a help,
so I'm inclined to drop the pg_malloc_xxx macros and just use
"palloc"-based names across the board.

2. I don't like the "palloc_ptrtype" name at all.  I see that you
borrowed that name from talloc, but I doubt that's a precedent that
very many people are familiar with.  To me it sounds like it might
allocate something that's the size of a pointer, not the size of the
pointed-to object.  I have to confess though that I don't have an
obviously better name to suggest.  "palloc_pointed_to" would be
clear perhaps, but it's kind of long.

3. Likewise, "palloc_obj" is perhaps less clear than it could be.
I find palloc_array just fine though.  Maybe palloc_object or
palloc_struct?  (If "_obj" can be traced to talloc, I'm not
seeing where.)


One thought that comes to mind is that palloc_ptrtype is almost
surely going to be used in the style

myvariable = palloc_ptrtype(myvariable);

and if it's not that it's very likely wrong.  So maybe we should cut
out the middleman and write something like

#define palloc_instantiate(ptr) ((ptr) = (typeof(ptr)) palloc(sizeof(*(ptr
...
palloc_instantiate(myvariable);

I'm not wedded to "instantiate" here, there's probably better names.

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-07-26 Thread David G. Johnston
On Tue, Jul 26, 2022 at 2:32 PM Tom Lane  wrote:

>
> 2. I don't like the "palloc_ptrtype" name at all.  I see that you
> borrowed that name from talloc, but I doubt that's a precedent that
> very many people are familiar with.



> To me it sounds like it might
> allocate something that's the size of a pointer, not the size of the
> pointed-to object.  I have to confess though that I don't have an
> obviously better name to suggest.  "palloc_pointed_to" would be
> clear perhaps, but it's kind of long.
>

I agree that ptrtype reads "the type of a pointer".

This may not be a C-idiom but the pointed-to thing is a "reference" (hence
pass by value vs pass by reference).  So:

palloc_ref(myvariablepointer)

will allocate using the type of the referenced object.  Just like _array
and _obj, which name the thing being used as a size template as opposed to
instantiate which seems more like another word for "allocate/palloc".

David J.
P.S.

Admittedly I'm still getting my head around reading pointer-using code (I
get the general concept but haven't had to code them)

- lockrelid = palloc(sizeof(*lockrelid));
+ lockrelid = palloc_ptrtype(lockrelid);

// This definitely seems like an odd idiom until I remembered about
short-lived memory contexts and the lost pointers are soon destroyed there.

So lockrelid (no star) is a pointer that has an underlying reference that
the macro (and the orignal code) resolves via the *

I cannot reason out whether the following would be equivalent to the above:

lockrelid = palloc_obj(*lockrelid);

I assume not because:  typeof(lockrelid) != (*lockrelid *)


Re: Expand palloc/pg_malloc API

2022-05-17 Thread Bharath Rupireddy
On Tue, May 17, 2022 at 5:11 PM Peter Eisentraut
 wrote:
>
> This adds additional variants of palloc, pg_malloc, etc. that
> encapsulate common usage patterns and provide more type safety.
>
> Examples:
>
> -   result = (IndexBuildResult *) palloc(sizeof(IndexBuildResult));
> +   result = palloc_obj(IndexBuildResult);
>
> -   collector->tuples = (IndexTuple *) palloc(sizeof(IndexTuple) *
>collector->lentuples);
> +   collector->tuples = palloc_array(IndexTuple, collector->lentuples);
>
> One common point is that the new interfaces all have a return type that
> automatically matches what they are allocating, so you don't need any
> casts nor have to manually make sure the size matches the expected
> result.  Besides the additional safety, the notation is also more
> compact, as you can see above.
>
> Inspired by the talloc library.
>
> The interesting changes are in fe_memutils.h and palloc.h.  The rest of
> the patch is just randomly sprinkled examples to test/validate the new
> additions.

It seems interesting. Are we always type-casting explicitly the output
of palloc/palloc0? Does this mean the compiler takes care of
type-casting the returned void * to the target type?

I see lots of instances where there's no explicit type-casting to the
target variable type -
retval = palloc(sizeof(GISTENTRY));
Interval   *p = palloc(sizeof(Interval));
macaddr*v = palloc0(sizeof(macaddr)); and so on.

Regards,
Bharath Rupireddy.




Re: Expand palloc/pg_malloc API

2022-05-17 Thread Tom Lane
Bharath Rupireddy  writes:
> On Tue, May 17, 2022 at 5:11 PM Peter Eisentraut
>  wrote:
>> This adds additional variants of palloc, pg_malloc, etc. that
>> encapsulate common usage patterns and provide more type safety.

> I see lots of instances where there's no explicit type-casting to the
> target variable type -
> retval = palloc(sizeof(GISTENTRY));
> Interval   *p = palloc(sizeof(Interval));
> macaddr*v = palloc0(sizeof(macaddr)); and so on.

Yeah.  IMO the first of those is very poor style, because there's
basically nothing enforcing that you wrote the right thing in sizeof().
The others are a bit safer, in that at least a human can note that
the two types mentioned on the same line match --- but I doubt any
compiler would detect it if they don't.  Our current preferred style

 Interval   *p = (Interval *) palloc(sizeof(Interval));

is really barely an improvement, because only two of the three types
mentioned are going to be checked against each other.

So I think Peter's got a good idea here (I might quibble with the details
of some of these macros).  But it's not really going to move the
safety goalposts very far unless we make a concerted effort to make
these be the style everywhere.  Are we willing to do that?  What
will it do to back-patching difficulty?  Dare I suggest back-patching
these changes?

regards, tom lane




Re: Expand palloc/pg_malloc API

2022-05-24 Thread Peter Eisentraut

On 17.05.22 20:43, Tom Lane wrote:

So I think Peter's got a good idea here (I might quibble with the details
of some of these macros).  But it's not really going to move the
safety goalposts very far unless we make a concerted effort to make
these be the style everywhere.  Are we willing to do that?  What
will it do to back-patching difficulty?  Dare I suggest back-patching
these changes?


I think it could go like the castNode() introduction: first we adopt it 
sporadically for new code, then we change over some larger pieces of 
code, then we backpatch the API, then someone sends in a big patch to 
change the rest.