Use Generation memory contexts to store tuples in sorts
author: David Rowley <drowley@postgresql.org>
Mon, 4 Apr 2022 10:52:35 +0000 (22:52 +1200)
committer: David Rowley <drowley@postgresql.org>
Mon, 4 Apr 2022 10:52:35 +0000 (22:52 +1200)
The general usage pattern when we store tuples in tuplesort.c is that
we store a series of tuples one by one then either perform a sort or spill
them to disk.  In the common case, there is no pfreeing of already stored
tuples.  Since tuples are not individually pfree'd in that case, we have
very little need for the aset.c memory allocation behavior, which
maintains freelists and always rounds allocation sizes up to the next
power of 2.

Here we conditionally use generation.c contexts for storing tuples in
tuplesort.c when the sort will never be bounded.  Unfortunately, the
memory context to store tuples is already created by the time any calls
would be made to tuplesort_set_bound(), so here we add a new sort option
that allows callers to specify if they're going to need a bounded sort or
not.  We'll use a standard aset.c allocator when this sort option is set,
and a generation.c allocator otherwise.

Extension authors must ensure that the TUPLESORT_ALLOWBOUNDED flag is
used when calling tuplesort_begin_* for any sorts that make a call to
tuplesort_set_bound().

Author: David Rowley
Reviewed-by: Andy Fan
Discussion: https://postgr.es/m/CAApHDvoH4ASzsAOyHcxkuY01Qf++8JJ0paw+03dk+W25tQEcNQ@mail.gmail.com

src/backend/executor/nodeIncrementalSort.c
src/backend/executor/nodeSort.c
src/backend/utils/sort/tuplesort.c
src/include/utils/tuplesort.h

index 4f50bc845dac76a57905a572b977405baa4a0e00..d1b97d46bc036b5570864d4c80db527a619ff164 100644 (file)
@@ -315,7 +315,7 @@ switchToPresortedPrefixMode(PlanState *pstate)
                                                &(plannode->sort.nullsFirst[nPresortedCols]),
                                                work_mem,
                                                NULL,
-                                               TUPLESORT_NONE);
+                                               node->bounded ? TUPLESORT_ALLOWBOUNDED : TUPLESORT_NONE);
        node->prefixsort_state = prefixsort_state;
    }
    else
@@ -616,6 +616,8 @@ ExecIncrementalSort(PlanState *pstate)
                                                  plannode->sort.nullsFirst,
                                                  work_mem,
                                                  NULL,
+                                                 node->bounded ?
+                                                 TUPLESORT_ALLOWBOUNDED :
                                                  TUPLESORT_NONE);
            node->fullsort_state = fullsort_state;
        }
index a113d737955dd7bdfe253b7b825570f6bb672d16..3c28d60c3ef302db5a73842bf7f1bc973ef550f3 100644 (file)
@@ -99,6 +99,8 @@ ExecSort(PlanState *pstate)
 
        if (node->randomAccess)
            tuplesortopts |= TUPLESORT_RANDOMACCESS;
+       if (node->bounded)
+           tuplesortopts |= TUPLESORT_ALLOWBOUNDED;
 
        if (node->datumSort)
            tuplesortstate = tuplesort_begin_datum(TupleDescAttr(tupDesc, 0)->atttypid,
index a8a5cc52047dc5d6971b9ee5272513488b2e6ebf..571fb95532735cae5f675779beea4fff45374f59 100644 (file)
@@ -935,11 +935,21 @@ tuplesort_begin_batch(Tuplesortstate *state)
     * eases memory management.  Resetting at key points reduces
     * fragmentation. Note that the memtuples array of SortTuples is allocated
     * in the parent context, not this context, because there is no need to
-    * free memtuples early.
+    * free memtuples early.  For bounded sorts, tuples may be pfreed in any
+    * order, so we use a regular aset.c context so that it can make use of
+    * free'd memory.  When the sort is not bounded, we make use of a
+    * generation.c context as this keeps allocations more compact with less
+    * wastage.  Allocations are also slightly more CPU efficient.
     */
-   state->tuplecontext = AllocSetContextCreate(state->sortcontext,
-                                               "Caller tuples",
-                                               ALLOCSET_DEFAULT_SIZES);
+   if (state->sortopt & TUPLESORT_ALLOWBOUNDED)
+       state->tuplecontext = AllocSetContextCreate(state->sortcontext,
+                                                   "Caller tuples",
+                                                   ALLOCSET_DEFAULT_SIZES);
+   else
+       state->tuplecontext = GenerationContextCreate(state->sortcontext,
+                                                     "Caller tuples",
+                                                     ALLOCSET_DEFAULT_SIZES);
+
 
    state->status = TSS_INITIAL;
    state->bounded = false;
@@ -1444,6 +1454,8 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound)
 {
    /* Assert we're called before loading any tuples */
    Assert(state->status == TSS_INITIAL && state->memtupcount == 0);
+   /* Assert we allow bounded sorts */
+   Assert(state->sortopt & TUPLESORT_ALLOWBOUNDED);
    /* Can't set the bound twice, either */
    Assert(!state->bounded);
    /* Also, this shouldn't be called in a parallel worker */
index 345f4ce8024df729a2073d554753ebf99f482809..364cf132fcb63638a7d55029d07bba8c6e407934 100644 (file)
@@ -92,6 +92,9 @@ typedef enum
 /* specifies whether non-sequential access to the sort result is required */
 #define    TUPLESORT_RANDOMACCESS          (1 << 0)
 
+/* specifies if the tuplesort is able to support bounded sorts */
+#define TUPLESORT_ALLOWBOUNDED         (1 << 1)
+
 typedef struct TuplesortInstrumentation
 {
    TuplesortMethod sortMethod; /* sort algorithm used */