亚洲激情专区-91九色丨porny丨老师-久久久久久久女国产乱让韩-国产精品午夜小视频观看

溫馨提示×

溫馨提示×

您好,登錄后才能下訂單哦!

密碼登錄×
登錄注冊×
其他方式登錄
點擊 登錄注冊 即表示同意《億速云用戶服務條款》

PostgreSQL 源碼解讀(181)- 查詢#97(聚合函數#2-ExecInitAgg)

發布時間:2020-08-09 22:17:32 來源:ITPUB博客 閱讀:331 作者:husthxd 欄目:關系型數據庫

本節簡單介紹了PostgreSQL執行聚合函數時的初始化工作,主要實現函數是ExecInitAgg.

一、數據結構

AggState
聚合函數執行時狀態結構體,內含AggStatePerAgg等結構體


/* ---------------------
 *    AggState information
 *
 *    ss.ss_ScanTupleSlot refers to output of underlying plan.
 *  ss.ss_ScanTupleSlot指的是基礎計劃的輸出.
 *    (ss = ScanState,ps = PlanState)
 *
 *    Note: ss.ps.ps_ExprContext contains ecxt_aggvalues and
 *    ecxt_aggnulls arrays, which hold the computed agg values for the current
 *    input group during evaluation of an Agg node's output tuple(s).  We
 *    create a second ExprContext, tmpcontext, in which to evaluate input
 *    expressions and run the aggregate transition functions.
 *    注意:ss.ps.ps_ExprContext包含了ecxt_aggvalues和ecxt_aggnulls數組,
 *      這兩個數組保存了在計算Agg節點的輸出元組時,當前輸入組已計算出的聚合值.
 *      另外還會創建第二個ExprContext(即tmpcontext),用于計算輸入表達式并運行聚合轉換函數.
 * ---------------------
 */
/*
 * Opaque pointer types for the per-aggregate working structures.
 *
 * These structs are private in nodeAgg.c: only the struct tags are declared
 * here, so AggState can hold pointers to them without exposing their members.
 */
typedef struct AggStatePerAggData *AggStatePerAgg;
typedef struct AggStatePerTransData *AggStatePerTrans;
typedef struct AggStatePerGroupData *AggStatePerGroup;
typedef struct AggStatePerPhaseData *AggStatePerPhase;
typedef struct AggStatePerHashData *AggStatePerHash;
/*
 * AggState - run-time state of an Agg plan node.
 *
 * ExecInitAgg() allocates one of these with makeNode(AggState) and fills it
 * in from the Agg plan node.
 *
 * The FIELDNO_AGGSTATE_* macros interleaved below give the 0-based position
 * of the member that follows them (counting "ss" as field 0); they must be
 * updated whenever members are added, removed or reordered before them.
 * NOTE(review): the consumers of these indexes are not visible in this file.
 */
typedef struct AggState
{
    /* embedded scan state; must stay first so the node starts with a NodeTag */
    ScanState    ss;                /* its first field is NodeTag */
    List       *aggs;            /* all Aggref nodes in targetlist & quals */
    /* ExecInitAgg asserts this equals list_length(aggs) */
    int            numaggs;        /* length of list (could be zero!) */
    int            numtrans;        /* number of pertrans items */
    /* copied from the Agg plan node by ExecInitAgg */
    AggStrategy aggstrategy;    /* strategy mode */
    /* copied from the Agg plan node by ExecInitAgg */
    AggSplit    aggsplit;        /* agg-splitting mode, see nodes.h */
    AggStatePerPhase phase;        /* pointer to current phase data */
    int            numphases;        /* number of phases (including phase 0) */
    /* ExecInitAgg starts at 0 for AGG_HASHED and at 1 for every other strategy */
    int            current_phase;    /* current phase number */
    AggStatePerAgg peragg;        /* per-Aggref information */
    AggStatePerTrans pertrans;    /* per-Trans state information */
    /* only created by ExecInitAgg when the strategy is AGG_HASHED or AGG_MIXED */
    ExprContext *hashcontext;    /* econtexts for long-lived data (hashtable) */
    /* array with one ExprContext per grouping set (maxsets entries) */
    ExprContext **aggcontexts;    /* econtexts for long-lived data (per GS) */
    ExprContext *tmpcontext;    /* econtext for input expressions */
#define FIELDNO_AGGSTATE_CURAGGCONTEXT 14
    ExprContext *curaggcontext; /* currently active aggcontext */
    AggStatePerAgg curperagg;    /* currently active aggregate, if any */
#define FIELDNO_AGGSTATE_CURPERTRANS 16
    AggStatePerTrans curpertrans;    /* currently active trans state, if any */
    bool        input_done;        /* indicates end of input */
    bool        agg_done;        /* indicates completion of Agg scan */
    /* initialized to -1 by ExecInitAgg */
    int            projected_set;    /* The last projected grouping set */
#define FIELDNO_AGGSTATE_CURRENT_SET 20
    int            current_set;    /* The current grouping set being evaluated */
    Bitmapset  *grouped_cols;    /* grouped cols in current projection */
    /* integer list built by ExecInitAgg from the union of all phases' columns */
    List       *all_grouped_cols;    /* list of all grouped cols in DESC order */
    /* These fields are for grouping set phase data */
    int            maxsets;        /* The max number of sets in any phase */
    /* numphases entries */
    AggStatePerPhase phases;    /* array of all phases */
    Tuplesortstate *sort_in;    /* sorted input to phases > 1 */
    Tuplesortstate *sort_out;    /* input is copied here for next phase */
    /* only set up by ExecInitAgg when the plan node has a non-empty chain */
    TupleTableSlot *sort_slot;    /* slot for sort results */
    /* these fields are used in AGG_PLAIN and AGG_SORTED modes: */
    /* points at the first maxsets entries of all_pergroups */
    AggStatePerGroup *pergroups;    /* grouping set indexed array of per-group
                                     * pointers */
    HeapTuple    grp_firstTuple; /* copy of first tuple of current group */
    /* these fields are used in AGG_HASHED and AGG_MIXED modes: */
    bool        table_filled;    /* hash table filled yet? */
    /* number of hash tables, i.e. the number of entries in perhash */
    int            num_hashes;
    AggStatePerHash perhash;    /* array of per-hashtable data */
    /* points into all_pergroups, after the pergroups entries (if any) */
    AggStatePerGroup *hash_pergroup;    /* grouping set indexed array of
                                         * per-group pointers */
    /* support for evaluation of agg input expressions: */
#define FIELDNO_AGGSTATE_ALL_PERGROUPS 34
    /* allocated by ExecInitAgg with maxsets + num_hashes entries */
    AggStatePerGroup *all_pergroups;    /* array of first ->pergroups, then
                                         * ->hash_pergroup */
    ProjectionInfo *combinedproj;    /* projection machinery */
} AggState;
/*
 * Primitive options supported by nodeAgg.c.
 *
 * Each option occupies its own bit, so options can be OR'ed together to
 * describe an aggregation operating mode.
 */
#define AGGSPLITOP_COMBINE        (1 << 0)    /* substitute combinefn for transfn */
#define AGGSPLITOP_SKIPFINAL    (1 << 1)    /* skip finalfn, return state as-is */
#define AGGSPLITOP_SERIALIZE    (1 << 2)    /* apply serializefn to output */
#define AGGSPLITOP_DESERIALIZE    (1 << 3)    /* apply deserializefn to input */

/*
 * Supported operating modes, i.e. the useful combinations of the primitive
 * options above.
 */
typedef enum AggSplit
{
    /* Basic, non-split aggregation: no option bits set */
    AGGSPLIT_SIMPLE = 0,

    /* Initial phase of partial aggregation: emit the serialized state */
    AGGSPLIT_INITIAL_SERIAL = (AGGSPLITOP_SERIALIZE | AGGSPLITOP_SKIPFINAL),

    /* Final phase of partial aggregation: deserialize and combine states */
    AGGSPLIT_FINAL_DESERIAL = (AGGSPLITOP_DESERIALIZE | AGGSPLITOP_COMBINE)
} AggSplit;

/*
 * Test whether an AggSplit value selects each primitive option.
 * Every macro yields 1 when the option bit is set in "as" and 0 otherwise.
 */
#define DO_AGGSPLIT_COMBINE(as)        (!!((as) & AGGSPLITOP_COMBINE))
#define DO_AGGSPLIT_SKIPFINAL(as)    (!!((as) & AGGSPLITOP_SKIPFINAL))
#define DO_AGGSPLIT_SERIALIZE(as)    (!!((as) & AGGSPLITOP_SERIALIZE))
#define DO_AGGSPLIT_DESERIALIZE(as) (!!((as) & AGGSPLITOP_DESERIALIZE))

二、源碼解讀

ExecInitAgg為優化器生成的agg節點創建運行期信息并初始化outer子樹(左樹).
其主要實現邏輯如下:
1.初始化AggState結構體
2.計算階段數(所有Hash聚合共用階段0,其余每個Agg節點各自增加一個階段)
3.如存在grouping set,則初始化相關信息
4.分配內存上下文
5.初始化outer plan子節點
6.初始化結果類型,slot和投影
7.初始化子表達式
8.為AggStatePerPhaseData等結構體分配內存
9.循環遍歷各個階段
9.1計算分組列,存儲在phasedata->grouped_cols數組和all_grouped_cols中
9.2初始化AggState->phases數組(數組元素對應的結構體為AggStatePerPhase)
9.3初始化AggState->perhash數組(對應的結構體為AggStatePerHash)
10.轉換all_grouped_cols為倒序鏈表
11.在輸出expr上下文中設置aggregate-result存儲,同時分配私有per-agg工作存儲
12.如使用Hash算法,則調用find_hash_columns和build_hash_table方法初始化相關數據
13.調用initialize_phase/select_current_set初始化階段數據
14.檢索聚合函數信息,初始化per-agg和per-trans數據中的不可變字段
15.構建一次就完成所有轉換工作的表達式.


/* -----------------
 * ExecInitAgg
 *
 *    Creates the run-time information for the agg node produced by the
 *    planner and initializes its outer subtree.
 *  為優化器生成的agg節點創建運行期信息并初始化outer子樹(左樹).
 *
 * -----------------
 */
AggState *
ExecInitAgg(Agg *node, EState *estate, int eflags)
{
    AggState   *aggstate;//AggState結構體指針
    AggStatePerAgg peraggs;//AggStatePerAggData結構體指針
    AggStatePerTrans pertransstates;//聚合狀態值信息
    AggStatePerGroup *pergroups;//per-aggregate-per-group工作狀態
    Plan       *outerPlan;//outer計劃(左樹)
    ExprContext *econtext;//內存上下文
    TupleDesc    scanDesc;//掃描描述器
    int            numaggs,//agg個數
                transno,//轉換器
                aggno;
    int            phase;//階段
    int            phaseidx;//階段編號
    ListCell   *l;//臨時變量
    Bitmapset  *all_grouped_cols = NULL;//分組列集合
    int            numGroupingSets = 1;//Grouping Sets數
    int            numPhases;//階段數
    int            numHashes;//
    int            i = 0;
    int            j = 0;
    bool        use_hashing = (node->aggstrategy == AGG_HASHED ||
                               node->aggstrategy == AGG_MIXED);//是否使用Hash聚合算法
    /* check for unsupported flags */
    //檢查驗證
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
    /*
     * create state structure
     * 創建AggState結構體
     */
    aggstate = makeNode(AggState);
    aggstate->ss.ps.plan = (Plan *) node;
    aggstate->ss.ps.state = estate;
    aggstate->ss.ps.ExecProcNode = ExecAgg;
    aggstate->aggs = NIL;
    aggstate->numaggs = 0;
    aggstate->numtrans = 0;
    aggstate->aggstrategy = node->aggstrategy;
    aggstate->aggsplit = node->aggsplit;
    aggstate->maxsets = 0;
    aggstate->projected_set = -1;
    aggstate->current_set = 0;
    aggstate->peragg = NULL;
    aggstate->pertrans = NULL;
    aggstate->curperagg = NULL;
    aggstate->curpertrans = NULL;
    aggstate->input_done = false;
    aggstate->agg_done = false;
    aggstate->pergroups = NULL;
    aggstate->grp_firstTuple = NULL;
    aggstate->sort_in = NULL;
    aggstate->sort_out = NULL;
    /*
     * phases[0] always exists, but is dummy in sorted/plain mode
     * phases[0]通常都會存在,在sorted/plain模式下其實是"虛擬"的.
     */
    numPhases = (use_hashing ? 1 : 2);
    numHashes = (use_hashing ? 1 : 0);
    /*
     * Calculate the maximum number of grouping sets in any phase; this
     * determines the size of some allocations.  Also calculate the number of
     * phases, since all hashed/mixed nodes contribute to only a single phase.
     * 在所有階段中計算最大的grouping sets個數.
     * 這決定了某些內存分配的大小.同時,計算階段數,因為所有的hashed/mixed節點只在一個階段中.
     */
    if (node->groupingSets)
    {
        //存在grouping sets
        numGroupingSets = list_length(node->groupingSets);
        foreach(l, node->chain)
        {
            Agg           *agg = lfirst(l);
            numGroupingSets = Max(numGroupingSets,
                                  list_length(agg->groupingSets));
            /*
             * additional AGG_HASHED aggs become part of phase 0, but all
             * others add an extra phase.
             */
            if (agg->aggstrategy != AGG_HASHED)
                ++numPhases;
            else
                ++numHashes;
        }
    }
    //賦值
    aggstate->maxsets = numGroupingSets;
    aggstate->numphases = numPhases;
    aggstate->aggcontexts = (ExprContext **)
        palloc0(sizeof(ExprContext *) * numGroupingSets);
    /*
     * Create expression contexts.  We need three or more, one for
     * per-input-tuple processing, one for per-output-tuple processing, one
     * for all the hashtables, and one for each grouping set.  The per-tuple
     * memory context of the per-grouping-set ExprContexts (aggcontexts)
     * replaces the standalone memory context formerly used to hold transition
     * values.  We cheat a little by using ExecAssignExprContext() to build
     * all of them.
     * 創建表達式上下文.起碼需要三個,一個用于per-input-tuple處理,
     *   一個用于per-output-tuple處理,另外一個用于每個grouping set.
     * per-grouping-set ExprContexts (aggcontexts)的per-tuple內存上下文會替換
     *   原來用于保存轉換值的獨立內存上下文.
     *
     * NOTE: the details of what is stored in aggcontexts and what is stored
     * in the regular per-query memory context are driven by a simple
     * decision: we want to reset the aggcontext at group boundaries (if not
     * hashing) and in ExecReScanAgg to recover no-longer-wanted space.
     * 注意:存儲在aggcontexts和per-query內存上下文中的數據具體是什么取決于:
         我們希望在組邊界(非hashing)重置aggcontext以及在ExecReScanAgg中恢復不再期望的空間
     */
    //分配內存上下文
    ExecAssignExprContext(estate, &aggstate->ss.ps);
    aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
    for (i = 0; i < numGroupingSets; ++i)
    {
        ExecAssignExprContext(estate, &aggstate->ss.ps);
        aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
    }
    if (use_hashing)
    {
        ExecAssignExprContext(estate, &aggstate->ss.ps);
        aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
    }
    ExecAssignExprContext(estate, &aggstate->ss.ps);
    /*
     * Initialize child nodes.
     * 初始化子節點
     *
     * If we are doing a hashed aggregation then the child plan does not need
     * to handle REWIND efficiently; see ExecReScanAgg.
     * 如果使用Hash聚合算法,子計劃不需要REWIND,詳細參考ExecReScanAgg.
     */
    if (node->aggstrategy == AGG_HASHED)
        eflags &= ~EXEC_FLAG_REWIND;
    //獲取outerPlan
    outerPlan = outerPlan(node);
    //初始化outerPlan
    outerPlanState(aggstate) = ExecInitNode(outerPlan, estate, eflags);
    /*
     * initialize source tuple type.
     * 初始化源元組類型
     */
    ExecCreateScanSlotFromOuterPlan(estate, &aggstate->ss);
    scanDesc = aggstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
    if (node->chain)
        aggstate->sort_slot = ExecInitExtraTupleSlot(estate, scanDesc);
    /*
     * Initialize result type, slot and projection.
     * 初始化結果類型,slot和投影
     */
    ExecInitResultTupleSlotTL(estate, &aggstate->ss.ps);
    ExecAssignProjectionInfo(&aggstate->ss.ps, NULL);
    /*
     * initialize child expressions
     * 初始化子表達式
     *
     * We expect the parser to have checked that no aggs contain other agg
     * calls in their arguments (and just to be sure, we verify it again while
     * initializing the plan node).  This would make no sense under SQL
     * semantics, and it's forbidden by the spec.  Because it is true, we
     * don't need to worry about evaluating the aggs in any particular order.
     * 我們期望解析器已經檢查過參數中沒有agg包含其他agg調用(在初始化計劃節點時已驗證,這次再次確認)
     * 這在SQL語義下沒有意義,而且SQL規范禁止這樣做.
     * 因為這是真的話,將不需要擔心以任何特定的順序計算agg。
     *
     * Note: execExpr.c finds Aggrefs for us, and adds their AggrefExprState
     * nodes to aggstate->aggs.  Aggrefs in the qual are found here; Aggrefs
     * in the targetlist are found during ExecAssignProjectionInfo, below.
     * 注意:execExpr.c會幫我們找到Aggrefs,同時添加AggrefExprState節點到aggstate->aggs中.
     * 表達式中的Aggrefs會在這里被檢索到,targetlist中的Aggrefs會在下面的ExecAssignProjectionInfo中被檢索.
     */
    aggstate->ss.ps.qual =
        ExecInitQual(node->plan.qual, (PlanState *) aggstate);
    /*
     * We should now have found all Aggrefs in the targetlist and quals.
     * 現在我們已經檢索了所有在投影列和表達式中的Aggrefs.
     */
    numaggs = aggstate->numaggs;
    Assert(numaggs == list_length(aggstate->aggs));
    /*
     * For each phase, prepare grouping set data and fmgr lookup data for
     * compare functions.  Accumulate all_grouped_cols in passing.
     * 每個階段都需要準備grouping set數據和為對比函數準備fmgr檢索數據.
     * 通過累積所有分組的cols實現.
     */
    //分配內存
    aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
    aggstate->num_hashes = numHashes;
    if (numHashes)
    {
        aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
        aggstate->phases[0].numsets = 0;
        aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
        aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
    }
    phase = 0;
    for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
    {
        //------------ 遍歷各個階段
        Agg           *aggnode;
        Sort       *sortnode;
        if (phaseidx > 0)
        {
            //不是第一個階段,從node鏈表中取得節點
            aggnode = list_nth_node(Agg, node->chain, phaseidx - 1);
            //排序節點
            sortnode = castNode(Sort, aggnode->plan.lefttree);
        }
        else
        {
            //第一階段,直接賦值
            aggnode = node;
            sortnode = NULL;
        }
        Assert(phase <= 1 || sortnode);
        if (aggnode->aggstrategy == AGG_HASHED
            || aggnode->aggstrategy == AGG_MIXED)
        {
            //---------- 使用Hash聚合
            //階段數據
            AggStatePerPhase phasedata = &aggstate->phases[0];
            AggStatePerHash perhash;
            Bitmapset  *cols = NULL;
            Assert(phase == 0);
            i = phasedata->numsets++;
            perhash = &aggstate->perhash[i];
            /* phase 0 always points to the "real" Agg in the hash case */
            //使用Hash聚合,階段0通常指向"實際的"Agg
            phasedata->aggnode = node;
            phasedata->aggstrategy = node->aggstrategy;
            /* but the actual Agg node representing this hash is saved here */
            //但表示該Hash的實際的Agg節點保存在這里
            perhash->aggnode = aggnode;
            phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
            //分組列放在集合中
            for (j = 0; j < aggnode->numCols; ++j)
                cols = bms_add_member(cols, aggnode->grpColIdx[j]);
            //存儲在階段數據中
            phasedata->grouped_cols[i] = cols;
            //添加到大集合中
            all_grouped_cols = bms_add_members(all_grouped_cols, cols);
            continue;
        }
        else
        {
            //使用Group聚合
            AggStatePerPhase phasedata = &aggstate->phases[++phase];
            int            num_sets;
            phasedata->numsets = num_sets = list_length(aggnode->groupingSets);
            if (num_sets)
            {
                phasedata->gset_lengths = palloc(num_sets * sizeof(int));
                phasedata->grouped_cols = palloc(num_sets * sizeof(Bitmapset *));
                i = 0;
                foreach(l, aggnode->groupingSets)
                {
                    int            current_length = list_length(lfirst(l));
                    Bitmapset  *cols = NULL;
                    /* planner forces this to be correct */
                    for (j = 0; j < current_length; ++j)
                        cols = bms_add_member(cols, aggnode->grpColIdx[j]);
                    phasedata->grouped_cols[i] = cols;
                    phasedata->gset_lengths[i] = current_length;
                    ++i;
                }
                all_grouped_cols = bms_add_members(all_grouped_cols,
                                                   phasedata->grouped_cols[0]);
            }
            else
            {
                Assert(phaseidx == 0);
                phasedata->gset_lengths = NULL;
                phasedata->grouped_cols = NULL;
            }
            /*
             * If we are grouping, precompute fmgr lookup data for inner loop.
             * 如果使用GroupAggregate,為內循環提前算好fmgr檢索數據
             */
            if (aggnode->aggstrategy == AGG_SORTED)
            {
                int            i = 0;
                Assert(aggnode->numCols > 0);
                /*
                 * Build a separate function for each subset of columns that
                 * need to be compared.
                 * 為每一個需要對比的列子集構建獨立的函數
                 */
                phasedata->eqfunctions =
                    (ExprState **) palloc0(aggnode->numCols * sizeof(ExprState *));
                /* for each grouping set */
                //對于每一個grouping set進行處理
                for (i = 0; i < phasedata->numsets; i++)
                {
                    int            length = phasedata->gset_lengths[i];
                    if (phasedata->eqfunctions[length - 1] != NULL)
                        continue;
                    phasedata->eqfunctions[length - 1] =
                        execTuplesMatchPrepare(scanDesc,
                                               length,
                                               aggnode->grpColIdx,
                                               aggnode->grpOperators,
                                               (PlanState *) aggstate);
                }
                /* and for all grouped columns, unless already computed */
                //處理所有需要分組的列,除非已完成計算
                if (phasedata->eqfunctions[aggnode->numCols - 1] == NULL)
                {
                    phasedata->eqfunctions[aggnode->numCols - 1] =
                        execTuplesMatchPrepare(scanDesc,
                                               aggnode->numCols,
                                               aggnode->grpColIdx,
                                               aggnode->grpOperators,
                                               (PlanState *) aggstate);
                }
            }
            phasedata->aggnode = aggnode;
            phasedata->aggstrategy = aggnode->aggstrategy;
            phasedata->sortnode = sortnode;
        }
    }
    /*
     * Convert all_grouped_cols to a descending-order list.
     * 轉換all_grouped_cols為倒序鏈表
     */
    i = -1;
    while ((i = bms_next_member(all_grouped_cols, i)) >= 0)
        aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
    /*
     * Set up aggregate-result storage in the output expr context, and also
     * allocate my private per-agg working storage
     * 在輸出expr上下文中設置aggregate-result存儲,同時分配私有per-agg工作存儲
     */
    econtext = aggstate->ss.ps.ps_ExprContext;
    //分配工作空間
    econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
    econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
    peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
    pertransstates = (AggStatePerTrans) palloc0(sizeof(AggStatePerTransData) * numaggs);
    aggstate->peragg = peraggs;
    aggstate->pertrans = pertransstates;
    aggstate->all_pergroups =
        (AggStatePerGroup *) palloc0(sizeof(AggStatePerGroup)
                                     * (numGroupingSets + numHashes));
    pergroups = aggstate->all_pergroups;
    if (node->aggstrategy != AGG_HASHED)
    {
        //---------- 使用Group聚合
        for (i = 0; i < numGroupingSets; i++)
        {
            pergroups[i] = (AggStatePerGroup) palloc0(sizeof(AggStatePerGroupData)
                                                      * numaggs);
        }
        aggstate->pergroups = pergroups;
        pergroups += numGroupingSets;
    }
    /*
     * Hashing can only appear in the initial phase.
     * Hashing只會出現在初始階段
     */
    if (use_hashing)
    {
        /* this is an array of pointers, not structures */
        //指針數組,但不是結構體
        aggstate->hash_pergroup = pergroups;
        find_hash_columns(aggstate);
        build_hash_table(aggstate);
        aggstate->table_filled = false;
    }
    /*
     * Initialize current phase-dependent values to initial phase. The initial
     * phase is 1 (first sort pass) for all strategies that use sorting (if
     * hashing is being done too, then phase 0 is processed last); but if only
     * hashing is being done, then phase 0 is all there is.
     * 初始化當前階段依賴值為初始階段.
     * 對于所有使用排序的策略(如果也進行hasing,那么階段0最后處理),初始階段都為1(第一次排序傳遞)
     * 但如果只是進行hashing,那么只有階段0.
     */
    if (node->aggstrategy == AGG_HASHED)
    {
        //Hashing
        aggstate->current_phase = 0;
        initialize_phase(aggstate, 0);
        select_current_set(aggstate, 0, true);
    }
    else
    {
        //非Hashing
        aggstate->current_phase = 1;
        initialize_phase(aggstate, 1);
        select_current_set(aggstate, 0, false);
    }
    /* -----------------
     * Perform lookups of aggregate function info, and initialize the
     * unchanging fields of the per-agg and per-trans data.
     * 檢索聚合函數信息,初始化per-agg和per-trans數據不可變字段
     *
     * We try to optimize by detecting duplicate aggregate functions so that
     * their state and final values are re-used, rather than needlessly being
     * re-calculated independently. We also detect aggregates that are not
     * the same, but which can share the same transition state.
     * 通過檢測重復聚合函數進行優化,以便它們的狀態和最終值可被重用,而不是無必要的重復計算.
     * 同時,我們檢測那些不一樣但可以共享轉換狀態的聚合.
     *
     * Scenarios:
     * 場景如下:
     * 
     * 1. Identical aggregate function calls appear in the query:
     *
     *      SELECT SUM(x) FROM ... HAVING SUM(x) > 0
     *
     *      Since these aggregates are identical, we only need to calculate
     *      the value once. Both aggregates will share the same 'aggno' value.
     *
     * 1. 查詢中出現相同的聚合函數調用:
     *     SELECT SUM(x) FROM ... HAVING SUM(x) > 0
     *     因為聚合是相同的,只需要計算該值一次即可.兩個聚合會共享同一個aggno值.
     *     
     * 2. Two different aggregate functions appear in the query, but the
     *      aggregates have the same arguments, transition functions and
     *      initial values (and, presumably, different final functions):
     *
     *      SELECT AVG(x), STDDEV(x) FROM ...
     *
     *      In this case we must create a new peragg for the varying aggregate,
     *      and we need to call the final functions separately, but we need
     *      only run the transition function once.  (This requires that the
     *      final functions be nondestructive of the transition state, but
     *       that's required anyway for other reasons.)
     * 2. 兩個不同的聚合函數出現在查詢中,但聚合有相同的參數/轉換函數和初始值(大概還有不同的最終函數):
     *      SELECT AVG(x), STDDEV(x) FROM ...
     *    在這種情況下,必須為不同的聚合創建新的peragg,同時需要單獨調用最終函數,
     *      但我們只需要執行轉換一次即可.
     *    (這需要最終函數對過渡狀態不具有破壞性,但由于其他原因,都需要這樣做)
     *
     * For either of these optimizations to be valid, all aggregate properties
     * used in the transition phase must be the same, including any modifiers
     * such as ORDER BY, DISTINCT and FILTER, and the arguments mustn't
     * contain any volatile functions.
     * 想要這兩種優化都起效,所有在轉換階段使用聚合屬性都必須是一樣的,
     *   包括所有修改器比如ORDER BY,DISTINCT和FILTER,同時參數中不能含有易變函數.
     * -----------------
     */
    aggno = -1;
    transno = -1;
    foreach(l, aggstate->aggs)
    {
        AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
        Aggref       *aggref = aggrefstate->aggref;
        AggStatePerAgg peragg;
        AggStatePerTrans pertrans;
        int            existing_aggno;
        int            existing_transno;
        List       *same_input_transnos;
        Oid            inputTypes[FUNC_MAX_ARGS];
        int            numArguments;
        int            numDirectArgs;
        HeapTuple    aggTuple;
        Form_pg_aggregate aggform;
        AclResult    aclresult;
        Oid            transfn_oid,
                    finalfn_oid;
        bool        shareable;
        Oid            serialfn_oid,
                    deserialfn_oid;
        Expr       *finalfnexpr;
        Oid            aggtranstype;
        Datum        textInitVal;
        Datum        initValue;
        bool        initValueIsNull;
        /* Planner should have assigned aggregate to correct level */
        //規劃器已為聚合分配了合適的層次
        Assert(aggref->agglevelsup == 0);
        /* ... and the split mode should match */
        //拆分模式需要匹配
        Assert(aggref->aggsplit == aggstate->aggsplit);
        /* 1. Check for already processed aggs which can be re-used */
        // 1.檢查已完成的aggs是否可以重用.
        existing_aggno = find_compatible_peragg(aggref, aggstate, aggno,
                                                &same_input_transnos);
        if (existing_aggno != -1)
        {
            /*
             * Existing compatible agg found. so just point the Aggref to the
             * same per-agg struct.
             * 發現了兼容的agg,Aggref指向同樣的per-agg結構體即可
             */
            aggrefstate->aggno = existing_aggno;
            continue;
        }
        /* Mark Aggref state node with assigned index in the result array */
        //為Aggref狀態節點分配結果數組中的位置索引
        peragg = &peraggs[++aggno];
        peragg->aggref = aggref;
        aggrefstate->aggno = aggno;
        /* Fetch the pg_aggregate row */
        //提前pg_aggregate中的行(獲取聚合函數信息)
        aggTuple = SearchSysCache1(AGGFNOID,
                                   ObjectIdGetDatum(aggref->aggfnoid));
        if (!HeapTupleIsValid(aggTuple))
            elog(ERROR, "cache lookup failed for aggregate %u",
                 aggref->aggfnoid);
        //轉換為相應的數據結構
        aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
        /* Check permission to call aggregate function */
        //檢查訪問權限
        aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
                                     ACL_EXECUTE);
        if (aclresult != ACLCHECK_OK)
            aclcheck_error(aclresult, OBJECT_AGGREGATE,
                           get_func_name(aggref->aggfnoid));
        //調用InvokeFunctionExecuteHook
        InvokeFunctionExecuteHook(aggref->aggfnoid);
        /* planner recorded transition state type in the Aggref itself */
        //規劃器在Aggref中記錄轉換狀態類型
        aggtranstype = aggref->aggtranstype;
        Assert(OidIsValid(aggtranstype));
        /*
         * If this aggregation is performing state combines, then instead of
         * using the transition function, we'll use the combine function
         * 如果該聚合正在執行狀態組合,使用組合函數而不是使用轉換函數
         */
        if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
        {
            transfn_oid = aggform->aggcombinefn;
            /* If not set then the planner messed up */
            //如果沒有設置,會報錯
            if (!OidIsValid(transfn_oid))
                elog(ERROR, "combinefn not set for aggregate function");
        }
        else
            transfn_oid = aggform->aggtransfn;
        /* Final function only required if we're finalizing the aggregates */
        //在最后處理聚合時才需要Final function
        if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
            peragg->finalfn_oid = finalfn_oid = InvalidOid;
        else
            peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
        /*
         * If finalfn is marked read-write, we can't share transition states;
         * but it is okay to share states for AGGMODIFY_SHAREABLE aggs.  Also,
         * if we're not executing the finalfn here, we can share regardless.
         * 如果finalfn標記為RW,則不需要共享轉換狀態,但可以為AGGMODIFY_SHAREABLE agg共享狀態.
         * 同時,如果不在這里執行finalfn,則可以共享.
         */
        shareable = (aggform->aggfinalmodify != AGGMODIFY_READ_WRITE) ||
            (finalfn_oid == InvalidOid);
        peragg->shareable = shareable;
        serialfn_oid = InvalidOid;
        deserialfn_oid = InvalidOid;
        /*
         * Check if serialization/deserialization is required.  We only do it
         * for aggregates that have transtype INTERNAL.
         * 檢查是否需要序列化/反序列化.
         * 
         */
        if (aggtranstype == INTERNALOID)
        {
            /*
             * The planner should only have generated a serialize agg node if
             * every aggregate with an INTERNAL state has a serialization
             * function.  Verify that.
             * 如果每個有INTERNAL狀態的聚合有一個序列化函數,規劃器應該產生一個序列化agg節點,這里需要檢查!
             */
            if (DO_AGGSPLIT_SERIALIZE(aggstate->aggsplit))
            {
                /* serialization only valid when not running finalfn */
                //在沒有運行finalfn的情況下序列化才有效
                Assert(DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit));
                if (!OidIsValid(aggform->aggserialfn))
                    elog(ERROR, "serialfunc not provided for serialization aggregation");
                serialfn_oid = aggform->aggserialfn;
            }
            /* Likewise for deserialization functions */
            //反序列化
            if (DO_AGGSPLIT_DESERIALIZE(aggstate->aggsplit))
            {
                /* deserialization only valid when combining states */
                //在組合狀態時才有效
                Assert(DO_AGGSPLIT_COMBINE(aggstate->aggsplit));
                if (!OidIsValid(aggform->aggdeserialfn))
                    elog(ERROR, "deserialfunc not provided for deserialization aggregation");
                deserialfn_oid = aggform->aggdeserialfn;
            }
        }
        /* Check that aggregate owner has permission to call component fns */
        //檢查聚合宿主有權限調用相應的函數
        {
            HeapTuple    procTuple;
            Oid            aggOwner;
            procTuple = SearchSysCache1(PROCOID,
                                        ObjectIdGetDatum(aggref->aggfnoid));
            if (!HeapTupleIsValid(procTuple))
                elog(ERROR, "cache lookup failed for function %u",
                     aggref->aggfnoid);
            aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
            ReleaseSysCache(procTuple);
            aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
                                         ACL_EXECUTE);
            if (aclresult != ACLCHECK_OK)
                aclcheck_error(aclresult, OBJECT_FUNCTION,
                               get_func_name(transfn_oid));
            InvokeFunctionExecuteHook(transfn_oid);
            if (OidIsValid(finalfn_oid))
            {
                aclresult = pg_proc_aclcheck(finalfn_oid, aggOwner,
                                             ACL_EXECUTE);
                if (aclresult != ACLCHECK_OK)
                    aclcheck_error(aclresult, OBJECT_FUNCTION,
                                   get_func_name(finalfn_oid));
                InvokeFunctionExecuteHook(finalfn_oid);
            }
            if (OidIsValid(serialfn_oid))
            {
                aclresult = pg_proc_aclcheck(serialfn_oid, aggOwner,
                                             ACL_EXECUTE);
                if (aclresult != ACLCHECK_OK)
                    aclcheck_error(aclresult, OBJECT_FUNCTION,
                                   get_func_name(serialfn_oid));
                InvokeFunctionExecuteHook(serialfn_oid);
            }
            if (OidIsValid(deserialfn_oid))
            {
                aclresult = pg_proc_aclcheck(deserialfn_oid, aggOwner,
                                             ACL_EXECUTE);
                if (aclresult != ACLCHECK_OK)
                    aclcheck_error(aclresult, OBJECT_FUNCTION,
                                   get_func_name(deserialfn_oid));
                InvokeFunctionExecuteHook(deserialfn_oid);
            }
        }
        /*
         * Get actual datatypes of the (nominal) aggregate inputs.  These
         * could be different from the agg's declared input types, when the
         * agg accepts ANY or a polymorphic type.
         * 獲取聚合輸入的實際數據類型.
         * 在agg接受ANY或者多態類型時,這些信息可能與agg聲明的輸入類型不同
         */
        numArguments = get_aggregate_argtypes(aggref, inputTypes);
        /* Count the "direct" arguments, if any */
        //計算"direct"參數的個數(如有)
        numDirectArgs = list_length(aggref->aggdirectargs);
        /* Detect how many arguments to pass to the finalfn */
        //檢查有多少參數傳遞給finalfn
        if (aggform->aggfinalextra)
            peragg->numFinalArgs = numArguments + 1;
        else
            peragg->numFinalArgs = numDirectArgs + 1;
        /* Initialize any direct-argument expressions */
        //初始化所有直接參數表達式
        peragg->aggdirectargs = ExecInitExprList(aggref->aggdirectargs,
                                                 (PlanState *) aggstate);
        /*
         * build expression trees using actual argument & result types for the
         * finalfn, if it exists and is required.
         * 如存在,則使用finalfn的實際參數和結果類型構建表達式樹.
         */
        if (OidIsValid(finalfn_oid))
        {
            build_aggregate_finalfn_expr(inputTypes,
                                         peragg->numFinalArgs,
                                         aggtranstype,
                                         aggref->aggtype,
                                         aggref->inputcollid,
                                         finalfn_oid,
                                         &finalfnexpr);
            fmgr_info(finalfn_oid, &peragg->finalfn);
            fmgr_info_set_expr((Node *) finalfnexpr, &peragg->finalfn);
        }
        /* get info about the output value's datatype */
        //獲取輸出值數據類型的相關信息.
        get_typlenbyval(aggref->aggtype,
                        &peragg->resulttypeLen,
                        &peragg->resulttypeByVal);
        /*
         * initval is potentially null, so don't try to access it as a struct
         * field. Must do it the hard way with SysCacheGetAttr.
         * initval可能是null,不要嘗試通過結構體域的方式訪問該變量.
         * 通過SysCacheGetAttr訪問.
         */
        textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
                                      Anum_pg_aggregate_agginitval,
                                      &initValueIsNull);
        if (initValueIsNull)
            initValue = (Datum) 0;
        else
            initValue = GetAggInitVal(textInitVal, aggtranstype);
        /*
         * 2. Build working state for invoking the transition function, or
         * look up previously initialized working state, if we can share it.
         * 2. 為調用轉換函數創建工作狀態,或者檢索先前已初始化的工作狀態(如可共享).
         *
         * find_compatible_peragg() already collected a list of shareable
         * per-Trans's with the same inputs. Check if any of them have the
         * same transition function and initial value.
         * find_compatible_peragg() 已收集了具備相同輸入的per-Trans共享鏈表.
         * 檢查鏈表中是否存在相同轉換函數和初始值的per-Trans.
         */
        existing_transno = find_compatible_pertrans(aggstate, aggref,
                                                    shareable,
                                                    transfn_oid, aggtranstype,
                                                    serialfn_oid, deserialfn_oid,
                                                    initValue, initValueIsNull,
                                                    same_input_transnos);
        if (existing_transno != -1)
        {
            /*
             * Existing compatible trans found, so just point the 'peragg' to
             * the same per-trans struct, and mark the trans state as shared.
             * 發現兼容的per-Trans,把peragg指向相同的per-trans結構體,同時標記trans狀態為共享.
             */
            pertrans = &pertransstates[existing_transno];
            pertrans->aggshared = true;
            peragg->transno = existing_transno;
        }
        else
        {
            pertrans = &pertransstates[++transno];
            build_pertrans_for_aggref(pertrans, aggstate, estate,
                                      aggref, transfn_oid, aggtranstype,
                                      serialfn_oid, deserialfn_oid,
                                      initValue, initValueIsNull,
                                      inputTypes, numArguments);
            peragg->transno = transno;
        }
        ReleaseSysCache(aggTuple);
    }
    /*
     * Update aggstate->numaggs to be the number of unique aggregates found.
     * Also set numstates to the number of unique transition states found.
     * 更新aggstate->numaggs變量為唯一的聚合函數個數.
     * 同時設置numstates為唯一的轉換狀態個數.
     */
    aggstate->numaggs = aggno + 1;
    aggstate->numtrans = transno + 1;
    /*
     * Last, check whether any more aggregates got added onto the node while
     * we processed the expressions for the aggregate arguments (including not
     * only the regular arguments and FILTER expressions handled immediately
     * above, but any direct arguments we might've handled earlier).  If so,
     * we have nested aggregate functions, which is semantically nonsensical,
     * so complain.  (This should have been caught by the parser, so we don't
     * need to work hard on a helpful error message; but we defend against it
     * here anyway, just to be sure.)
     * 最后,在處理聚合函數參數表達式時檢查是否有更多的聚合函數添加到節點中.
     * (除了常規參數以及上述馬上被處理的FILTER表達式外,還有所有先前已處理的所有直接參數)
     * 如存在,意味著存在嵌套聚合函數,這在語義上是不可能的,因此提示錯誤.
     * (解析器應該已經捕獲這種情況,因此不需要費力給出詳細的錯誤信息,但為了安全起見,仍需要在這里檢查)
     */
    if (numaggs != list_length(aggstate->aggs))
        ereport(ERROR,
                (errcode(ERRCODE_GROUPING_ERROR),
                 errmsg("aggregate function calls cannot be nested")));
    /*
     * Build expressions doing all the transition work at once. We build a
     * different one for each phase, as the number of transition function
     * invocation can differ between phases. Note this'll work both for
     * transition and combination functions (although there'll only be one
     * phase in the latter case).
     * 構建一次完成所有轉換工作的表達式.
     * 每個階段構建一個表達式,因為不同階段中轉換函數的調用次數可能不同.
     * 注意這對轉換函數和組合函數同樣有效(盡管在后一種情況下只有一個階段)
     */
    for (phaseidx = 0; phaseidx < aggstate->numphases; phaseidx++)
    {
        AggStatePerPhase phase = &aggstate->phases[phaseidx];
        bool        dohash = false;
        bool        dosort = false;
        /* phase 0 doesn't necessarily exist */
        //階段0不一定存在
        if (!phase->aggnode)
            continue;
        if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 1)
        {
            /*
             * Phase one, and only phase one, in a mixed agg performs both
             * sorting and aggregation.
             * 當且僅當階段1,在mixed agg,執行排序和聚合.
             */
            dohash = true;
            dosort = true;
        }
        else if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 0)
        {
            /*
             * No need to compute a transition function for an AGG_MIXED phase
             * 0 - the contents of the hashtables will have been computed
             * during phase 1.
             * 在AGG_MIXED階段0,不需要計算轉換函數.
             * 哈希表的內容在第1階段已完成計算.
             */
            continue;
        }
        else if (phase->aggstrategy == AGG_PLAIN ||
                 phase->aggstrategy == AGG_SORTED)
        {
            dohash = false;
            dosort = true;
        }
        else if (phase->aggstrategy == AGG_HASHED)
        {
            dohash = true;
            dosort = false;
        }
        else
            Assert(false);
        phase->evaltrans = ExecBuildAggTrans(aggstate, phase, dosort, dohash);
    }
    return aggstate;
}

三、跟蹤分析

測試腳本


//禁用并行
testdb=# set max_parallel_workers_per_gather=0;
SET
testdb=# explain verbose select bh,avg(c1),min(c1),max(c2) from t_agg group by bh;
                                QUERY PLAN                                 
---------------------------------------------------------------------------
 HashAggregate  (cost=13677.00..13677.06 rows=5 width=45)
   Output: bh, avg(c1), min(c1), max(c2)
   Group Key: t_agg.bh
   ->  Seq Scan on public.t_agg  (cost=0.00..8677.00 rows=500000 width=13)
         Output: bh, c1, c2, c3, c4, c5, c6
(5 rows)

跟蹤分析


(gdb) b ExecInitAgg
Breakpoint 1 at 0x6eefc9: file nodeAgg.c, line 2096.
(gdb) c
Continuing.
Breakpoint 1, ExecInitAgg (node=0x2d903a0, estate=0x2d52428, eflags=16) at nodeAgg.c:2096
2096        Bitmapset  *all_grouped_cols = NULL;
(gdb)

輸入參數


(gdb) p *node
$1 = {plan = {type = T_Agg, startup_cost = 13677, total_cost = 13677.0625, plan_rows = 5, plan_width = 45, 
    parallel_aware = false, parallel_safe = false, plan_node_id = 0, targetlist = 0x2d631f8, qual = 0x0, 
    lefttree = 0x2d62cb8, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}, aggstrategy = AGG_HASHED, 
  aggsplit = AGGSPLIT_SIMPLE, numCols = 1, grpColIdx = 0x2d62fa8, grpOperators = 0x2d62f88, numGroups = 5, aggParams = 0x0, 
  groupingSets = 0x0, chain = 0x0}
(gdb) p *estate
$2 = {type = T_EState, es_direction = ForwardScanDirection, es_snapshot = 0x2d00b80, es_crosscheck_snapshot = 0x0, 
  es_range_table = 0x2d62ff0, es_plannedstmt = 0x2c72530, 
  es_sourceText = 0x2c70d78 "select bh,avg(c1),min(c1),max(c2) from t_agg group by bh;", es_junkFilter = 0x0, 
  es_output_cid = 0, es_result_relations = 0x0, es_num_result_relations = 0, es_result_relation_info = 0x0, 
  es_root_result_relations = 0x0, es_num_root_result_relations = 0, es_tuple_routing_result_relations = 0x0, 
  es_trig_target_relations = 0x0, es_trig_tuple_slot = 0x0, es_trig_oldtup_slot = 0x0, es_trig_newtup_slot = 0x0, 
  es_param_list_info = 0x0, es_param_exec_vals = 0x0, es_queryEnv = 0x0, es_query_cxt = 0x2d52310, es_tupleTable = 0x0, 
  es_rowMarks = 0x0, es_processed = 0, es_lastoid = 0, es_top_eflags = 16, es_instrument = 0, es_finished = false, 
  es_exprcontexts = 0x0, es_subplanstates = 0x0, es_auxmodifytables = 0x0, es_per_tuple_exprcontext = 0x0, 
  es_epqTuple = 0x0, es_epqTupleSet = 0x0, es_epqScanDone = 0x0, es_use_parallel_mode = false, es_query_dsa = 0x0, 
  es_jit_flags = 0, es_jit = 0x0, es_jit_worker_instr = 0x0}
(gdb)

使用Hash算法計算


(gdb) n
2097        int            numGroupingSets = 1;
(gdb) 
2100        int            i = 0;
(gdb) 
2101        int            j = 0;
(gdb) 
2102        bool        use_hashing = (node->aggstrategy == AGG_HASHED ||
(gdb) 
2106        Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
(gdb) p use_hashing
$3 = true
(gdb)

1.初始化AggState結構體


(gdb) n
2111        aggstate = makeNode(AggState);
(gdb) 
2112        aggstate->ss.ps.plan = (Plan *) node;
(gdb) 
2113        aggstate->ss.ps.state = estate;
(gdb) 
2114        aggstate->ss.ps.ExecProcNode = ExecAgg;
(gdb) 
2116        aggstate->aggs = NIL;
(gdb) 
2117        aggstate->numaggs = 0;
(gdb) 
2118        aggstate->numtrans = 0;
(gdb) 
2119        aggstate->aggstrategy = node->aggstrategy;
(gdb) 
2120        aggstate->aggsplit = node->aggsplit;
(gdb) 
2121        aggstate->maxsets = 0;
(gdb) 
2122        aggstate->projected_set = -1;
(gdb) 
2123        aggstate->current_set = 0;
(gdb) 
2124        aggstate->peragg = NULL;
(gdb) 
2125        aggstate->pertrans = NULL;
(gdb) 
2126        aggstate->curperagg = NULL;
(gdb) 
2127        aggstate->curpertrans = NULL;
(gdb) 
2128        aggstate->input_done = false;
(gdb) 
2129        aggstate->agg_done = false;
(gdb) 
2130        aggstate->pergroups = NULL;
(gdb) 
2131        aggstate->grp_firstTuple = NULL;
(gdb) 
2132        aggstate->sort_in = NULL;
(gdb) 
2133        aggstate->sort_out = NULL;
(gdb) 
(gdb) p *aggstate
$4 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>, 
      ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0, 
      lefttree = 0x0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x0, 
      ps_ExprContext = 0x0, ps_ProjInfo = 0x0, scandesc = 0x0}, ss_currentRelation = 0x0, ss_currentScanDesc = 0x0, 
    ss_ScanTupleSlot = 0x0}, aggs = 0x0, numaggs = 0, numtrans = 0, aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, 
  phase = 0x0, numphases = 0, current_phase = 0, peragg = 0x0, pertrans = 0x0, hashcontext = 0x0, aggcontexts = 0x0, 
  tmpcontext = 0x0, curaggcontext = 0x0, curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, 
  projected_set = -1, current_set = 0, grouped_cols = 0x0, all_grouped_cols = 0x0, maxsets = 0, phases = 0x0, 
  sort_in = 0x0, sort_out = 0x0, sort_slot = 0x0, pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, 
  num_hashes = 0, perhash = 0x0, hash_pergroup = 0x0, all_pergroups = 0x0, combinedproj = 0x0}
(gdb)

2.計算分為幾個階段(Hash vs Group)


(gdb) 
2138        numPhases = (use_hashing ? 1 : 2);
(gdb) p numPhases
$5 = 1
(gdb) p numHashes
$6 = 1
(gdb)

Hash只需要一個階段,執行Hash

3.如存在grouping set,則初始化相關信息


(gdb) n
2168        aggstate->maxsets = numGroupingSets;

這里沒有grouping set,不需要初始化相關信息

4.分配內存上下文


(gdb) 
2169        aggstate->numphases = numPhases;
(gdb) 
2172            palloc0(sizeof(ExprContext *) * numGroupingSets);
(gdb) 
2171        aggstate->aggcontexts = (ExprContext **)
(gdb) 
2188        ExecAssignExprContext(estate, &aggstate->ss.ps);
(gdb) 
2189        aggstate->tmpcontext = aggstate->ss.ps.ps_ExprContext;
(gdb) 
2191        for (i = 0; i < numGroupingSets; ++i)
(gdb) 
2193            ExecAssignExprContext(estate, &aggstate->ss.ps);
(gdb) 
2194            aggstate->aggcontexts[i] = aggstate->ss.ps.ps_ExprContext;
(gdb) 
2191        for (i = 0; i < numGroupingSets; ++i)
(gdb) 
2197        if (use_hashing)
(gdb) 
2199            ExecAssignExprContext(estate, &aggstate->ss.ps);
(gdb) 
2200            aggstate->hashcontext = aggstate->ss.ps.ps_ExprContext;
(gdb) 
2203        ExecAssignExprContext(estate, &aggstate->ss.ps);
(gdb) 
2211        if (node->aggstrategy == AGG_HASHED)
(gdb) 
2212            eflags &= ~EXEC_FLAG_REWIND;
(gdb)

5.初始化outer plan子節點


(gdb) 
2213        outerPlan = outerPlan(node);
(gdb) n
2214        outerPlanState(aggstate) = ExecInitNode(outerPlan, estate, eflags);
(gdb) p *outerPlan
$7 = {type = T_SeqScan, startup_cost = 0, total_cost = 8677, plan_rows = 500000, plan_width = 13, parallel_aware = false, 
  parallel_safe = false, plan_node_id = 1, targetlist = 0x2d62770, qual = 0x0, lefttree = 0x0, righttree = 0x0, 
  initPlan = 0x0, extParam = 0x0, allParam = 0x0}

outer(左樹)節點為SeqScan,順序全表掃描.

6.初始化結果類型,slot和投影


(gdb) n
2219        ExecCreateScanSlotFromOuterPlan(estate, &aggstate->ss);
(gdb) n
2220        scanDesc = aggstate->ss.ss_ScanTupleSlot->tts_tupleDescriptor;
(gdb) n
2221        if (node->chain)
(gdb) p *aggstate
$8 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>, 
      ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0, 
      lefttree = 0x2d52bb0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x0, 
      ps_ExprContext = 0x2d52af0, ps_ProjInfo = 0x0, scandesc = 0x2d52f00}, ss_currentRelation = 0x0, 
    ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x2d53458}, aggs = 0x0, numaggs = 0, numtrans = 0, 
  aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x0, numphases = 1, current_phase = 0, peragg = 0x0, 
  pertrans = 0x0, hashcontext = 0x2d52a30, aggcontexts = 0x2d52858, tmpcontext = 0x2d52878, curaggcontext = 0x0, 
  curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1, current_set = 0, 
  grouped_cols = 0x0, all_grouped_cols = 0x0, maxsets = 1, phases = 0x0, sort_in = 0x0, sort_out = 0x0, sort_slot = 0x0, 
  pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 0, perhash = 0x0, hash_pergroup = 0x0, 
  all_pergroups = 0x0, combinedproj = 0x0}
(gdb) 
(gdb) p *scanDesc
$9 = {natts = 7, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x2d52f20}
(gdb) p *aggstate->ss.ps.scandesc
$10 = {natts = 7, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x2d52f20}
(gdb) 
(gdb) n
2227        ExecInitResultTupleSlotTL(estate, &aggstate->ss.ps);
(gdb) 
2228        ExecAssignProjectionInfo(&aggstate->ss.ps, NULL);
(gdb) 
2244            ExecInitQual(node->plan.qual, (PlanState *) aggstate);
(gdb) p *aggstate
$11 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>, 
      ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0, 
      lefttree = 0x2d52bb0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x2d537b0, 
      ps_ExprContext = 0x2d52af0, ps_ProjInfo = 0x2d538f0, scandesc = 0x2d52f00}, ss_currentRelation = 0x0, 
    ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x2d53458}, aggs = 0x2d53e00, numaggs = 3, numtrans = 0, 
  aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x0, numphases = 1, current_phase = 0, peragg = 0x0, 
  pertrans = 0x0, hashcontext = 0x2d52a30, aggcontexts = 0x2d52858, tmpcontext = 0x2d52878, curaggcontext = 0x0, 
  curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1, current_set = 0, 
  grouped_cols = 0x0, all_grouped_cols = 0x0, maxsets = 1, phases = 0x0, sort_in = 0x0, sort_out = 0x0, sort_slot = 0x0, 
  pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 0, perhash = 0x0, hash_pergroup = 0x0, 
  all_pergroups = 0x0, combinedproj = 0x0}
(gdb) p *aggstate->ss.ps.scandesc
$12 = {natts = 7, tdtypeid = 2249, tdtypmod = -1, tdhasoid = false, tdrefcount = -1, constr = 0x0, attrs = 0x2d52f20}
#### 結果元組Slot
(gdb) p *aggstate->ss.ps.ps_ResultTupleSlot
$13 = {type = T_TupleTableSlot, tts_isempty = true, tts_shouldFree = false, tts_shouldFreeMin = false, tts_slow = false, 
  tts_tuple = 0x0, tts_tupleDescriptor = 0x2d53598, tts_mcxt = 0x2d52310, tts_buffer = 0, tts_nvalid = 0, 
  tts_values = 0x2d53810, tts_isnull = 0x2d53830, tts_mintuple = 0x0, tts_minhdr = {t_len = 0, t_self = {ip_blkid = {
        bi_hi = 0, bi_lo = 0}, ip_posid = 0}, t_tableOid = 0, t_data = 0x0}, tts_off = 0, tts_fixedTupleDescriptor = true}
### 投影信息
(gdb) p *aggstate->ss.ps.ps_ProjInfo
$14 = {type = T_ProjectionInfo, pi_state = {tag = {type = T_ExprState}, flags = 6 '\006', resnull = false, resvalue = 0, 
    resultslot = 0x2d537b0, steps = 0x2d53988, evalfunc = 0x6cd882 <ExecInterpExprStillValid>, expr = 0x2d631f8, 
    evalfunc_private = 0x6cb43e <ExecInterpExpr>, steps_len = 9, steps_alloc = 16, parent = 0x2d52640, ext_params = 0x0, 
    innermost_caseval = 0x0, innermost_casenull = 0x0, innermost_domainval = 0x0, innermost_domainnull = 0x0}, 
  pi_exprContext = 0x2d52af0}
(gdb)

7.初始化子表達式


(gdb) n
2243        aggstate->ss.ps.qual =
(gdb) 
2249        numaggs = aggstate->numaggs;
(gdb) p *aggstate->ss.ps.qual
Cannot access memory at address 0x0
(gdb) 
(gdb) n
2250        Assert(numaggs == list_length(aggstate->aggs));
(gdb) p aggstate->numaggs
$16 = 3

表達式為NULL,一共有3個聚合函數

8.為AggStatePerPhaseData/AggStatePerHashData等結構體分配內存


(gdb) n
2256        aggstate->phases = palloc0(numPhases * sizeof(AggStatePerPhaseData));
(gdb) 
2258        aggstate->num_hashes = numHashes;
(gdb) 
2259        if (numHashes)
(gdb) 
2261            aggstate->perhash = palloc0(sizeof(AggStatePerHashData) * numHashes);
(gdb) 
2262            aggstate->phases[0].numsets = 0;
(gdb) 
2263            aggstate->phases[0].gset_lengths = palloc(numHashes * sizeof(int));
(gdb) n
2264            aggstate->phases[0].grouped_cols = palloc(numHashes * sizeof(Bitmapset *));
(gdb) 
2267        phase = 0;
(gdb) 
(gdb) p aggstate->phases[0]
$17 = {aggstrategy = AGG_PLAIN, numsets = 0, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0, 
  aggnode = 0x0, sortnode = 0x0, evaltrans = 0x0}

9.循環遍歷各個階段
9.1計算分組列,存儲在phasedata->grouped_cols數組和all_grouped_cols中
9.2初始化AggState->phases數組(數組元素對應的結構體為AggStatePerPhase)
9.3初始化AggState->perhash數組(對應的結構體為AggStatePerHash)


(gdb) n
2268        for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
(gdb) p list_length(node->chain)
$18 = 0
(gdb) n
2273            if (phaseidx > 0)
(gdb) 
2280                aggnode = node;
(gdb) p *node
$19 = {plan = {type = T_Agg, startup_cost = 13677, total_cost = 13677.0625, plan_rows = 5, plan_width = 45, 
    parallel_aware = false, parallel_safe = false, plan_node_id = 0, targetlist = 0x2d631f8, qual = 0x0, 
    lefttree = 0x2d62cb8, righttree = 0x0, initPlan = 0x0, extParam = 0x0, allParam = 0x0}, aggstrategy = AGG_HASHED, 
  aggsplit = AGGSPLIT_SIMPLE, numCols = 1, grpColIdx = 0x2d62fa8, grpOperators = 0x2d62f88, numGroups = 5, aggParams = 0x0, 
  groupingSets = 0x0, chain = 0x0}
(gdb) n
2281                sortnode = NULL;
(gdb) 
2284            Assert(phase <= 1 || sortnode);
(gdb) 
2286            if (aggnode->aggstrategy == AGG_HASHED
(gdb) 
2289                AggStatePerPhase phasedata = &aggstate->phases[0];
(gdb) 
2291                Bitmapset  *cols = NULL;
(gdb) 
2293                Assert(phase == 0);
(gdb) 
2294                i = phasedata->numsets++;
(gdb) 
2295                perhash = &aggstate->perhash[i];
(gdb) 
2298                phasedata->aggnode = node;
(gdb) p *phasedata
$20 = {aggstrategy = AGG_PLAIN, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0, 
  aggnode = 0x0, sortnode = 0x0, evaltrans = 0x0}
(gdb) p i
$21 = 0
(gdb) n
2299                phasedata->aggstrategy = node->aggstrategy;
(gdb) 
2302                perhash->aggnode = aggnode;
(gdb) 
2304                phasedata->gset_lengths[i] = perhash->numCols = aggnode->numCols;
(gdb) 
2306                for (j = 0; j < aggnode->numCols; ++j)
(gdb) p aggnode->numCols
$22 = 1
(gdb) n
2307                    cols = bms_add_member(cols, aggnode->grpColIdx[j]);
(gdb) 
2306                for (j = 0; j < aggnode->numCols; ++j)
(gdb) 
2309                phasedata->grouped_cols[i] = cols;
(gdb) p cols
$23 = (Bitmapset *) 0x2d54028
(gdb) p *cols
$24 = {nwords = 1, words = 0x2d5402c}
(gdb) p *cols->words
$25 = 2
(gdb) n
2311                all_grouped_cols = bms_add_members(all_grouped_cols, cols);
(gdb) 
2312                continue;
(gdb) p all_grouped_cols
$26 = (Bitmapset *) 0x2d54048
(gdb) p *all_grouped_cols
$27 = {nwords = 1, words = 0x2d5404c}
(gdb) p *all_grouped_cols->words
$28 = 2
(gdb) n
2268        for (phaseidx = 0; phaseidx <= list_length(node->chain); ++phaseidx)
(gdb) 
2406        i = -1;

10.轉換all_grouped_cols為倒序鏈表


2407        while ((i = bms_next_member(all_grouped_cols, i)) >= 0)
(gdb) p *all_grouped_cols
$29 = {nwords = 1, words = 0x2d5404c}
(gdb) n
2408            aggstate->all_grouped_cols = lcons_int(i, aggstate->all_grouped_cols);
(gdb) 
2407        while ((i = bms_next_member(all_grouped_cols, i)) >= 0)
(gdb)

11.在輸出expr上下文中設置aggregate-result存儲,同時分配私有per-agg工作存儲


(gdb) 
2414        econtext = aggstate->ss.ps.ps_ExprContext;
(gdb) 
2415        econtext->ecxt_aggvalues = (Datum *) palloc0(sizeof(Datum) * numaggs);
(gdb) 
2416        econtext->ecxt_aggnulls = (bool *) palloc0(sizeof(bool) * numaggs);
(gdb) 
2418        peraggs = (AggStatePerAgg) palloc0(sizeof(AggStatePerAggData) * numaggs);
(gdb) 
2419        pertransstates = (AggStatePerTrans) palloc0(sizeof(AggStatePerTransData) * numaggs);
(gdb) 
2421        aggstate->peragg = peraggs;
(gdb) 
2422        aggstate->pertrans = pertransstates;
(gdb) 
2427                                         * (numGroupingSets + numHashes));
(gdb) 
2426            (AggStatePerGroup *) palloc0(sizeof(AggStatePerGroup)
(gdb) 
2425        aggstate->all_pergroups =
(gdb) 
2428        pergroups = aggstate->all_pergroups;
(gdb) 
2430        if (node->aggstrategy != AGG_HASHED)
(gdb)

12.如使用Hash算法,則調用find_hash_columns和build_hash_table方法初始化相關數據


(gdb) 
2445        if (use_hashing)
(gdb) 
2448            aggstate->hash_pergroup = pergroups;
(gdb) 
2450            find_hash_columns(aggstate);
(gdb) 
2451            build_hash_table(aggstate);
(gdb) 
2452            aggstate->table_filled = false;
(gdb) 
2461        if (node->aggstrategy == AGG_HASHED)
(gdb) p *aggstate
$30 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>, 
      ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0, 
      lefttree = 0x2d52bb0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x2d537b0, 
      ps_ExprContext = 0x2d52af0, ps_ProjInfo = 0x2d538f0, scandesc = 0x2d52f00}, ss_currentRelation = 0x0, 
    ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x2d53458}, aggs = 0x2d53e00, numaggs = 3, numtrans = 0, 
  aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x0, numphases = 1, current_phase = 0, peragg = 0x2d54770, 
  pertrans = 0x2d56780, hashcontext = 0x2d52a30, aggcontexts = 0x2d52858, tmpcontext = 0x2d52878, curaggcontext = 0x0, 
  curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1, current_set = 0, 
  grouped_cols = 0x0, all_grouped_cols = 0x2d54090, maxsets = 1, phases = 0x2d53ef8, sort_in = 0x0, sort_out = 0x0, 
  sort_slot = 0x0, pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 1, perhash = 0x2d53f50, 
  hash_pergroup = 0x2d54988, all_pergroups = 0x2d54988, combinedproj = 0x0}
(gdb) p *aggstate->perhash
$31 = {hashtable = 0x2d54ad8, hashiter = {cur = 0, end = 0, done = false}, hashslot = 0x2d54238, hashfunctions = 0x2d542d0, 
  eqfuncoids = 0x2d54a90, numCols = 1, numhashGrpCols = 1, largestGrpColIdx = 1, hashGrpColIdxInput = 0x2d549f0, 
  hashGrpColIdxHash = 0x2d54a10, aggnode = 0x2d903a0}
(gdb) p *aggstate->hash_pergroup
$32 = (AggStatePerGroup) 0x0
(gdb) p *aggstate->all_pergroups
$33 = (AggStatePerGroup) 0x0
(gdb) p *aggstate->phases
$34 = {aggstrategy = AGG_HASHED, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0, 
  aggnode = 0x2d903a0, sortnode = 0x0, evaltrans = 0x0}
(gdb)

13.調用initialize_phase/select_current_set初始化階段數據


(gdb) n
2463            aggstate->current_phase = 0;
(gdb) 
2464            initialize_phase(aggstate, 0);
(gdb) 
2465            select_current_set(aggstate, 0, true);
(gdb) 
2510        aggno = -1;
(gdb) p *aggstate->phases
$35 = {aggstrategy = AGG_HASHED, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0, 
  aggnode = 0x2d903a0, sortnode = 0x0, evaltrans = 0x0}
(gdb)

14.檢索聚合函數信息,初始化per-agg和per-trans數據不可變字段


(gdb) n
2463            aggstate->current_phase = 0;
(gdb) 
2464            initialize_phase(aggstate, 0);
(gdb) 
2465            select_current_set(aggstate, 0, true);
(gdb) 
2510        aggno = -1;
(gdb) p *aggstate->phases
$35 = {aggstrategy = AGG_HASHED, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0, 
  aggnode = 0x2d903a0, sortnode = 0x0, evaltrans = 0x0}
(gdb) n
2511        transno = -1;
(gdb) 
2512        foreach(l, aggstate->aggs)
(gdb) 
2514            AggrefExprState *aggrefstate = (AggrefExprState *) lfirst(l);
(gdb) 
2515            Aggref       *aggref = aggrefstate->aggref;
(gdb) 
2539            Assert(aggref->agglevelsup == 0);
(gdb) p aggstate->aggs
$36 = (List *) 0x2d53e00
(gdb) p *aggstate->aggs
$37 = {type = T_List, length = 3, head = 0x2d53ed0, tail = 0x2d53dd8}
(gdb) n
2541            Assert(aggref->aggsplit == aggstate->aggsplit);
(gdb) 
2544            existing_aggno = find_compatible_peragg(aggref, aggstate, aggno,
(gdb) 
2546            if (existing_aggno != -1)
(gdb) 
2557            peragg = &peraggs[++aggno];
(gdb) 
2558            peragg->aggref = aggref;
(gdb) 
2559            aggrefstate->aggno = aggno;
(gdb) 
2563                                       ObjectIdGetDatum(aggref->aggfnoid));
(gdb) 
2562            aggTuple = SearchSysCache1(AGGFNOID,
(gdb) p aggref->aggfnoid
$38 = 2116
(gdb) n
2564            if (!HeapTupleIsValid(aggTuple))
(gdb) p *aggTuple
$39 = {t_len = 96, t_self = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 17}, t_tableOid = 2600, t_data = 0x7fa0c01f1630}
(gdb) p *aggTuple->t_data
$40 = {t_choice = {t_heap = {t_xmin = 1, t_xmax = 0, t_field3 = {t_cid = 0, t_xvac = 0}}, t_datum = {datum_len_ = 1, 
      datum_typmod = 0, datum_typeid = 0}}, t_ctid = {ip_blkid = {bi_hi = 0, bi_lo = 0}, ip_posid = 17}, t_infomask2 = 22, 
  t_infomask = 2305, t_hoff = 32 ' ', t_bits = 0x7fa0c01f1647 "\377\377\017"}
(gdb) n
2567            aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
(gdb) 
2570            aclresult = pg_proc_aclcheck(aggref->aggfnoid, GetUserId(),
(gdb) p *aggform
$41 = {aggfnoid = 2116, aggkind = 110 'n', aggnumdirectargs = 0, aggtransfn = 768, aggfinalfn = 0, aggcombinefn = 768, 
  aggserialfn = 0, aggdeserialfn = 0, aggmtransfn = 0, aggminvtransfn = 0, aggmfinalfn = 0, aggfinalextra = false, 
  aggmfinalextra = false, aggfinalmodify = 114 'r', aggmfinalmodify = 114 'r', aggsortop = 521, aggtranstype = 23, 
  aggtransspace = 0, aggmtranstype = 0, aggmtransspace = 0}
(gdb) n
2572            if (aclresult != ACLCHECK_OK)
(gdb) 
2575            InvokeFunctionExecuteHook(aggref->aggfnoid);
(gdb) 
2578            aggtranstype = aggref->aggtranstype;
(gdb) 
2579            Assert(OidIsValid(aggtranstype));
(gdb) 
2585            if (DO_AGGSPLIT_COMBINE(aggstate->aggsplit))
(gdb) 
2594                transfn_oid = aggform->aggtransfn;
(gdb) 
2597            if (DO_AGGSPLIT_SKIPFINAL(aggstate->aggsplit))
(gdb) p transfn_oid
$42 = 768
(gdb) n
2600                peragg->finalfn_oid = finalfn_oid = aggform->aggfinalfn;
(gdb) 
2607            shareable = (aggform->aggfinalmodify != AGGMODIFY_READ_WRITE) ||
(gdb) p aggform->aggfinalfn
$43 = 0
(gdb) n
2609            peragg->shareable = shareable;
(gdb) 
2611            serialfn_oid = InvalidOid;
(gdb) p shareable
$44 = true
(gdb) n
2612            deserialfn_oid = InvalidOid;
(gdb) 
2618            if (aggtranstype == INTERNALOID)
(gdb) 
2653                                            ObjectIdGetDatum(aggref->aggfnoid));
(gdb) 
2652                procTuple = SearchSysCache1(PROCOID,
(gdb) 
2654                if (!HeapTupleIsValid(procTuple))
(gdb) 
2657                aggOwner = ((Form_pg_proc) GETSTRUCT(procTuple))->proowner;
(gdb) 
2658                ReleaseSysCache(procTuple);
(gdb) 
2660                aclresult = pg_proc_aclcheck(transfn_oid, aggOwner,
(gdb) 
2662                if (aclresult != ACLCHECK_OK)
(gdb) 
2665                InvokeFunctionExecuteHook(transfn_oid);
(gdb) 
2666                if (OidIsValid(finalfn_oid))
(gdb) 
2675                if (OidIsValid(serialfn_oid))
(gdb) 
2684                if (OidIsValid(deserialfn_oid))
(gdb) 
2700            numArguments = get_aggregate_argtypes(aggref, inputTypes);
(gdb) 
2703            numDirectArgs = list_length(aggref->aggdirectargs);
(gdb) 
2706            if (aggform->aggfinalextra)
(gdb) 
2709                peragg->numFinalArgs = numDirectArgs + 1;
(gdb) 
2712            peragg->aggdirectargs = ExecInitExprList(aggref->aggdirectargs,
(gdb) 
2719            if (OidIsValid(finalfn_oid))
(gdb) 
2733            get_typlenbyval(aggref->aggtype,
(gdb) 
2741            textInitVal = SysCacheGetAttr(AGGFNOID, aggTuple,
(gdb) 
2744            if (initValueIsNull)
(gdb) 
2745                initValue = (Datum) 0;
(gdb) 
2757            existing_transno = find_compatible_pertrans(aggstate, aggref,
(gdb) 
2763            if (existing_transno != -1)
(gdb) 
2775                pertrans = &pertransstates[++transno];
(gdb) 
2776                build_pertrans_for_aggref(pertrans, aggstate, estate,
(gdb) 
2781                peragg->transno = transno;
(gdb) 
2783            ReleaseSysCache(aggTuple);
(gdb) 
2512        foreach(l, aggstate->aggs)
(gdb) 
########
testdb=# select oid,proname from pg_proc where oid in (2116,768);
 oid  |  proname   
------+------------
  768 | int4larger
 2116 | max
(2 rows)
########

第2遍循環(處理聚合函數min)


...
(gdb) p *aggref
$45 = {xpr = {type = T_Aggref}, aggfnoid = 2132, aggtype = 23, aggcollid = 0, inputcollid = 0, aggtranstype = 23, 
  aggargtypes = 0x2d63578, aggdirectargs = 0x0, args = 0x2d63688, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0, 
  aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 18}
...
(gdb) p transfn_oid
$49 = 769
...
testdb=# select oid,proname from pg_proc where oid in (2132,769);
 oid  |   proname   
------+-------------
  769 | int4smaller
 2132 | min
(2 rows)

第3遍循環(處理聚合函數avg)


...
(gdb) p *aggref
$50 = {xpr = {type = T_Aggref}, aggfnoid = 2101, aggtype = 1700, aggcollid = 0, inputcollid = 0, aggtranstype = 1016, 
  aggargtypes = 0x2d632f0, aggdirectargs = 0x0, args = 0x2d63400, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0, 
  aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 10}
...
(gdb) p transfn_oid
$51 = 1963
...
2512        foreach(l, aggstate->aggs)
(gdb) 
#####
testdb=# select oid,proname from pg_proc where oid in (2101,1963);
 oid  |    proname     
------+----------------
 1963 | int4_avg_accum
 2101 | avg
(2 rows)
#####

15.為每個階段構建表達式(ExecBuildAggTrans),一次性完成所有轉換工作.


(gdb) 
2790        aggstate->numaggs = aggno + 1;
(gdb) 
2791        aggstate->numtrans = transno + 1;
(gdb) 
2803        if (numaggs != list_length(aggstate->aggs))
(gdb) 
2815        for (phaseidx = 0; phaseidx < aggstate->numphases; phaseidx++)
(gdb) 
2817            AggStatePerPhase phase = &aggstate->phases[phaseidx];
(gdb) 
2818            bool        dohash = false;
(gdb) 
2819            bool        dosort = false;
(gdb) 
2822            if (!phase->aggnode)
(gdb) 
2825            if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 1)
(gdb) 
2834            else if (aggstate->aggstrategy == AGG_MIXED && phaseidx == 0)
(gdb) 
2843            else if (phase->aggstrategy == AGG_PLAIN ||
(gdb) 
2844                     phase->aggstrategy == AGG_SORTED)
(gdb) 
2843            else if (phase->aggstrategy == AGG_PLAIN ||
(gdb) 
2849            else if (phase->aggstrategy == AGG_HASHED)
(gdb) 
2851                dohash = true;
(gdb) 
2852                dosort = false;
(gdb) 
2857            phase->evaltrans = ExecBuildAggTrans(aggstate, phase, dosort, dohash);
(gdb) 
2815        for (phaseidx = 0; phaseidx < aggstate->numphases; phaseidx++)
(gdb) 
2861        return aggstate;
(gdb)

最終結果
AggState結構體


(gdb) p *aggstate
$52 = {ss = {ps = {type = T_AggState, plan = 0x2d903a0, state = 0x2d52428, ExecProcNode = 0x6ee438 <ExecAgg>, 
      ExecProcNodeReal = 0x0, instrument = 0x0, worker_instrument = 0x0, worker_jit_instrument = 0x0, qual = 0x0, 
      lefttree = 0x2d52bb0, righttree = 0x0, initPlan = 0x0, subPlan = 0x0, chgParam = 0x0, ps_ResultTupleSlot = 0x2d537b0, 
      ps_ExprContext = 0x2d52af0, ps_ProjInfo = 0x2d538f0, scandesc = 0x2d52f00}, ss_currentRelation = 0x0, 
    ss_currentScanDesc = 0x0, ss_ScanTupleSlot = 0x2d53458}, aggs = 0x2d53e00, numaggs = 3, numtrans = 3, 
  aggstrategy = AGG_HASHED, aggsplit = AGGSPLIT_SIMPLE, phase = 0x2d53ef8, numphases = 1, current_phase = 0, 
  peragg = 0x2d54770, pertrans = 0x2d56780, hashcontext = 0x2d52a30, aggcontexts = 0x2d52858, tmpcontext = 0x2d52878, 
  curaggcontext = 0x2d52a30, curperagg = 0x0, curpertrans = 0x0, input_done = false, agg_done = false, projected_set = -1, 
  current_set = 0, grouped_cols = 0x0, all_grouped_cols = 0x2d54090, maxsets = 1, phases = 0x2d53ef8, sort_in = 0x0, 
  sort_out = 0x0, sort_slot = 0x0, pergroups = 0x0, grp_firstTuple = 0x0, table_filled = false, num_hashes = 1, 
  perhash = 0x2d53f50, hash_pergroup = 0x2d54988, all_pergroups = 0x2d54988, combinedproj = 0x0}

AggState->phase


(gdb) p *aggstate->phase
$53 = {aggstrategy = AGG_HASHED, numsets = 1, gset_lengths = 0x2d53fe8, grouped_cols = 0x2d54008, eqfunctions = 0x0, 
  aggnode = 0x2d903a0, sortnode = 0x0, evaltrans = 0x2d55e78}

AggState->peragg


(gdb) p *aggstate->peragg
$54 = {aggref = 0x2d63740, transno = 0, finalfn_oid = 0, finalfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, 
    fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, 
  numFinalArgs = 1, aggdirectargs = 0x0, resulttypeLen = 4, resulttypeByVal = true, shareable = true}
(gdb) p *aggstate->peragg->aggref
$55 = {xpr = {type = T_Aggref}, aggfnoid = 2116, aggtype = 23, aggcollid = 0, inputcollid = 0, aggtranstype = 23, 
  aggargtypes = 0x2d63800, aggdirectargs = 0x0, args = 0x2d63910, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0, 
  aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 26}
(gdb) p aggstate->peragg[1]
$56 = {aggref = 0x2d634b8, transno = 1, finalfn_oid = 0, finalfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, 
    fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, 
  numFinalArgs = 1, aggdirectargs = 0x0, resulttypeLen = 4, resulttypeByVal = true, shareable = true}
(gdb) p *aggstate->peragg[1]->aggref
$57 = {xpr = {type = T_Aggref}, aggfnoid = 2132, aggtype = 23, aggcollid = 0, inputcollid = 0, aggtranstype = 23, 
  aggargtypes = 0x2d63578, aggdirectargs = 0x0, args = 0x2d63688, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0, 
  aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 18}
(gdb) p aggstate->peragg[2]
$58 = {aggref = 0x2d63230, transno = 2, finalfn_oid = 1964, finalfn = {fn_addr = 0x978251 <int8_avg>, fn_oid = 1964, 
    fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2d52310, 
    fn_expr = 0x2d55b80}, numFinalArgs = 1, aggdirectargs = 0x0, resulttypeLen = -1, resulttypeByVal = false, 
  shareable = true}
(gdb) p *aggstate->peragg[2]->aggref
$59 = {xpr = {type = T_Aggref}, aggfnoid = 2101, aggtype = 1700, aggcollid = 0, inputcollid = 0, aggtranstype = 1016, 
  aggargtypes = 0x2d632f0, aggdirectargs = 0x0, args = 0x2d63400, aggorder = 0x0, aggdistinct = 0x0, aggfilter = 0x0, 
  aggstar = false, aggvariadic = false, aggkind = 110 'n', agglevelsup = 0, aggsplit = AGGSPLIT_SIMPLE, location = 10}

AggState->pertrans


(gdb) p aggstate->pertrans[0]
$60 = {aggref = 0x2d63740, aggshared = false, numInputs = 1, numTransInputs = 1, transfn_oid = 768, serialfn_oid = 0, 
  deserialfn_oid = 0, aggtranstype = 23, transfn = {fn_addr = 0x93e877 <int4larger>, fn_oid = 768, fn_nargs = 2, 
    fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2d52310, fn_expr = 0x2d55940}, 
  serialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, fn_stats = 0 '\000', 
    fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, deserialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, 
    fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, 
  aggCollation = 0, numSortCols = 0, numDistinctCols = 0, sortColIdx = 0x0, sortOperators = 0x0, sortCollations = 0x0, 
  sortNullsFirst = 0x0, equalfnOne = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, 
    fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, equalfnMulti = 0x0, initValue = 0, 
  initValueIsNull = true, inputtypeLen = 0, transtypeLen = 4, inputtypeByVal = false, transtypeByVal = true, 
  sortslot = 0x0, uniqslot = 0x0, sortdesc = 0x0, sortstates = 0x2d549b0, transfn_fcinfo = {flinfo = 0x2d567a8, 
    context = 0x2d52640, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 2, arg = {0 <repeats 100 times>}, 
    argnull = {false <repeats 100 times>}}, serialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, 
    fncollation = 0, isnull = false, nargs = 0, arg = {0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}, 
  deserialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 0, arg = {
      0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}}
(gdb) p aggstate->pertrans[1]
$61 = {aggref = 0x2d634b8, aggshared = false, numInputs = 1, numTransInputs = 1, transfn_oid = 769, serialfn_oid = 0, 
  deserialfn_oid = 0, aggtranstype = 23, transfn = {fn_addr = 0x93e8a3 <int4smaller>, fn_oid = 769, fn_nargs = 2, 
    fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2d52310, fn_expr = 0x2d55a90}, 
  serialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, fn_stats = 0 '\000', 
    fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, deserialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, 
    fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, 
  aggCollation = 0, numSortCols = 0, numDistinctCols = 0, sortColIdx = 0x0, sortOperators = 0x0, sortCollations = 0x0, 
  sortNullsFirst = 0x0, equalfnOne = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, 
    fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, equalfnMulti = 0x0, initValue = 0, 
  initValueIsNull = true, inputtypeLen = 0, transtypeLen = 4, inputtypeByVal = false, transtypeByVal = true, 
  sortslot = 0x0, uniqslot = 0x0, sortdesc = 0x0, sortstates = 0x2d549d0, transfn_fcinfo = {flinfo = 0x2d573f0, 
    context = 0x2d52640, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 2, arg = {0 <repeats 100 times>}, 
    argnull = {false <repeats 100 times>}}, serialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, 
    fncollation = 0, isnull = false, nargs = 0, arg = {0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}, 
  deserialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 0, arg = {
      0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}}
(gdb) p aggstate->pertrans[2]
$62 = {aggref = 0x2d63230, aggshared = false, numInputs = 1, numTransInputs = 1, transfn_oid = 1963, serialfn_oid = 0, 
  deserialfn_oid = 0, aggtranstype = 1016, transfn = {fn_addr = 0x977d8f <int4_avg_accum>, fn_oid = 1963, fn_nargs = 2, 
    fn_strict = true, fn_retset = false, fn_stats = 2 '\002', fn_extra = 0x0, fn_mcxt = 0x2d52310, fn_expr = 0x2d55e20}, 
  serialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, fn_stats = 0 '\000', 
    fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, deserialfn = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, 
    fn_strict = false, fn_retset = false, fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, 
  aggCollation = 0, numSortCols = 0, numDistinctCols = 0, sortColIdx = 0x0, sortOperators = 0x0, sortCollations = 0x0, 
  sortNullsFirst = 0x0, equalfnOne = {fn_addr = 0x0, fn_oid = 0, fn_nargs = 0, fn_strict = false, fn_retset = false, 
    fn_stats = 0 '\000', fn_extra = 0x0, fn_mcxt = 0x0, fn_expr = 0x0}, equalfnMulti = 0x0, initValue = 47537400, 
  initValueIsNull = false, inputtypeLen = 0, transtypeLen = -1, inputtypeByVal = false, transtypeByVal = false, 
  sortslot = 0x0, uniqslot = 0x0, sortdesc = 0x0, sortstates = 0x2d55bd8, transfn_fcinfo = {flinfo = 0x2d58038, 
    context = 0x2d52640, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 2, arg = {0 <repeats 100 times>}, 
    argnull = {false <repeats 100 times>}}, serialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, 
    fncollation = 0, isnull = false, nargs = 0, arg = {0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}, 
  deserialfn_fcinfo = {flinfo = 0x0, context = 0x0, resultinfo = 0x0, fncollation = 0, isnull = false, nargs = 0, arg = {
      0 <repeats 100 times>}, argnull = {false <repeats 100 times>}}}

AggState->pergroups/hash_pergroup/all_pergroups相關


(gdb) p *aggstate->pergroups
Cannot access memory at address 0x0
(gdb) p *aggstate->hash_pergroup
$65 = (AggStatePerGroup) 0x0
(gdb) p *aggstate->all_pergroups
$66 = (AggStatePerGroup) 0x0

AggState->perhash


(gdb) p *aggstate->perhash
$67 = {hashtable = 0x2d54ad8, hashiter = {cur = 0, end = 0, done = false}, hashslot = 0x2d54238, hashfunctions = 0x2d542d0, 
  eqfuncoids = 0x2d54a90, numCols = 1, numhashGrpCols = 1, largestGrpColIdx = 1, hashGrpColIdxInput = 0x2d549f0, 
  hashGrpColIdxHash = 0x2d54a10, aggnode = 0x2d903a0}
(gdb) p *aggstate->perhash->hashtable
$68 = {hashtab = 0x2d54b70, numCols = 1, keyColIdx = 0x2d54a10, tab_hash_funcs = 0x2d542d0, tab_eq_func = 0x2d54e90, 
  tablecxt = 0x2d7c450, tempcxt = 0x2d90a00, entrysize = 24, tableslot = 0x2d54df8, inputslot = 0x0, in_hash_funcs = 0x0, 
  cur_eq_func = 0x0, hash_iv = 0, exprcontext = 0x2d557b0}
(gdb) p *aggstate->perhash->hashfunctions
$69 = {fn_addr = 0x4c8a31 <hashtext>, fn_oid = 400, fn_nargs = 1, fn_strict = true, fn_retset = false, fn_stats = 2 '\002', 
  fn_extra = 0x0, fn_mcxt = 0x2d52310, fn_expr = 0x0}

DONE!

尚有不少細節需要整理

四、參考資料

N/A

向AI問一下細節

免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯繫站長郵箱:is@yisu.com進行舉報,並提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。

AI

华池县| 新和县| 正定县| 嵩明县| 宜川县| 门头沟区| 依安县| 德安县| 北碚区| 库尔勒市| 永平县| 六枝特区| 永新县| 成武县| 财经| 宕昌县| 茶陵县| 金沙县| 铁岭市| 文成县| 康乐县| 铜川市| 上饶县| 革吉县| 灌云县| 锦屏县| 忻州市| 永兴县| 留坝县| 定襄县| 贵南县| 万全县| 托克托县| 利辛县| 宜兰市| 两当县| 纳雍县| 遂川县| 炉霍县| 文成县| 敦煌市|