您好,登錄后才能下訂單哦!
這篇文章主要講解了“PostgreSQL中match_unsorted_outer函數分析”,文中的講解內容簡單清晰,易于學習與理解,下面請大家跟著小編的思路慢慢深入,一起來研究和學習“PostgreSQL中match_unsorted_outer函數分析”吧!
Cost相關
注意:實際使用的參數值通過系統配置文件定義,而不是這里的常量定義!
typedef double Cost; /* execution cost (in page-access units) */ /* defaults for costsize.c's Cost parameters */ /* NB: cost-estimation code should use the variables, not these constants! */ /* 注意:實際值通過系統配置文件定義,而不是這里的常量定義! */ /* If you change these, update backend/utils/misc/postgresql.sample.conf */ #define DEFAULT_SEQ_PAGE_COST 1.0 //順序掃描page的成本 #define DEFAULT_RANDOM_PAGE_COST 4.0 //隨機掃描page的成本 #define DEFAULT_CPU_TUPLE_COST 0.01 //處理一個元組的CPU成本 #define DEFAULT_CPU_INDEX_TUPLE_COST 0.005 //處理一個索引元組的CPU成本 #define DEFAULT_CPU_OPERATOR_COST 0.0025 //執行一次操作或函數的CPU成本 #define DEFAULT_PARALLEL_TUPLE_COST 0.1 //并行執行,從一個worker傳輸一個元組到另一個worker的成本 #define DEFAULT_PARALLEL_SETUP_COST 1000.0 //構建并行執行環境的成本 #define DEFAULT_EFFECTIVE_CACHE_SIZE 524288 /*先前已有介紹, measured in pages */ double seq_page_cost = DEFAULT_SEQ_PAGE_COST; double random_page_cost = DEFAULT_RANDOM_PAGE_COST; double cpu_tuple_cost = DEFAULT_CPU_TUPLE_COST; double cpu_index_tuple_cost = DEFAULT_CPU_INDEX_TUPLE_COST; double cpu_operator_cost = DEFAULT_CPU_OPERATOR_COST; double parallel_tuple_cost = DEFAULT_PARALLEL_TUPLE_COST; double parallel_setup_cost = DEFAULT_PARALLEL_SETUP_COST; int effective_cache_size = DEFAULT_EFFECTIVE_CACHE_SIZE; Cost disable_cost = 1.0e10;//1后面10個0,通過設置一個巨大的成本,讓優化器自動放棄此路徑 int max_parallel_workers_per_gather = 2;//每次gather使用的worker數
nested loop join的算法實現偽代碼如下:
FOR row#1 IN (select * from dataset#1) LOOP
FOR row#2 IN (select * from dataset#2 where row#1 is matched) LOOP
output values from row#1 and row#2
END LOOP
END LOOP
match_unsorted_outer函數通過在每個可能的外表訪問路徑上使用迭代替換或合并連接的方式嘗試構造連接訪問路徑。
//------------------------------------------------ match_unsorted_outer /* * match_unsorted_outer * Creates possible join paths for processing a single join relation * 'joinrel' by employing either iterative substitution or * mergejoining on each of its possible outer paths (considering * only outer paths that are already ordered well enough for merging). * 通過在每個可能的外表訪問路徑上使用迭代替換或合并連接(只考慮已經排序得足夠好,用于合并的外表訪問路徑), * 為處理最終的連接關系“joinrel”創建可能的連接路徑。 * * We always generate a nestloop path for each available outer path. * In fact we may generate as many as five: one on the cheapest-total-cost * inner path, one on the same with materialization, one on the * cheapest-startup-cost inner path (if different), one on the * cheapest-total inner-indexscan path (if any), and one on the * cheapest-startup inner-indexscan path (if different). * 我們總是為每個可用的外表訪問路徑生成一個nested loop訪問路徑。 * 實際上,可能會生成多達5個: * 一個是內表訪問成本最低的路徑, * 一個是物化但總成本最低的路徑, * 一個是啟動成本最低的內表訪問路徑(如果不同的話), * 一個是成本最低的內表索引掃描路徑(如果有的話), * 一個是啟動成本最低的內表索引掃描路徑(如果不同的話)。 * * We also consider mergejoins if mergejoin clauses are available. See * detailed comments in generate_mergejoin_paths. * 如果mergejoin條件子句可用,也會考慮merge join。詳細請參閱generate_mergejoin_paths中的注釋。 * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'jointype' is the type of join to do * 'extra' contains additional input values */ static void match_unsorted_outer(PlannerInfo *root,//優化器信息 RelOptInfo *joinrel,//連接生成的relation RelOptInfo *outerrel,//外表 RelOptInfo *innerrel,//內表 JoinType jointype,//連接類型 JoinPathExtraData *extra)//額外的信息 { JoinType save_jointype = jointype; bool nestjoinOK; bool useallclauses; Path *inner_cheapest_total = innerrel->cheapest_total_path; Path *matpath = NULL; ListCell *lc1; /* * Nestloop only supports inner, left, semi, and anti joins. Also, if we * are doing a right or full mergejoin, we must use *all* the mergeclauses * as join clauses, else we will not have a valid plan. (Although these * two flags are currently inverses, keep them separate for clarity and * possible future changes.) * Nestloop只支持內連接、左連接、半連接和反連接。 * 此外,如果正在做一個右連接或全合并連接,我們必須使用所有的merge clauses條件作為連接子句, * 否則我們將沒有一個有效的計劃。 * (盡管這兩個標志目前是反向的,但為了清晰和可能的未來變化,請將它們分開。) */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_SEMI: case JOIN_ANTI: nestjoinOK = true; useallclauses = false; break; case JOIN_RIGHT: case JOIN_FULL: nestjoinOK = false; useallclauses = true; break; case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: jointype = JOIN_INNER; nestjoinOK = true; useallclauses = false; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); nestjoinOK = false; /* keep compiler quiet */ useallclauses = false; break; } /* * If inner_cheapest_total is parameterized by the outer rel, ignore it; * we will consider it below as a member of cheapest_parameterized_paths, * but the other possibilities considered in this routine aren't usable. * 如果inner_cheapest_total被外表rel參數化,則忽略它; * 下面將把它看作cheapest_parameterized_paths的成員,但是這個處理過程中的其他嘗試則不可用此信息。 */ if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel)) inner_cheapest_total = NULL; /* * If we need to unique-ify the inner path, we will consider only the * cheapest-total inner. * 如果需要唯一化內部路徑,將只考慮成本最低的內表訪問路徑。 */ if (save_jointype == JOIN_UNIQUE_INNER) { /* No way to do this with an inner path parameterized by outer rel */ //除了使用外表rel參數化的內表訪問路徑,沒有其他辦法. if (inner_cheapest_total == NULL) return; inner_cheapest_total = (Path *) create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo); Assert(inner_cheapest_total); } else if (nestjoinOK) { /* * Consider materializing the cheapest inner path, unless * enable_material is off or the path in question materializes its * output anyway. * 嘗試實現成本最低的內表訪問路徑,除非enable_material關閉或該路徑以其他方式物化其輸出。 */ if (enable_material && inner_cheapest_total != NULL && !ExecMaterializesOutput(inner_cheapest_total->pathtype)) matpath = (Path *) create_material_path(innerrel, inner_cheapest_total); } foreach(lc1, outerrel->pathlist)//遍歷外表訪問路徑 { Path *outerpath = (Path *) lfirst(lc1); List *merge_pathkeys; /* * We cannot use an outer path that is parameterized by the inner rel. * 不能使用使用內表參數化的外表訪問路徑 */ if (PATH_PARAM_BY_REL(outerpath, innerrel)) continue; /* * If we need to unique-ify the outer path, it's pointless to consider * any but the cheapest outer. (XXX we don't consider parameterized * outers, nor inners, for unique-ified cases. Should we?) * 如果需要唯一化外表訪問路徑,那么只考慮成本最低的外表訪問路徑是毫無意義的。 * 不考慮參數化的outers,也不考慮inners,因為它們是獨特的情況。那我們應該做的是?) */ if (save_jointype == JOIN_UNIQUE_OUTER) { if (outerpath != outerrel->cheapest_total_path) continue; outerpath = (Path *) create_unique_path(root, outerrel, outerpath, extra->sjinfo); Assert(outerpath); } /* * The result will have this sort order (even if it is implemented as * a nestloop, and even if some of the mergeclauses are implemented by * qpquals rather than as true mergeclauses): * 最終結果將會有這樣的排序順序(即使它是作為nestloop實現的, * 即使一些mergeclauses是由qpquals而不是作為真正的mergeclauses實現的): */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerpath->pathkeys); if (save_jointype == JOIN_UNIQUE_INNER) { /* * Consider nestloop join, but only with the unique-ified cheapest * inner path * 嘗試nestloop join,只是使用保證唯一性的成本最低的內表訪問路徑 */ try_nestloop_path(root, joinrel, outerpath, inner_cheapest_total, merge_pathkeys, jointype, extra); } else if (nestjoinOK) { /* * Consider nestloop joins using this outer path and various * available paths for the inner relation. We consider the * cheapest-total paths for each available parameterization of the * inner relation, including the unparameterized case. * 嘗試使用這個外表訪問路徑和內表的各種可用路徑的nestloop連接。 * 嘗試內部關系的每個可用參數化(包括非參數化用例)的成本最低路徑。 */ ListCell *lc2; foreach(lc2, innerrel->cheapest_parameterized_paths)//遍歷參數化路徑 { Path *innerpath = (Path *) lfirst(lc2); try_nestloop_path(root, joinrel, outerpath, innerpath, merge_pathkeys, jointype, extra);//嘗試nestloop join } /* Also consider materialized form of the cheapest inner path */ if (matpath != NULL) try_nestloop_path(root, joinrel, outerpath, matpath, merge_pathkeys, jointype, extra);//也會嘗試成本最低的物化內表訪問路徑 } /* Can't do anything else if outer path needs to be unique'd */ //如外表需保證唯一,則繼續下一循環 if (save_jointype == JOIN_UNIQUE_OUTER) continue; /* Can't do anything else if inner rel is parameterized by outer */ //內表是被外表參數化的,繼續下一循環 if (inner_cheapest_total == NULL) continue; /* Generate merge join paths */ //構造merge join路徑 generate_mergejoin_paths(root, joinrel, innerrel, outerpath, save_jointype, extra, useallclauses, inner_cheapest_total, merge_pathkeys, false); } /* * Consider partial nestloop and mergejoin plan if outerrel has any * partial path and the joinrel is parallel-safe. However, we can't * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and * therefore we won't be able to properly guarantee uniqueness. Nor can * we handle extra_lateral_rels, since partial paths must not be * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT, * because they can produce false null extended rows. * 考慮并行nestloop和合并計劃,如果外表有并行訪問路徑,并且join rel是是并行安全的。 * 但是,不能處理JOIN_UNIQUE_OUTER,因為外部路徑是并行的,因此不能正確地保證惟一性。 * 同時,也不能處理extra_lateral al_rels,因為部分路徑必須不被參數化。 * 類似地,我們不能處理JOIN_FULL和JOIN_RIGHT,因為它們會產生假空擴展行。 */ if (joinrel->consider_parallel && save_jointype != JOIN_UNIQUE_OUTER && save_jointype != JOIN_FULL && save_jointype != JOIN_RIGHT && outerrel->partial_pathlist != NIL && bms_is_empty(joinrel->lateral_relids)) { if (nestjoinOK) consider_parallel_nestloop(root, joinrel, outerrel, innerrel, save_jointype, extra);//嘗試并行netstloop join /* * If inner_cheapest_total is NULL or non parallel-safe then find the * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we * can't use any alternative inner path. * 如果inner_cheapest_total為空或非并行安全,那么可以找到成本最低的總并行安全訪問路徑。 * 如果執行JOIN_UNIQUE_INNER,則不能使用任何替代的內表訪問路徑。 */ if (inner_cheapest_total == NULL || !inner_cheapest_total->parallel_safe) { if (save_jointype == JOIN_UNIQUE_INNER) return; inner_cheapest_total = get_cheapest_parallel_safe_total_inner( innerrel->pathlist); } if (inner_cheapest_total) consider_parallel_mergejoin(root, joinrel, outerrel, innerrel, save_jointype, extra, inner_cheapest_total);//嘗試并行merge join } } //------------------------------------ try_nestloop_path /* * try_nestloop_path * Consider a nestloop join path; if it appears useful, push it into * the joinrel's pathlist via add_path(). * 嘗試nestloop join訪問路徑; * 如果該路徑可行,則通過函數add_path把該路徑加入到joinrel's pathlist鏈表中. */ static void try_nestloop_path(PlannerInfo *root, RelOptInfo *joinrel, Path *outer_path, Path *inner_path, List *pathkeys, JoinType jointype, JoinPathExtraData *extra) { Relids required_outer; JoinCostWorkspace workspace; RelOptInfo *innerrel = inner_path->parent; RelOptInfo *outerrel = outer_path->parent; Relids innerrelids; Relids outerrelids; Relids inner_paramrels = PATH_REQ_OUTER(inner_path); Relids outer_paramrels = PATH_REQ_OUTER(outer_path); /* * Paths are parameterized by top-level parents, so run parameterization * tests on the parent relids. * 路徑由最頂層的父類參數化,因此在父類集上運行參數化測試。 */ if (innerrel->top_parent_relids) innerrelids = innerrel->top_parent_relids; else innerrelids = innerrel->relids; if (outerrel->top_parent_relids) outerrelids = outerrel->top_parent_relids; else outerrelids = outerrel->relids; /* * Check to see if proposed path is still parameterized, and reject if the * parameterization wouldn't be sensible --- unless allow_star_schema_join * says to allow it anyway. Also, we must reject if have_dangerous_phv * doesn't like the look of it, which could only happen if the nestloop is * still parameterized. * 檢查建議的訪問路徑是否仍然是參數化的,如果參數化不合理,則丟棄——除非allow_star_schema_join設置為允許。 * 另外,如果have_dangerousous_phv不允許,則必須丟棄它,而這種情況只有在nestloop仍然被參數化時才會發生。 */ required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels, innerrelids, inner_paramrels); if (required_outer && ((!bms_overlap(required_outer, extra->param_source_rels) && !allow_star_schema_join(root, outerrelids, inner_paramrels)) || have_dangerous_phv(root, outerrelids, inner_paramrels))) { /* Waste no memory when we reject a path here */ bms_free(required_outer); return;//退出 } /* * Do a precheck to quickly eliminate obviously-inferior paths. We * calculate a cheap lower bound on the path's cost and then use * add_path_precheck() to see if the path is clearly going to be dominated * by some existing path for the joinrel. If not, do the full pushup with * creating a fully valid path structure and submitting it to add_path(). * The latter two steps are expensive enough to make this two-phase * methodology worthwhile. * 參見merge join的注釋 */ initial_cost_nestloop(root, &workspace, jointype, outer_path, inner_path, extra);//初步計算nestloop的成本 if (add_path_precheck(joinrel, workspace.startup_cost, workspace.total_cost, pathkeys, required_outer))//初步檢查 { /* * If the inner path is parameterized, it is parameterized by the * topmost parent of the outer rel, not the outer rel itself. Fix * that. * 如果內表訪問路徑是參數化的,那么它是由外層rel的最上層父元素參數化的,而不是外層rel本身。 * 需解決這個問題。 */ if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent)) { inner_path = reparameterize_path_by_child(root, inner_path, outer_path->parent); /* * If we could not translate the path, we can't create nest loop * path. * 如果不能處理此情況,則不能創建nest loop訪問路徑 */ if (!inner_path) { bms_free(required_outer); return; } } add_path(joinrel, (Path *) create_nestloop_path(root, joinrel, jointype, &workspace, extra, outer_path, inner_path, extra->restrictlist, pathkeys, required_outer));//創建并添加nestloop路徑 } else { /* Waste no memory when we reject a path here */ bms_free(required_outer); } } //----------------------- create_nestloop_path /* * create_nestloop_path * Creates a pathnode corresponding to a nestloop join between two * relations. * 創建nestloop 連接路徑節點. * * 'joinrel' is the join relation. * 'jointype' is the type of join required * 'workspace' is the result from initial_cost_nestloop * 'extra' contains various information about the join * 'outer_path' is the outer path * 'inner_path' is the inner path * 'restrict_clauses' are the RestrictInfo nodes to apply at the join * 'pathkeys' are the path keys of the new join path * 'required_outer' is the set of required outer rels * * Returns the resulting path node. */ NestPath * create_nestloop_path(PlannerInfo *root,//優化器信息 RelOptInfo *joinrel,//連接生成的relation JoinType jointype,//連接類型 JoinCostWorkspace *workspace,//成本workspace JoinPathExtraData *extra,//額外的信息 Path *outer_path,//外表訪問路徑 Path *inner_path,//內部訪問路徑 List *restrict_clauses,//約束條件 List *pathkeys,//排序鍵 Relids required_outer)//需依賴的外部relids { NestPath *pathnode = makeNode(NestPath); Relids inner_req_outer = PATH_REQ_OUTER(inner_path); /* * If the inner path is parameterized by the outer, we must drop any * restrict_clauses that are due to be moved into the inner path. We have * to do this now, rather than postpone the work till createplan time, * because the restrict_clauses list can affect the size and cost * estimates for this path. * 如果內表訪問路徑是由外部參數化的,那么必須刪除任何將移動到內表訪問路徑的約束條件子句。 * 現在必須這樣做,而不是將此項工作推遲到create plan的時候,因為限制條件鏈表會影響此路徑的大小和成本估計。 */ if (bms_overlap(inner_req_outer, outer_path->parent->relids))//內表需依賴的relids與外表父路徑的relids存在交集 { Relids inner_and_outer = bms_union(inner_path->parent->relids, inner_req_outer);//合并內部父路徑的relids和內部依賴的relids List *jclauses = NIL; ListCell *lc; foreach(lc, restrict_clauses) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); if (!join_clause_is_movable_into(rinfo, inner_path->parent->relids, inner_and_outer)) jclauses = lappend(jclauses, rinfo); } restrict_clauses = jclauses; } //創建路徑 pathnode->path.pathtype = T_NestLoop; pathnode->path.parent = joinrel; pathnode->path.pathtarget = joinrel->reltarget; pathnode->path.param_info = get_joinrel_parampathinfo(root, joinrel, outer_path, inner_path, extra->sjinfo, required_outer, &restrict_clauses); pathnode->path.parallel_aware = false; pathnode->path.parallel_safe = joinrel->consider_parallel && outer_path->parallel_safe && inner_path->parallel_safe; /* This is a foolish way to estimate parallel_workers, but for now... */ pathnode->path.parallel_workers = outer_path->parallel_workers; pathnode->path.pathkeys = pathkeys; pathnode->jointype = jointype; pathnode->inner_unique = extra->inner_unique; pathnode->outerjoinpath = outer_path; pathnode->innerjoinpath = inner_path; pathnode->joinrestrictinfo = restrict_clauses; final_cost_nestloop(root, pathnode, workspace, extra); return pathnode; }
測試腳本如下
testdb=# explain verbose select dw.*,grjf.grbh,grjf.xm,grjf.ny,grjf.je testdb-# from t_dwxx dw,lateral (select gr.grbh,gr.xm,jf.ny,jf.je testdb(# from t_grxx gr inner join t_jfxx jf testdb(# on gr.dwbh = dw.dwbh testdb(# and gr.grbh = jf.grbh) grjf testdb-# where dwbh = '1001' testdb-# order by dw.dwbh; QUERY PLAN ---------------------------------------------------------------------------------------------- Nested Loop (cost=0.87..112.00 rows=10 width=47) Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm, jf.ny, jf.je -> Nested Loop (cost=0.58..28.80 rows=10 width=32) Output: dw.dwmc, dw.dwbh, dw.dwdz, gr.grbh, gr.xm -> Index Scan using t_dwxx_pkey on public.t_dwxx dw (cost=0.29..8.30 rows=1 width=20) Output: dw.dwmc, dw.dwbh, dw.dwdz Index Cond: ((dw.dwbh)::text = '1001'::text) -> Index Scan using idx_t_grxx_dwbh on public.t_grxx gr (cost=0.29..20.40 rows=10 width=16) Output: gr.dwbh, gr.grbh, gr.xm, gr.xb, gr.nl Index Cond: ((gr.dwbh)::text = '1001'::text) -> Index Scan using idx_t_jfxx_grbh on public.t_jfxx jf (cost=0.29..8.31 rows=1 width=20) Output: jf.grbh, jf.ny, jf.je Index Cond: ((jf.grbh)::text = (gr.grbh)::text) (13 rows)
啟動gdb,設置斷點跟蹤
(gdb) b match_unsorted_outer Breakpoint 1 at 0x7aff2c: file joinpath.c, line 1338. (gdb) c Continuing. Breakpoint 1, match_unsorted_outer (root=0x2a987a8, joinrel=0x2aef8a0, outerrel=0x2aa20f0, innerrel=0x2aa3620, jointype=JOIN_INNER, extra=0x7ffc343da930) at joinpath.c:1338 1338 JoinType save_jointype = jointype; (gdb)
joinrel是1號RTE和3號RTE的連接,即t_dwxx和t_grxx
(gdb) p *joinrel->relids->words $3 = 10
內表成本最低的訪問路徑是索引掃描
(gdb) n 1341 Path *inner_cheapest_total = innerrel->cheapest_total_path; (gdb) 1342 Path *matpath = NULL; (gdb) p *inner_cheapest_total $1 = {type = T_IndexPath, pathtype = T_IndexScan, parent = 0x2aa3620, pathtarget = 0x2aa3858, param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10, startup_cost = 0.29249999999999998, total_cost = 20.399882614957409, pathkeys = 0x0}
連接類型為JOIN_INNER,設置nestjoinOK為T,useallclauses為F
(gdb) p save_jointype $4 = JOIN_INNER (gdb) n 1352 switch (jointype) (gdb) 1358 nestjoinOK = true; (gdb) 1359 useallclauses = false; (gdb) 1360 break; (gdb)
創建物化內表訪問路徑
(gdb) n 1408 if (enable_material && inner_cheapest_total != NULL && (gdb) 1409 !ExecMaterializesOutput(inner_cheapest_total->pathtype)) (gdb) 1408 if (enable_material && inner_cheapest_total != NULL && (gdb) 1410 matpath = (Path *) (gdb) 1414 foreach(lc1, outerrel->pathlist) (gdb) p *matpath $5 = {type = T_MaterialPath, pathtype = T_Material, parent = 0x2aa3620, pathtarget = 0x2aa3858, param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10, startup_cost = 0.29249999999999998, total_cost = 20.44988261495741, pathkeys = 0x0}
開始遍歷外表訪問路徑
(gdb) n 1416 Path *outerpath = (Path *) lfirst(lc1);
嘗試使用這個外表訪問路徑和內表的各種可用路徑構建nestloop連接
嘗試內部關系的每個可用的參數化(包括非參數化用例)成本最低路徑
... 1461 else if (nestjoinOK) (gdb) 1471 foreach(lc2, innerrel->cheapest_parameterized_paths) (gdb) n 1473 Path *innerpath = (Path *) lfirst(lc2); (gdb) n 1475 try_nestloop_path(root, (gdb) p *innerpath $6 = {type = T_IndexPath, pathtype = T_IndexScan, parent = 0x2aa3620, pathtarget = 0x2aa3858, param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10, startup_cost = 0.29249999999999998, total_cost = 20.399882614957409, pathkeys = 0x0} (gdb) n 1471 foreach(lc2, innerrel->cheapest_parameterized_paths) (gdb) 1473 Path *innerpath = (Path *) lfirst(lc2); (gdb) 1475 try_nestloop_path(root, (gdb) 1471 foreach(lc2, innerrel->cheapest_parameterized_paths) (gdb)
同時,也會嘗試成本最低的物化內表訪問路徑
1485 if (matpath != NULL) (gdb) 1486 try_nestloop_path(root, (gdb) 1496 if (save_jointype == JOIN_UNIQUE_OUTER)
完成match_unsorted_outer的調用
1522 save_jointype != JOIN_RIGHT && (gdb) 1550 } (gdb) add_paths_to_joinrel (root=0x2a987a8, joinrel=0x2aef8a0, outerrel=0x2aa20f0, innerrel=0x2aa3620, jointype=JOIN_INNER, sjinfo=0x7ffc343daa20, restrictlist=0x0) at joinpath.c:306 306 if (enable_hashjoin || jointype == JOIN_FULL)
結果joinrel的訪問路徑
(gdb) p *joinrel->pathlist $9 = {type = T_List, length = 1, head = 0x2aefde8, tail = 0x2aefde8} (gdb) p *(Node *)joinrel->pathlist->head->data.ptr_value $10 = {type = T_NestPath} (gdb) p *(NestPath *)joinrel->pathlist->head->data.ptr_value $11 = {path = {type = T_NestPath, pathtype = T_NestLoop, parent = 0x2aef8a0, pathtarget = 0x2aefad8, param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10, startup_cost = 0.57750000000000001, total_cost = 28.802382614957409, pathkeys = 0x0}, jointype = JOIN_INNER, inner_unique = false, outerjoinpath = 0x2aeb0d8, innerjoinpath = 0x2aeb408, joinrestrictinfo = 0x0}
下面考察try_nestloop_path函數的處理邏輯,設置斷點,進入try_nestloop_path函數
(gdb) b try_nestloop_path Breakpoint 1 at 0x7ae950: file joinpath.c, line 373. (gdb) c Continuing. Breakpoint 1, try_nestloop_path (root=0x2a987a8, joinrel=0x2aef8a0, outer_path=0x2aeb0d8, inner_path=0x2aeb408, pathkeys=0x0, jointype=JOIN_INNER, extra=0x7ffc343da930) at joinpath.c:373 373 RelOptInfo *innerrel = inner_path->parent;
joinrel初始時,pathlist為NULL
(gdb) p *(Node *)joinrel->pathlist->head->data.ptr_value Cannot access memory at address 0x8
外表訪問路徑為索引掃描(t_dwxx),內表訪問路徑為索引掃描(t_grxx)
(gdb) p *outer_path $2 = {type = T_IndexPath, pathtype = T_IndexScan, parent = 0x2aa20f0, pathtarget = 0x2aa2328, param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 1, startup_cost = 0.28500000000000003, total_cost = 8.3025000000000002, pathkeys = 0x0} (gdb) p *inner_path $3 = {type = T_IndexPath, pathtype = T_IndexScan, parent = 0x2aa3620, pathtarget = 0x2aa3858, param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10, startup_cost = 0.29249999999999998, total_cost = 20.399882614957409, pathkeys = 0x0}
初步計算nestloop的成本
... (gdb) p required_outer $4 = (Relids) 0x0 (gdb) n 422 initial_cost_nestloop(root, &workspace, jointype,
創建nestloop path并添加到joinrel中
425 if (add_path_precheck(joinrel, (gdb) 434 if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent)) (gdb) 451 create_nestloop_path(root, (gdb) 450 add_path(joinrel, (Path *) (gdb)
生成的第一條路徑
(gdb) p *(NestPath *)joinrel->pathlist->head->data.ptr_value $6 = {path = {type = T_NestPath, pathtype = T_NestLoop, parent = 0x2aef8a0, pathtarget = 0x2aefad8, param_info = 0x0, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 10, startup_cost = 0.57750000000000001, total_cost = 28.802382614957409, pathkeys = 0x0}, jointype = JOIN_INNER, inner_unique = false, outerjoinpath = 0x2aeb0d8, innerjoinpath = 0x2aeb408, joinrestrictinfo = 0x0}
繼續執行,嘗試第二條路徑
(gdb) c Continuing. Breakpoint 1, try_nestloop_path (root=0x2a987a8, joinrel=0x2aef8a0, outer_path=0x2aeb0d8, inner_path=0x2aecf28, pathkeys=0x0, jointype=JOIN_INNER, extra=0x7ffc343da930) at joinpath.c:373 373 RelOptInfo *innerrel = inner_path->parent;
查看inner_path,即內表的cheapest_parameterized_paths鏈表中的第二個元素
(gdb) p *inner_path $9 = {type = T_IndexPath, pathtype = T_IndexScan, parent = 0x2aa3620, pathtarget = 0x2aa3858, param_info = 0x2aed6f8, parallel_aware = false, parallel_safe = true, parallel_workers = 0, rows = 1, startup_cost = 0.29249999999999998, total_cost = 0.35258, pathkeys = 0x2aeceb0}
使用此路徑嘗試nest loop訪問路徑,但該路徑依賴外部relids(4號RTE),被丟棄
403 if (required_outer && (gdb) 405 !allow_star_schema_join(root, outerrelids, inner_paramrels)) || (gdb) 404 ((!bms_overlap(required_outer, extra->param_source_rels) && (gdb) 409 bms_free(required_outer); (gdb) p *required_outer $12 = {nwords = 1, words = 0x2aefe4c} (gdb) p *required_outer->words $13 = 16 (gdb) n 410 return; (gdb)
感謝各位的閱讀,以上就是“PostgreSQL中match_unsorted_outer函數分析”的內容了,經過本文的學習后,相信大家對PostgreSQL中match_unsorted_outer函數分析這一問題有了更深刻的體會,具體使用情況還需要大家實踐驗證。這里是億速云,小編將為大家推送更多相關知識點的文章,歡迎關注!
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。