Friday, May 30, 2008

Merge Join caused Cartesian Product

--Merge Join caused Cartesian Product

1591 RPTOPR CREATE TABLE tmp_aio_acct_tp1 as SELECT distinct(a.customer_id),
1591 RPTOPR decode((c.bld_stat_id),1,'Blacklisted') blist_status, d.bdr_evs
1591 RPTOPR dt start_blist, d.bdr_blist_resn_type_id blist_reason FROM tmp_a
1591 RPTOPR io_acct_tb a, blist_cust b, blist_dtls c, blist_dtls_resn_code d
1591 RPTOPR -- WHERE rtrim(a.customer_id) = rtrim(b.blc_cust_id) and WHERE
1591 RPTOPR rtrim(a.customer_id) = b.blc_cust_id and b.bld_blist_id=c.bld_bl
1591 RPTOPR ist_id and b.bld_blist_id=d.bld_blist_id and d.bdr_evedt is NULL
1591 RPTOPR


--this query can't finish event after 10 hours.
--expect 16 hours to completed.
--notice the executions is exterm high.

^LSQL ordered by Executions DB/Inst: PRXP/PRXRP Snaps: 581-587
-> Total Executions: 96,444,779
-> Captured SQL account for 100.0% of Total

CPU per Elap per
Executions Rows Processed Rows per Exec Exec (s) Exec (s) SQL Id
------------ --------------- -------------- ---------- ----------- -------------
96,183,306 0 0.0 0.00 0.00 b9ag273wzuhbx
Module: oracleODSP@ods01 (TNS V1-V3)
SELECT "BLC_CUST_ID","BLD_BLIST_ID" FROM "DBO"."BLIST_CUST" "B" WHERE "BLC_CUST_
ID"=:1 AND "BLD_BLIST_ID"=:2


Elapsed CPU Elap per % Total
Time (s) Time (s) Executions Exec (s) DB Time SQL Id
---------- ---------- ------------ ---------- ------- -------------
2,900 2,898 96,183,306 0.0 14.4 b9ag273wzuhbx
Module: oracleODSP@ods01 (TNS V1-V3)
SELECT "BLC_CUST_ID","BLD_BLIST_ID" FROM "DBO"."BLIST_CUST" "B" WHERE "BLC_CUST_
ID"=:1 AND "BLD_BLIST_ID"=:2


^LSQL ordered by Gets DB/Inst: PRXP/PRXRP Snaps: 581-587
-> Resources reported for PL/SQL code includes the resources used by all SQL
statements called by the code.
-> Total Buffer Gets: 719,230,530
-> Captured SQL account for 99.9% of Total

Gets CPU Elapsed
Buffer Gets Executions per Exec %Total Time (s) Time (s) SQL Id
-------------- ------------ ------------ ------ -------- --------- -------------
319,369,588 96,183,306 3.3 44.4 2897.99 2900.45 b9ag273wzuhbx
Module: oracleODSP@ods01 (TNS V1-V3)
SELECT "BLC_CUST_ID","BLD_BLIST_ID" FROM "DBO"."BLIST_CUST" "B" WHERE "BLC_CUST_ID"=:1 AND "BLD_BLIST_ID"=:2



1591 RPTOPR 1 0 1 LOAD AS SELECT
1591 RPTOPR 2 1 1 SORT
1591 RPTOPR 3 2 1 NESTED LOOPS
1591 RPTOPR 4 3 1 NESTED LOOPS
1591 RPTOPR 5 4 1 MERGE JOIN
1591 RPTOPR 6 5 1 REMOTE BLIST_DTLS_RESN_CODE
1591 RPTOPR 7 5 2 BUFFER
1591 RPTOPR 8 7 1 TABLE ACCESS TMP_AIO_ACCT_TB
1591 RPTOPR 9 4 2 REMOTE BLIST_CUST
1591 RPTOPR 10 3 2 REMOTE BLIST_DTLS


-- the rows of these two tables explains the high number of executions.
-- 3 millions rows in BLIST_DTLS_RESN_CODE and one quarter records meet "d.bdr_evedt is NULL", 7k rows in TMP_AIO_ACCT_TB
-- 3200k/4 * 7k = 5600k * k = 5600 Millions



SELECT distinct(a.customer_id),
decode((c.bld_stat_id),1,'Blacklisted') blist_status, d.bdr_evsdt start_blist, d.bdr_blist_resn_type_id blist_reason
FROM t1 a, blist_cust b, blist_dtls c, blist_dtls_resn_code d
WHERE rtrim(a.customer_id) = b.blc_cust_id and b.bld_blist_id=c.bld_blist_id and b.bld_blist_id=d.bld_blist_id and d.bdr_evedt is NULL;

SELECT /*+ ordered use_nl(b c d) */
distinct(a.customer_id),
decode((c.bld_stat_id),1,'Blacklisted') blist_status, d.bdr_evsdt start_blist, d.bdr_blist_resn_type_id blist_reason
FROM t1 a, blist_cust b, blist_dtls c, blist_dtls_resn_code d
WHERE rtrim(a.customer_id) = b.blc_cust_id and b.bld_blist_id=c.bld_blist_id
and b.bld_blist_id=d.bld_blist_id and d.bdr_evedt is NULL;

1601 RPTOPR 0 17168 SELECT STATEMENT
1601 RPTOPR 1 0 1 SORT
1601 RPTOPR 2 1 1 NESTED LOOPS
1601 RPTOPR 3 2 1 NESTED LOOPS
1601 RPTOPR 4 3 1 NESTED LOOPS
1601 RPTOPR 5 4 1 TABLE ACCESS T1
1601 RPTOPR 6 4 2 REMOTE BLIST_CUST
1601 RPTOPR 7 3 2 REMOTE BLIST_DTLS
1601 RPTOPR 8 2 2 REMOTE BLIST_DTLS_RESN_CODE



-- finished within 1 mins



--try this hint (bad plan same as orginal)
SELECT /*+ use_nl(b c d) */
distinct(a.customer_id),
decode((c.bld_stat_id),1,'Blacklisted') blist_status, d.bdr_evsdt start_blist, d.bdr_blist_resn_type_id blist_reason
FROM t1 a, blist_cust b, blist_dtls c, blist_dtls_resn_code d
WHERE rtrim(a.customer_id) = b.blc_cust_id and b.bld_blist_id=c.bld_blist_id
and b.bld_blist_id=d.bld_blist_id and d.bdr_evedt is NULL;


1601 RPTOPR 0 4506 SELECT STATEMENT
1601 RPTOPR 1 0 1 SORT
1601 RPTOPR 2 1 1 NESTED LOOPS
1601 RPTOPR 3 2 1 NESTED LOOPS
1601 RPTOPR 4 3 1 MERGE JOIN
1601 RPTOPR 5 4 1 REMOTE BLIST_DTLS_RESN_CODE
1601 RPTOPR 6 4 2 BUFFER
1601 RPTOPR 7 6 1 TABLE ACCESS T1
1601 RPTOPR 8 3 2 REMOTE BLIST_CUST
1601 RPTOPR 9 2 2 REMOTE BLIST_DTLS

-
--tried ordered only

SELECT /*+ ordered */
distinct(a.customer_id),
decode((c.bld_stat_id),1,'Blacklisted') blist_status, d.bdr_evsdt start_blist, d.bdr_blist_resn_type_id blist_reason
FROM t1 a, blist_cust b, blist_dtls c, blist_dtls_resn_code d
WHERE rtrim(a.customer_id) = b.blc_cust_id and b.bld_blist_id=c.bld_blist_id
and b.bld_blist_id=d.bld_blist_id and d.bdr_evedt is NULL;

1601 RPTOPR 0 9458 SELECT STATEMENT
1601 RPTOPR 1 0 1 SORT
1601 RPTOPR 2 1 1 HASH JOIN
1601 RPTOPR 3 2 1 REMOTE BLIST_DTLS_RESN_CODE
1601 RPTOPR 4 2 2 HASH JOIN
1601 RPTOPR 5 4 1 TABLE ACCESS T1
1601 RPTOPR 6 4 2 REMOTE


--okay



--think driving_site should also work, but have no chance to test again.


The join operations group of hints controls how joined tables merge data together. A join
operation may direct the optimizer to choose the best path for retrieving all rows for a query
(throughput) or for retrieving the first row (response time).
while ORDERED tells the optimizer to join the tables based on their
order in the FROM clause using the first table listed as the driving table (accessed first).

--tuned queries from hours to seconds using this method


However, as to root casue , optimizer is still a black box to us!