-- pg_qualstats--2.1.0--2.1.1.sql
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION pg_qualstats UPDATE" to load this file. \quit
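-- pg_qualstats_index_advisor() suggests indexes based on the predicates
-- collected by pg_qualstats(). It returns a json object with two keys:
-- 'indexes', a list of {ddl, queryids} objects, and 'unoptimised', the
-- quals for which no indexable access method was found.
-- min_filter and min_selectivity only keep quals filtering at least
-- min_filter rows on average with at least min_selectivity percent average
-- selectivity, and forbidden_am lists access methods that must never be
-- suggested.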
CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_index_advisor (
min_filter integer DEFAULT 1000,
min_selectivity integer DEFAULT 30,
forbidden_am text[] DEFAULT '{}')
RETURNS json
AS $_$
DECLARE
v_processed bigint[] = '{}';
v_indexes json[] = '{}';
v_unoptimised json[] = '{}';
rec record;
v_nb_processed integer = 1;
v_ddl text;
v_col text;
v_qualnodeid bigint;
v_quals_todo bigint[];
v_quals_done bigint[];
v_quals_col_done text[];
v_queryids bigint[] = '{}';
BEGIN
-- sanity checks and default values
SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30),
coalesce(forbidden_am, '{}')
INTO min_filter, min_selectivity, forbidden_am;
-- don't try to generate hash indexes before pg 10, as those are only
-- WAL-logged since pg 10.
IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN
forbidden_am := array_append(forbidden_am, 'hash');
END IF;
-- first, find the unoptimizable quals.
-- We need an array of json containing the per-qual info, and a single
-- array containing all the underlying qualnodeids, so we build the wanted
-- final object manually as we can't use two different grouping approaches.
FOR rec IN WITH src AS (SELECT DISTINCT qualnodeid,
(coalesce(lrelid, rrelid), coalesce(lattnum, rattnum),
opno, eval_type)::@extschema@.qual AS qual,
queryid
FROM @extschema@.pg_qualstats() q
JOIN pg_catalog.pg_database d ON q.dbid = d.oid
LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno
LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid
LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod
WHERE d.datname = current_database()
AND eval_type = 'f'
AND coalesce(lrelid, rrelid) != 0
AND amname IS NULL
)
SELECT pg_catalog.json_build_object(
'qual', @extschema@.pg_qualstats_deparse_qual(qual),
-- be careful to generate an empty array if no queryid is available
'queryids',
coalesce(pg_catalog.array_agg(DISTINCT queryid)
FILTER (WHERE queryid IS NOT NULL), '{}')
) AS obj,
array_agg(qualnodeid) AS qualnodeids
FROM src
GROUP BY qual
LOOP
v_unoptimised := array_append(v_unoptimised, rec.obj);
v_processed := array_cat(v_processed, rec.qualnodeids);
END LOOP;
-- The index suggestion is done in multiple iterations: for each relation
-- containing interesting quals, we score paths of possibly AND-ed quals
-- that contain other possibly AND-ed quals. Only the highest-scoring path
-- will be used to create an index, so we can then compute another set of
-- paths ignoring the quals that are now optimized with an index.
WHILE v_nb_processed > 0 LOOP
v_nb_processed := 0;
FOR rec IN
-- first, find quals that seem worth optimizing along with the
-- possible access methods, discarding any qualnodes that are already
-- marked as processed. Also apply the access method restrictions.
WITH pgqs AS (
SELECT dbid, amname, qualid, qualnodeid,
(coalesce(lrelid, rrelid), coalesce(lattnum, rattnum),
opno, eval_type)::@extschema@.qual AS qual, queryid,
round(avg(execution_count)) AS execution_count,
sum(occurences) AS occurences,
round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter,
CASE WHEN sum(execution_count) = 0
THEN 0
ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100)
END AS avg_selectivity
FROM @extschema@.pg_qualstats() q
JOIN pg_catalog.pg_database d ON q.dbid = d.oid
JOIN pg_catalog.pg_operator op ON op.oid = q.opno
JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid
JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod
WHERE d.datname = current_database()
AND eval_type = 'f'
AND amname != ALL (forbidden_am)
AND coalesce(lrelid, rrelid) != 0
AND qualnodeid != ALL(v_processed)
GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid,
lattnum, rattnum, opno, eval_type, queryid
),
-- apply cardinality and selectivity restrictions
filtered AS (
SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent,
count(*) AS weight,
(array_agg(DISTINCT qualnodeid),
array_agg(queryid)
)::@extschema@.adv_quals AS quals
FROM pgqs
WHERE avg_filter >= min_filter
AND avg_selectivity >= min_selectivity
GROUP BY (qual).relid, amname, parent
),
-- for each possibly AND-ed qual, build the list of included qualnodeids
nodes AS (
SELECT p.relid, p.amname, p.parent, p.quals,
c.quals AS children
FROM filtered p
LEFT JOIN filtered c ON (p.quals).qualnodeids @> (c.quals).qualnodeids
AND p.amname = c.amname
AND p.parent != c.parent
AND (p.quals).qualnodeids != (c.quals).qualnodeids
),
-- build the "paths", i.e. the lists of AND-ed quals that entirely
-- contain other possibly AND-ed quals, and give a score to each
-- path. The scoring method used here is simply the number of
-- columns in the quals.
paths AS (
SELECT DISTINCT *,
coalesce(pg_catalog.array_length((children).qualnodeids, 1),
0) AS weight
FROM nodes
UNION
SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children,
coalesce(pg_catalog.array_length((c.children).qualnodeids, 1),
0) AS weight
FROM nodes p
JOIN nodes c ON (p.children).qualnodeids @> (c.quals).qualnodeids
AND (c.quals).qualnodeids IS NOT NULL
AND (c.quals).qualnodeids != (p.quals).qualnodeids
AND p.amname = c.amname
),
-- compute the final paths.
-- The scoring method used here is simply the sum of the number of
-- columns in each possibly AND-ed qual, so that we can later choose
-- to create indexes that optimize as many queries as possible with
-- as few indexes as possible.
-- We also compute here an access method weight, so that we can later
-- choose a btree index rather than another access method if btree is
-- available.
computed AS (
SELECT relid, amname, parent, quals,
array_agg(to_json(children) ORDER BY weight)
FILTER (WHERE children IS NOT NULL) AS included,
pg_catalog.array_length((quals).qualnodeids, 1)
+ sum(weight) AS path_weight,
CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight
FROM paths
GROUP BY relid, amname, parent, quals
),
-- compute a rank for each final path, per relation.
final AS (
SELECT relid, amname, parent, quals, included, path_weight, amweight,
row_number() OVER (
PARTITION BY relid
ORDER BY path_weight DESC, amweight) AS rownum
FROM computed
)
-- and finally choose the highest-ranked final path for each relation.
SELECT relid, amname, parent,
(quals).qualnodeids as quals, (quals).queryids as queryids,
included, path_weight
FROM final
WHERE rownum = 1
LOOP
v_nb_processed := v_nb_processed + 1;
v_ddl := '';
v_quals_todo := '{}';
v_quals_done := '{}';
v_quals_col_done := '{}';
-- put columns from included quals, if any, first for order dependency
DECLARE
v_cur json;
BEGIN
IF rec.included IS NOT NULL THEN
FOR v_cur IN SELECT v->'qualnodeids'
FROM (SELECT * FROM unnest(rec.included)) AS r(v)
ORDER BY pg_catalog.json_array_length(v->'qualnodeids') ASC
LOOP
-- Direct cast from json to bigint is only possible since pg10
FOR v_qualnodeid IN
SELECT pg_catalog.json_array_elements(v_cur)::text::bigint
LOOP
v_quals_todo := v_quals_todo || v_qualnodeid;
END LOOP;
END LOOP;
END IF;
END;
-- and append qual's own columns
v_quals_todo := v_quals_todo || rec.quals;
-- generate the index DDL
FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP
-- skip quals already present in the index
CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid];
-- skip other quals for the same column
v_col := @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, false);
CONTINUE WHEN v_quals_col_done @> ARRAY[v_col];
-- mark this qual as present in a generated index so it's ignored at the
-- next round of best quals to optimize
v_processed := pg_catalog.array_append(v_processed, v_qualnodeid);
-- mark this qual and col as present in this index
v_quals_done := v_quals_done || v_qualnodeid;
v_quals_col_done := v_quals_col_done || v_col;
-- if underlying table has been dropped, stop here
CONTINUE WHEN coalesce(v_col, '') = '';
-- append the column to the index
IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF;
v_ddl := v_ddl || @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, true);
END LOOP;
-- if underlying table has been dropped, skip this (broken) index
CONTINUE WHEN coalesce(v_ddl, '') = '';
-- generate the full CREATE INDEX ddl
v_ddl = pg_catalog.format('CREATE INDEX ON %s USING %I (%s)',
@extschema@.pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl);
-- get the underlying queryid(s)
DECLARE
v_queryid text;
v_cur json;
BEGIN
v_queryids = rec.queryids;
IF rec.included IS NOT NULL THEN
FOREACH v_cur IN ARRAY rec.included LOOP
-- Direct cast from json to bigint is only possible since pg10
FOR v_queryid IN SELECT pg_catalog.json_array_elements(v_cur->'queryids')::text
LOOP
CONTINUE WHEN v_queryid = 'null';
v_queryids := v_queryids || v_queryid::text::bigint;
END LOOP;
END LOOP;
END IF;
END;
-- remove any duplicates
SELECT pg_catalog.array_agg(DISTINCT v) INTO v_queryids
FROM (SELECT unnest(v_queryids)) s(v);
-- sanitize the queryids
IF v_queryids IS NULL OR v_queryids = '{null}' THEN
v_queryids = '{}';
END IF;
-- and finally append the index to the list of generated indexes
v_indexes := pg_catalog.array_append(v_indexes,
pg_catalog.json_build_object(
'ddl', v_ddl,
'queryids', v_queryids
)
);
END LOOP;
END LOOP;
RETURN pg_catalog.json_build_object(
'indexes', v_indexes,
'unoptimised', v_unoptimised);
END;
$_$ LANGUAGE plpgsql; /* end of pg_qualstats_index_advisor */
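
-- Example usage, kept commented out so that nothing is executed during
-- ALTER EXTENSION ... UPDATE. The threshold values are only illustrative,
-- and the calls assume the extension's schema is on the search_path:
--
--   SELECT pg_qualstats_index_advisor(
--       min_filter => 1000,       -- ignore quals filtering few rows on average
--       min_selectivity => 30,    -- ignore quals below 30% average selectivity
--       forbidden_am => '{hash}'  -- never suggest hash indexes
--   );
--
-- The resulting json can be unpacked with, for instance:
--
--   SELECT v
--   FROM json_array_elements(
--       pg_qualstats_index_advisor(min_filter => 50)->'indexes') AS v
--   ORDER BY v->>'ddl';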