Created
April 21, 2017 14:31
-
-
Save xtream1101/268ce0849817be7e8e58faaf274033e8 to your computer and use it in GitHub Desktop.
Dynamic QA query
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#1 | |
DROP FUNCTION qa_fields(character varying,character varying); | |
CREATE OR REPLACE FUNCTION qa_fields(schema_name VARCHAR, tbl_name VARCHAR) | |
-- The table def needs to be known before the function runs, but the number of fields returned is dynamic :( | |
-- Also need to correct names for the fields, the types will alwyas be FLOAT | |
RETURNS TABLE(a FLOAT, b FLOAT, c FLOAT, d FLOAT, e FLOAT, f FLOAT, g FLOAT, h FLOAT, i FLOAT, j FLOAT, k FLOAT, l FLOAT, m FLOAT, n FLOAT, o FLOAT, p FLOAT, q FLOAT, r FLOAT, s FLOAT, t FLOAT, u FLOAT, v FLOAT, w FLOAT, x FLOAT, ts DATE) AS | |
$func$ | |
DECLARE field_name VARCHAR; | |
DECLARE qa_query_full TEXT = ''; | |
DECLARE qa_query_percent VARCHAR = ''; | |
DECLARE qa_query_sum VARCHAR = ''; | |
BEGIN | |
FOR field_name IN SELECT column_name | |
FROM information_schema.columns | |
WHERE table_schema = schema_name | |
AND table_name = tbl_name | |
AND data_type != 'boolean' | |
-- Ignore these columns (these are not from the scraped data) | |
AND column_name not in ('id', 'scrape_id', 'keyword', 'keyword_id') | |
LOOP | |
-- Create the 2 selects for the dynamic fields | |
qa_query_percent := qa_query_percent || 'x.' || field_name || '_null/x.total::float*100 as ' || field_name || '_null_percent, '; | |
qa_query_sum := qa_query_sum || 'SUM(CASE WHEN ' || field_name || ' IS NULL THEN 1 ELSE 0 END) AS ' || field_name || '_null, '; | |
END LOOP; | |
-- Create the completed query | |
qa_query_full := 'SELECT ' || qa_query_percent || ' x.time_collected | |
FROM (SELECT | |
count(id) as total, | |
' || qa_query_sum || ' | |
MIN(date_trunc(''day'', (time_collected)::timestamp)::date) as time_collected | |
FROM | |
' || schema_name || '.' || tbl_name || ' | |
WHERE time_collected>=date_trunc(''day'', (now() + interval ''-4 hour'')::date - interval ''59 day'')::date | |
AND time_collected<=date_trunc(''day'', (now() + interval ''-4 hour'')::date + interval ''1 day'')::date | |
GROUP BY | |
date_trunc(''day'', (time_collected)::timestamp)::date | |
ORDER BY | |
MIN(date_trunc(''day'', (time_collected)::timestamp)::date) DESC | |
) as x;'; | |
-- Execute query and return the results | |
RETURN QUERY EXECUTE qa_query_full; | |
END | |
$func$ LANGUAGE PLPGSQL; | |
-- Get the results for walmart.keywords_raw | |
SELECT b, c FROM qa_fields('walmart', 'keywords_raw'); | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
DROP FUNCTION qa_fields(character varying,character varying); | |
CREATE OR REPLACE FUNCTION qa_fields(schema_name VARCHAR, tbl_name VARCHAR) | |
-- Return the custom query as TEXT, will need to execute on the client side somehow | |
RETURNS TEXT AS | |
$func$ | |
DECLARE field_name VARCHAR; | |
DECLARE qa_query_full TEXT = ''; | |
DECLARE qa_query_percent VARCHAR = ''; | |
DECLARE qa_query_sum VARCHAR = ''; | |
BEGIN | |
FOR field_name IN SELECT column_name | |
FROM information_schema.columns | |
WHERE table_schema = schema_name | |
AND table_name = tbl_name | |
AND data_type != 'boolean' | |
-- Ignore these columns (these are not from the scraped data) | |
AND column_name not in ('id', 'scrape_id', 'keyword', 'keyword_id') | |
LOOP | |
-- Create the 2 selects for the dynamic fields | |
qa_query_percent := qa_query_percent || 'x.' || field_name || '_null/x.total::float*100 as ' || field_name || '_null_percent, '; | |
qa_query_sum := qa_query_sum || 'SUM(CASE WHEN ' || field_name || ' IS NULL THEN 1 ELSE 0 END) AS ' || field_name || '_null, '; | |
END LOOP; | |
-- Create the completed query | |
qa_query_full := 'SELECT ' || qa_query_percent || ' x.time_collected | |
FROM (SELECT | |
count(id) as total, | |
' || qa_query_sum || ' | |
MIN(date_trunc(''day'', (time_collected)::timestamp)::date) as time_collected | |
FROM | |
' || schema_name || '.' || tbl_name || ' | |
WHERE time_collected>=date_trunc(''day'', (now() + interval ''-4 hour'')::date - interval ''59 day'')::date | |
AND time_collected<=date_trunc(''day'', (now() + interval ''-4 hour'')::date + interval ''1 day'')::date | |
GROUP BY | |
date_trunc(''day'', (time_collected)::timestamp)::date | |
ORDER BY | |
MIN(date_trunc(''day'', (time_collected)::timestamp)::date) DESC | |
) as x;'; | |
-- Return the query as type TEXT | |
RETURN qa_query_full; | |
END | |
$func$ LANGUAGE PLPGSQL; | |
-- Returns a field called `qa_fields` with the query string in it | |
select qa_fields('walmart', 'keywords_raw'); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment