208 lines
5.5 KiB
PL/PgSQL
208 lines
5.5 KiB
PL/PgSQL
--------------------------------------------------------------
|
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
|
-- by Anthony DeBarros
|
|
|
|
-- Chapter 5 Code Examples
|
|
--------------------------------------------------------------
|
|
|
|
-- Listing 5-1: Basic addition, subtraction and multiplication with SQL
|
|
|
|
SELECT 2 + 2; -- addition
|
|
SELECT 9 - 1; -- subtraction
|
|
SELECT 3 * 4; -- multiplication
|
|
|
|
-- Listing 5-2: Integer and decimal division with SQL
|
|
|
|
SELECT 11 / 6; -- integer division
|
|
SELECT 11 % 6; -- modulo division
|
|
SELECT 11.0 / 6; -- decimal division
|
|
SELECT CAST(11 AS numeric(3,1)) / 6;
|
|
|
|
-- Listing 5-3: Exponents, roots and factorials with SQL
|
|
|
|
SELECT 3 ^ 4; -- exponentiation
|
|
SELECT |/ 10; -- square root (operator)
|
|
SELECT sqrt(10); -- square root (function)
|
|
SELECT ||/ 10; -- cube root
|
|
SELECT 4 !; -- factorial
|
|
|
|
-- Order of operations
|
|
|
|
SELECT 7 + 8 * 9; -- answer: 79
|
|
SELECT (7 + 8) * 9; -- answer: 135
|
|
|
|
SELECT 3 ^ 3 - 1; -- answer: 26
|
|
SELECT 3 ^ (3 - 1); -- answer: 9
|
|
|
|
-- Listing 5-4: Selecting Census population columns by race with aliases
|
|
|
|
SELECT geo_name,
|
|
state_us_abbreviation AS "st",
|
|
p0010001 AS "Total Population",
|
|
p0010003 AS "White Alone",
|
|
p0010004 AS "Black or African American Alone",
|
|
p0010005 AS "Am Indian/Alaska Native Alone",
|
|
p0010006 AS "Asian Alone",
|
|
p0010007 AS "Native Hawaiian and Other Pacific Islander Alone",
|
|
p0010008 AS "Some Other Race Alone",
|
|
p0010009 AS "Two or More Races"
|
|
FROM us_counties_2010;
|
|
|
|
-- Listing 5-5: Adding two columns in us_counties_2010
|
|
|
|
SELECT geo_name,
|
|
state_us_abbreviation AS "st",
|
|
p0010003 AS "White Alone",
|
|
p0010004 AS "Black Alone",
|
|
p0010003 + p0010004 AS "Total White and Black"
|
|
FROM us_counties_2010;
|
|
|
|
-- Listing 5-6: Checking Census data totals
|
|
|
|
SELECT geo_name,
|
|
state_us_abbreviation AS "st",
|
|
p0010001 AS "Total",
|
|
p0010003 + p0010004 + p0010005 + p0010006 + p0010007
|
|
+ p0010008 + p0010009 AS "All Races",
|
|
(p0010003 + p0010004 + p0010005 + p0010006 + p0010007
|
|
+ p0010008 + p0010009) - p0010001 AS "Difference"
|
|
FROM us_counties_2010
|
|
ORDER BY "Difference" DESC;
|
|
|
|
-- Listing 5-7: Calculating the percent of the population that is
|
|
-- Asian by county (percent of the whole)
|
|
|
|
SELECT geo_name,
|
|
state_us_abbreviation AS "st",
|
|
(CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian"
|
|
FROM us_counties_2010
|
|
ORDER BY "pct_asian" DESC;
|
|
|
|
-- Listing 5-8: Calculating percent change
|
|
|
|
CREATE TABLE percent_change (
|
|
department varchar(20),
|
|
spend_2014 numeric(10,2),
|
|
spend_2017 numeric(10,2)
|
|
);
|
|
|
|
INSERT INTO percent_change
|
|
VALUES
|
|
('Building', 250000, 289000),
|
|
('Assessor', 178556, 179500),
|
|
('Library', 87777, 90001),
|
|
('Clerk', 451980, 650000),
|
|
('Police', 250000, 223000),
|
|
('Recreation', 199000, 195000);
|
|
|
|
SELECT department,
|
|
spend_2014,
|
|
spend_2017,
|
|
round( (spend_2017 - spend_2014) /
|
|
spend_2014 * 100, 1 ) AS "pct_change"
|
|
FROM percent_change;
|
|
|
|
-- Listing 5-9: Using sum() and avg() aggregate functions
|
|
|
|
SELECT sum(p0010001) AS "County Sum",
|
|
round(avg(p0010001), 0) AS "County Average"
|
|
FROM us_counties_2010;
|
|
|
|
-- Listing 5-10: Testing SQL percentile functions
|
|
|
|
CREATE TABLE percentile_test (
|
|
numbers integer
|
|
);
|
|
|
|
INSERT INTO percentile_test (numbers) VALUES
|
|
(1), (2), (3), (4), (5), (6);
|
|
|
|
SELECT
|
|
percentile_cont(.5)
|
|
WITHIN GROUP (ORDER BY numbers),
|
|
percentile_disc(.5)
|
|
WITHIN GROUP (ORDER BY numbers)
|
|
FROM percentile_test;
|
|
|
|
-- Listing 5-11: Using sum(), avg(), and percentile_cont() aggregate functions
|
|
|
|
SELECT sum(p0010001) AS "County Sum",
|
|
round(avg(p0010001), 0) AS "County Average",
|
|
percentile_cont(.5)
|
|
WITHIN GROUP (ORDER BY p0010001) AS "County Median"
|
|
FROM us_counties_2010;
|
|
|
|
-- Listing 5-12: Passing an array of values to percentile_cont()
|
|
|
|
-- quartiles
|
|
SELECT percentile_cont(array[.25,.5,.75])
|
|
WITHIN GROUP (ORDER BY p0010001) AS "quartiles"
|
|
FROM us_counties_2010;
|
|
|
|
-- Extra:
|
|
-- quintiles
|
|
SELECT percentile_cont(array[.2,.4,.6,.8])
|
|
WITHIN GROUP (ORDER BY p0010001) AS "quintiles"
|
|
FROM us_counties_2010;
|
|
|
|
-- deciles
|
|
SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
|
|
WITHIN GROUP (ORDER BY p0010001) AS "deciles"
|
|
FROM us_counties_2010;
|
|
|
|
-- Listing 5-13: Using unnest() to turn an array into rows
|
|
|
|
SELECT unnest(
|
|
percentile_cont(array[.25,.5,.75])
|
|
WITHIN GROUP (ORDER BY p0010001)
|
|
) AS "quartiles"
|
|
FROM us_counties_2010;
|
|
|
|
-- Listing 5-14: Creating a median() aggregate function in PostgreSQL
|
|
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
|
|
|
|
CREATE OR REPLACE FUNCTION _final_median(anyarray)
|
|
RETURNS float8 AS
|
|
$$
|
|
WITH q AS
|
|
(
|
|
SELECT val
|
|
FROM unnest($1) val
|
|
WHERE VAL IS NOT NULL
|
|
ORDER BY 1
|
|
),
|
|
cnt AS
|
|
(
|
|
SELECT COUNT(*) AS c FROM q
|
|
)
|
|
SELECT AVG(val)::float8
|
|
FROM
|
|
(
|
|
SELECT val FROM q
|
|
LIMIT 2 - MOD((SELECT c FROM cnt), 2)
|
|
OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0)
|
|
) q2;
|
|
$$
|
|
LANGUAGE sql IMMUTABLE;
|
|
|
|
CREATE AGGREGATE median(anyelement) (
|
|
SFUNC=array_append,
|
|
STYPE=anyarray,
|
|
FINALFUNC=_final_median,
|
|
INITCOND='{}'
|
|
);
|
|
|
|
-- Listing 5-15: Using a median() aggregate function
|
|
|
|
SELECT sum(p0010001) AS "County Sum",
|
|
round(avg(p0010001), 0) AS "County Average",
|
|
median(p0010001) AS "County Median",
|
|
percentile_cont(.5)
|
|
WITHIN GROUP (ORDER BY P0010001) AS "50th Percentile"
|
|
FROM us_counties_2010;
|
|
|
|
-- Listing 5-16: Finding the most-frequent value with mode()
|
|
|
|
SELECT mode() WITHIN GROUP (ORDER BY p0010001)
|
|
FROM us_counties_2010;
|