210 lines
5.7 KiB
PL/PgSQL
210 lines
5.7 KiB
PL/PgSQL
---------------------------------------------------------------------------
|
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
|
|
-- by Anthony DeBarros
|
|
|
|
-- Chapter 6 Code Examples
|
|
----------------------------------------------------------------------------
|
|
|
|
-- Listing 6-1: Basic addition, subtraction and multiplication with SQL
|
|
|
|
SELECT 2 + 2; -- addition
|
|
SELECT 9 - 1; -- subtraction
|
|
SELECT 3 * 4; -- multiplication
|
|
|
|
-- Listing 6-2: Integer and decimal division with SQL
|
|
|
|
SELECT 11 / 6; -- integer division
|
|
SELECT 11 % 6; -- modulo division
|
|
SELECT 11.0 / 6; -- decimal division
|
|
SELECT CAST(11 AS numeric(3,1)) / 6;
|
|
|
|
-- Listing 6-3: Exponents, roots and factorials with SQL
|
|
|
|
SELECT 3 ^ 4; -- exponentiation
|
|
SELECT |/ 10; -- square root (operator)
|
|
SELECT sqrt(10); -- square root (function)
|
|
SELECT ||/ 10; -- cube root
|
|
SELECT 4 !; -- factorial
|
|
|
|
-- Order of operations
|
|
|
|
SELECT 7 + 8 * 9; -- answer: 79
|
|
SELECT (7 + 8) * 9; -- answer: 135
|
|
|
|
SELECT 3 ^ 3 - 1; -- answer: 26
|
|
SELECT 3 ^ (3 - 1); -- answer: 9
|
|
|
|
-- Listing 6-4: Selecting Census population estimate columns with aliases
|
|
|
|
SELECT county_name AS county,
|
|
state_name AS state,
|
|
pop_est_2019 AS pop,
|
|
births_2019 AS births,
|
|
deaths_2019 AS deaths,
|
|
international_migr_2019 AS int_migr,
|
|
domestic_migr_2019 AS dom_migr,
|
|
residual_2019 AS residual
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
SELECT * FROM us_counties_pop_est_2019;
|
|
|
|
-- Listing 6-5: Subtracting two columns in us_counties_pop_est_2019
|
|
|
|
SELECT county_name AS county,
|
|
state_name AS state,
|
|
births_2019 AS births,
|
|
deaths_2019 AS deaths,
|
|
births_2019 - deaths_2019 AS natural_increase
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
-- Listing 6-6: Checking Census data totals
|
|
|
|
SELECT county_name AS county,
|
|
state_name AS state,
|
|
pop_est_2019 AS pop,
|
|
pop_est_2018 + births_2019 - deaths_2019 +
|
|
international_migr_2019 + domestic_migr_2019 +
|
|
residual_2019 AS components_total,
|
|
pop_est_2019 - (pop_est_2018 + births_2019 - deaths_2019 +
|
|
international_migr_2019 + domestic_migr_2019 +
|
|
residual_2019) AS difference
|
|
FROM us_counties_pop_est_2019
|
|
ORDER BY difference DESC;
|
|
|
|
-- Listing 6-7: Calculating the percent of a county's area that is water
|
|
|
|
SELECT county_name AS county,
|
|
state_name AS state,
|
|
area_water::numeric / (area_land + area_water) * 100 AS pct_water
|
|
FROM us_counties_pop_est_2019
|
|
ORDER BY pct_water DESC;
|
|
|
|
|
|
-- Listing 6-8: Calculating percent change
|
|
|
|
CREATE TABLE percent_change (
|
|
department text,
|
|
spend_2019 numeric(10,2),
|
|
spend_2022 numeric(10,2)
|
|
);
|
|
|
|
INSERT INTO percent_change
|
|
VALUES
|
|
('Assessor', 178556, 179500),
|
|
('Building', 250000, 289000),
|
|
('Clerk', 451980, 650000),
|
|
('Library', 87777, 90001),
|
|
('Parks', 250000, 223000),
|
|
('Water', 199000, 195000);
|
|
|
|
SELECT department,
|
|
spend_2019,
|
|
spend_2022,
|
|
round( (spend_2022 - spend_2019) /
|
|
spend_2019 * 100, 1 ) AS pct_change
|
|
FROM percent_change;
|
|
|
|
-- Listing 6-9: Using sum() and avg() aggregate functions
|
|
|
|
SELECT sum(pop_est_2019) AS county_sum,
|
|
round(avg(pop_est_2019), 0) AS county_average
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
-- Listing 6-10: Testing SQL percentile functions
|
|
|
|
CREATE TABLE percentile_test (
|
|
numbers integer
|
|
);
|
|
|
|
INSERT INTO percentile_test (numbers) VALUES
|
|
(1), (2), (3), (4), (5), (6);
|
|
|
|
SELECT
|
|
percentile_cont(.5)
|
|
WITHIN GROUP (ORDER BY numbers),
|
|
percentile_disc(.5)
|
|
WITHIN GROUP (ORDER BY numbers)
|
|
FROM percentile_test;
|
|
|
|
-- Listing 6-11: Using sum(), avg(), and percentile_cont() aggregate functions
|
|
|
|
SELECT sum(pop_est_2019) AS county_sum,
|
|
round(avg(pop_est_2019), 0) AS county_average,
|
|
percentile_cont(.5)
|
|
WITHIN GROUP (ORDER BY pop_est_2019) AS county_median
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
-- Listing 6-12: Passing an array of values to percentile_cont()
|
|
|
|
-- quartiles
|
|
SELECT percentile_cont(ARRAY[.25,.5,.75])
|
|
WITHIN GROUP (ORDER BY pop_est_2019) AS quartiles
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
-- Extra:
|
|
-- quintiles
|
|
SELECT percentile_cont(ARRAY[.2,.4,.6,.8])
|
|
WITHIN GROUP (ORDER BY pop_est_2019) AS quintiles
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
-- deciles
|
|
SELECT percentile_cont(ARRAY[.1,.2,.3,.4,.5,.6,.7,.8,.9])
|
|
WITHIN GROUP (ORDER BY pop_est_2019) AS deciles
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
-- Listing 6-13: Using unnest() to turn an array into rows
|
|
|
|
SELECT unnest(
|
|
percentile_cont(ARRAY[.25,.5,.75])
|
|
WITHIN GROUP (ORDER BY pop_est_2019)
|
|
) AS quartiles
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
-- Listing 6-14: Creating a median() aggregate function in PostgreSQL
|
|
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
|
|
|
|
CREATE OR REPLACE FUNCTION _final_median(anyarray)
|
|
RETURNS float8 AS
|
|
$$
|
|
WITH q AS
|
|
(
|
|
SELECT val
|
|
FROM unnest($1) val
|
|
WHERE VAL IS NOT NULL
|
|
ORDER BY 1
|
|
),
|
|
cnt AS
|
|
(
|
|
SELECT COUNT(*) AS c FROM q
|
|
)
|
|
SELECT AVG(val)::float8
|
|
FROM
|
|
(
|
|
SELECT val FROM q
|
|
LIMIT 2 - MOD((SELECT c FROM cnt), 2)
|
|
OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0)
|
|
) q2;
|
|
$$
|
|
LANGUAGE sql IMMUTABLE;
|
|
|
|
CREATE AGGREGATE median(anyelement) (
|
|
SFUNC=array_append,
|
|
STYPE=anyarray,
|
|
FINALFUNC=_final_median,
|
|
INITCOND='{}'
|
|
);
|
|
|
|
-- Listing 6-15: Using a median() aggregate function
|
|
|
|
SELECT sum(pop_est_2019) AS county_sum,
|
|
round(avg(pop_est_2019), 0) AS county_average,
|
|
median(pop_est_2019) AS county_median_func,
|
|
percentile_cont(.5)
|
|
WITHIN GROUP (ORDER BY pop_est_2019) AS county_median_perc
|
|
FROM us_counties_pop_est_2019;
|
|
|
|
-- Listing 6-16: Finding the most-frequent value with mode()
|
|
|
|
SELECT mode() WITHIN GROUP (ORDER BY pop_est_2019)
|
|
FROM us_counties_pop_est_2019;
|