practical-sql-2/Chapter_06/Chapter_06.sql
2020-08-29 15:21:22 -04:00

210 lines
5.7 KiB
PL/PgSQL

---------------------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
-- by Anthony DeBarros
-- Chapter 6 Code Examples
----------------------------------------------------------------------------
-- Listing 6-1: Basic addition, subtraction and multiplication with SQL
SELECT 2 + 2; -- addition
SELECT 9 - 1; -- subtraction
SELECT 3 * 4; -- multiplication
-- Listing 6-2: Integer and decimal division with SQL
SELECT 11 / 6; -- integer division
SELECT 11 % 6; -- modulo division
SELECT 11.0 / 6; -- decimal division
SELECT CAST(11 AS numeric(3,1)) / 6;
-- Listing 6-3: Exponents, roots and factorials with SQL
SELECT 3 ^ 4; -- exponentiation
SELECT |/ 10; -- square root (operator)
SELECT sqrt(10); -- square root (function)
SELECT ||/ 10; -- cube root
SELECT 4 !; -- factorial
-- Order of operations
SELECT 7 + 8 * 9; -- answer: 79
SELECT (7 + 8) * 9; -- answer: 135
SELECT 3 ^ 3 - 1; -- answer: 26
SELECT 3 ^ (3 - 1); -- answer: 9
-- Listing 6-4: Selecting Census population estimate columns with aliases
SELECT county_name AS county,
state_name AS state,
pop_est_2019 AS pop,
births_2019 AS births,
deaths_2019 AS deaths,
international_migr_2019 AS int_migr,
domestic_migr_2019 AS dom_migr,
residual_2019 AS residual
FROM us_counties_pop_est_2019;
SELECT * FROM us_counties_pop_est_2019;
-- Listing 6-5: Subtracting two columns in us_counties_pop_est_2019
SELECT county_name AS county,
state_name AS state,
births_2019 AS births,
deaths_2019 AS deaths,
births_2019 - deaths_2019 AS natural_increase
FROM us_counties_pop_est_2019;
-- Listing 6-6: Checking Census data totals
SELECT county_name AS county,
state_name AS state,
pop_est_2019 AS pop,
pop_est_2018 + births_2019 - deaths_2019 +
international_migr_2019 + domestic_migr_2019 +
residual_2019 AS components_total,
pop_est_2019 - (pop_est_2018 + births_2019 - deaths_2019 +
international_migr_2019 + domestic_migr_2019 +
residual_2019) AS difference
FROM us_counties_pop_est_2019
ORDER BY difference DESC;
-- Listing 6-7: Calculating the percent of a county's area that is water
SELECT county_name AS county,
state_name AS state,
area_water::numeric / (area_land + area_water) * 100 AS pct_water
FROM us_counties_pop_est_2019
ORDER BY pct_water DESC;
-- Listing 6-8: Calculating percent change
CREATE TABLE percent_change (
department text,
spend_2019 numeric(10,2),
spend_2022 numeric(10,2)
);
INSERT INTO percent_change
VALUES
('Assessor', 178556, 179500),
('Building', 250000, 289000),
('Clerk', 451980, 650000),
('Library', 87777, 90001),
('Parks', 250000, 223000),
('Water', 199000, 195000);
SELECT department,
spend_2019,
spend_2022,
round( (spend_2022 - spend_2019) /
spend_2019 * 100, 1 ) AS pct_change
FROM percent_change;
-- Listing 6-9: Using sum() and avg() aggregate functions
SELECT sum(pop_est_2019) AS county_sum,
round(avg(pop_est_2019), 0) AS county_average
FROM us_counties_pop_est_2019;
-- Listing 6-10: Testing SQL percentile functions
CREATE TABLE percentile_test (
numbers integer
);
INSERT INTO percentile_test (numbers) VALUES
(1), (2), (3), (4), (5), (6);
SELECT
percentile_cont(.5)
WITHIN GROUP (ORDER BY numbers),
percentile_disc(.5)
WITHIN GROUP (ORDER BY numbers)
FROM percentile_test;
-- Listing 6-11: Using sum(), avg(), and percentile_cont() aggregate functions
SELECT sum(pop_est_2019) AS county_sum,
round(avg(pop_est_2019), 0) AS county_average,
percentile_cont(.5)
WITHIN GROUP (ORDER BY pop_est_2019) AS county_median
FROM us_counties_pop_est_2019;
select * from us_counties_pop_est_2019 order by pop_est_2019 asc;
-- Listing 6-12: Passing an array of values to percentile_cont()
-- quartiles
SELECT percentile_cont(array[.25,.5,.75])
WITHIN GROUP (ORDER BY pop_est_2019) AS quartiles
FROM us_counties_pop_est_2019;
-- Extra:
-- quintiles
SELECT percentile_cont(ARRAY[.2,.4,.6,.8])
WITHIN GROUP (ORDER BY pop_est_2019) AS quintiles
FROM us_counties_pop_est_2019;
-- deciles
SELECT percentile_cont(ARRAY[.1,.2,.3,.4,.5,.6,.7,.8,.9])
WITHIN GROUP (ORDER BY pop_est_2019) AS deciles
FROM us_counties_pop_est_2019;
-- Listing 6-13: Using unnest() to turn an array into rows
SELECT unnest(
percentile_cont(ARRAY[.25,.5,.75])
WITHIN GROUP (ORDER BY pop_est_2019)
) AS quartiles
FROM us_counties_pop_est_2019;
-- Listing 6-14: Creating a median() aggregate function in PostgreSQL
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
CREATE OR REPLACE FUNCTION _final_median(anyarray)
RETURNS float8 AS
$$
WITH q AS
(
SELECT val
FROM unnest($1) val
WHERE VAL IS NOT NULL
ORDER BY 1
),
cnt AS
(
SELECT COUNT(*) AS c FROM q
)
SELECT AVG(val)::float8
FROM
(
SELECT val FROM q
LIMIT 2 - MOD((SELECT c FROM cnt), 2)
OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0)
) q2;
$$
LANGUAGE sql IMMUTABLE;
CREATE AGGREGATE median(anyelement) (
SFUNC=array_append,
STYPE=anyarray,
FINALFUNC=_final_median,
INITCOND='{}'
);
-- Listing 6-15: Using a median() aggregate function
SELECT sum(pop_est_2019) AS county_sum,
round(avg(pop_est_2019), 0) AS county_average,
median(pop_est_2019) AS county_median_func,
percentile_cont(.5)
WITHIN GROUP (ORDER BY pop_est_2019) AS county_median_perc
FROM us_counties_pop_est_2019;
-- Listing 6-16: Finding the most-frequent value with mode()
SELECT mode() WITHIN GROUP (ORDER BY pop_est_2019)
FROM us_counties_pop_est_2019;