practical-sql-2/Chapter_09/Chapter_09.sql
2021-08-15 21:09:08 -04:00

341 lines
9.6 KiB
SQL

---------------------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
-- by Anthony DeBarros
-- Chapter 9 Code Examples
----------------------------------------------------------------------------
-- Listing 9-1: Creating and filling the 2018 Public Libraries Survey table
CREATE TABLE pls_fy2018_libraries (
stabr text NOT NULL,
fscskey text CONSTRAINT fscskey_2018_pkey PRIMARY KEY,
libid text NOT NULL,
libname text NOT NULL,
address text NOT NULL,
city text NOT NULL,
zip text NOT NULL,
county text NOT NULL,
phone text NOT NULL,
c_relatn text NOT NULL,
c_legbas text NOT NULL,
c_admin text NOT NULL,
c_fscs text NOT NULL,
geocode text NOT NULL,
lsabound text NOT NULL,
startdate text NOT NULL,
enddate text NOT NULL,
popu_lsa integer NOT NULL,
popu_und integer NOT NULL,
centlib integer NOT NULL,
branlib integer NOT NULL,
bkmob integer NOT NULL,
totstaff numeric(8,2) NOT NULL,
bkvol integer NOT NULL,
ebook integer NOT NULL,
audio_ph integer NOT NULL,
audio_dl integer NOT NULL,
video_ph integer NOT NULL,
video_dl integer NOT NULL,
ec_lo_ot integer NOT NULL,
subscrip integer NOT NULL,
hrs_open integer NOT NULL,
visits integer NOT NULL,
reference integer NOT NULL,
regbor integer NOT NULL,
totcir integer NOT NULL,
kidcircl integer NOT NULL,
totpro integer NOT NULL,
gpterms integer NOT NULL,
pitusr integer NOT NULL,
wifisess integer NOT NULL,
obereg text NOT NULL,
statstru text NOT NULL,
statname text NOT NULL,
stataddr text NOT NULL,
longitude numeric(10,7) NOT NULL,
latitude numeric(10,7) NOT NULL
);
COPY pls_fy2018_libraries
FROM 'C:\YourDirectory\pls_fy2018_libraries.csv'
WITH (FORMAT CSV, HEADER);
CREATE INDEX libname_2018_idx ON pls_fy2018_libraries (libname);
-- Listing 9-2: Creating and filling the 2017 and 2016 Public Libraries Survey tables
CREATE TABLE pls_fy2017_libraries (
stabr text NOT NULL,
fscskey text CONSTRAINT fscskey_17_pkey PRIMARY KEY,
libid text NOT NULL,
libname text NOT NULL,
address text NOT NULL,
city text NOT NULL,
zip text NOT NULL,
county text NOT NULL,
phone text NOT NULL,
c_relatn text NOT NULL,
c_legbas text NOT NULL,
c_admin text NOT NULL,
c_fscs text NOT NULL,
geocode text NOT NULL,
lsabound text NOT NULL,
startdate text NOT NULL,
enddate text NOT NULL,
popu_lsa integer NOT NULL,
popu_und integer NOT NULL,
centlib integer NOT NULL,
branlib integer NOT NULL,
bkmob integer NOT NULL,
totstaff numeric(8,2) NOT NULL,
bkvol integer NOT NULL,
ebook integer NOT NULL,
audio_ph integer NOT NULL,
audio_dl integer NOT NULL,
video_ph integer NOT NULL,
video_dl integer NOT NULL,
ec_lo_ot integer NOT NULL,
subscrip integer NOT NULL,
hrs_open integer NOT NULL,
visits integer NOT NULL,
reference integer NOT NULL,
regbor integer NOT NULL,
totcir integer NOT NULL,
kidcircl integer NOT NULL,
totpro integer NOT NULL,
gpterms integer NOT NULL,
pitusr integer NOT NULL,
wifisess integer NOT NULL,
obereg text NOT NULL,
statstru text NOT NULL,
statname text NOT NULL,
stataddr text NOT NULL,
longitude numeric(10,7) NOT NULL,
latitude numeric(10,7) NOT NULL
);
CREATE TABLE pls_fy2016_libraries (
stabr text NOT NULL,
fscskey text CONSTRAINT fscskey_16_pkey PRIMARY KEY,
libid text NOT NULL,
libname text NOT NULL,
address text NOT NULL,
city text NOT NULL,
zip text NOT NULL,
county text NOT NULL,
phone text NOT NULL,
c_relatn text NOT NULL,
c_legbas text NOT NULL,
c_admin text NOT NULL,
c_fscs text NOT NULL,
geocode text NOT NULL,
lsabound text NOT NULL,
startdate text NOT NULL,
enddate text NOT NULL,
popu_lsa integer NOT NULL,
popu_und integer NOT NULL,
centlib integer NOT NULL,
branlib integer NOT NULL,
bkmob integer NOT NULL,
totstaff numeric(8,2) NOT NULL,
bkvol integer NOT NULL,
ebook integer NOT NULL,
audio_ph integer NOT NULL,
audio_dl integer NOT NULL,
video_ph integer NOT NULL,
video_dl integer NOT NULL,
ec_lo_ot integer NOT NULL,
subscrip integer NOT NULL,
hrs_open integer NOT NULL,
visits integer NOT NULL,
reference integer NOT NULL,
regbor integer NOT NULL,
totcir integer NOT NULL,
kidcircl integer NOT NULL,
totpro integer NOT NULL,
gpterms integer NOT NULL,
pitusr integer NOT NULL,
wifisess integer NOT NULL,
obereg text NOT NULL,
statstru text NOT NULL,
statname text NOT NULL,
stataddr text NOT NULL,
longitude numeric(10,7) NOT NULL,
latitude numeric(10,7) NOT NULL
);
COPY pls_fy2017_libraries
FROM 'C:\YourDirectory\pls_fy2017_libraries.csv'
WITH (FORMAT CSV, HEADER);
COPY pls_fy2016_libraries
FROM 'C:\YourDirectory\pls_fy2016_libraries.csv'
WITH (FORMAT CSV, HEADER);
CREATE INDEX libname_2017_idx ON pls_fy2017_libraries (libname);
CREATE INDEX libname_2016_idx ON pls_fy2016_libraries (libname);
-- Listing 9-3: Using count() for table row counts
SELECT count(*)
FROM pls_fy2018_libraries;
SELECT count(*)
FROM pls_fy2017_libraries;
SELECT count(*)
FROM pls_fy2016_libraries;
-- Listing 9-4: Using count() for the number of values in a column
SELECT count(phone)
FROM pls_fy2018_libraries;
-- Listing 9-5: Using count() for the number of distinct values in a column
SELECT count(libname)
FROM pls_fy2018_libraries;
SELECT count(DISTINCT libname)
FROM pls_fy2018_libraries;
-- Bonus: find duplicate libnames
SELECT libname, count(libname)
FROM pls_fy2018_libraries
GROUP BY libname
ORDER BY count(libname) DESC;
-- Bonus: see location of every Oxford Public Library
SELECT libname, city, stabr
FROM pls_fy2018_libraries
WHERE libname = 'OXFORD PUBLIC LIBRARY';
-- Listing 9-6: Finding the most and fewest visits using max() and min()
SELECT max(visits), min(visits)
FROM pls_fy2018_libraries;
-- Listing 9-7: Using GROUP BY on the stabr column
-- There are 55 in 2018.
SELECT stabr
FROM pls_fy2018_libraries
GROUP BY stabr
ORDER BY stabr;
-- Bonus: there are 54 in 2017.
SELECT stabr
FROM pls_fy2017_libraries
GROUP BY stabr
ORDER BY stabr;
-- Listing 9-8: Using GROUP BY on the city and stabr columns
SELECT city, stabr
FROM pls_fy2018_libraries
GROUP BY city, stabr
ORDER BY city, stabr;
-- Bonus: We can count the combos
SELECT city, stabr, count(*)
FROM pls_fy2018_libraries
GROUP BY city, stabr
ORDER BY count(*) DESC;
-- Listing 9-9: Using GROUP BY with count() on the stabr column
SELECT stabr, count(*)
FROM pls_fy2018_libraries
GROUP BY stabr
ORDER BY count(*) DESC;
-- Listing 9-10: Using GROUP BY with count() on the stabr and stataddr columns
SELECT stabr, stataddr, count(*)
FROM pls_fy2018_libraries
GROUP BY stabr, stataddr
ORDER BY stabr, stataddr;
-- Listing 9-11: Using the sum() aggregate function to total visits to
-- libraries in 2016, 2017, and 2018
-- 2018
SELECT sum(visits) AS visits_2018
FROM pls_fy2018_libraries
WHERE visits >= 0;
-- 2017
SELECT sum(visits) AS visits_2017
FROM pls_fy2017_libraries
WHERE visits >= 0;
-- 2016
SELECT sum(visits) AS visits_2016
FROM pls_fy2016_libraries
WHERE visits >= 0;
-- Listing 9-12: Using sum() to total visits on joined 2018, 2017, and 2016 tables
SELECT sum(pls18.visits) AS visits_2018,
sum(pls17.visits) AS visits_2017,
sum(pls16.visits) AS visits_2016
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.visits >= 0
AND pls17.visits >= 0
AND pls16.visits >= 0;
-- Bonus: summing wifi sessions
SELECT sum(pls18.wifisess) AS wifi_2018,
sum(pls17.wifisess) AS wifi_2017,
sum(pls16.wifisess) AS wifi_2016
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.wifisess >= 0
AND pls17.wifisess >= 0
AND pls16.wifisess >= 0;
-- Listing 9-13: Using GROUP BY to track percent change in library visits by state
SELECT pls18.stabr,
sum(pls18.visits) AS visits_2018,
sum(pls17.visits) AS visits_2017,
sum(pls16.visits) AS visits_2016,
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.visits >= 0
AND pls17.visits >= 0
AND pls16.visits >= 0
GROUP BY pls18.stabr
ORDER BY chg_2018_17 DESC;
-- Listing 9-14: Using HAVING to filter the results of an aggregate query
SELECT pls18.stabr,
sum(pls18.visits) AS visits_2018,
sum(pls17.visits) AS visits_2017,
sum(pls16.visits) AS visits_2016,
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.visits >= 0
AND pls17.visits >= 0
AND pls16.visits >= 0
GROUP BY pls18.stabr
HAVING sum(pls18.visits) > 50000000
ORDER BY chg_2018_17 DESC;