344 lines
10 KiB
SQL
344 lines
10 KiB
SQL
---------------------------------------------------------------------------
|
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
|
|
-- by Anthony DeBarros
|
|
|
|
-- Chapter 9 Code Examples
|
|
----------------------------------------------------------------------------
|
|
|
|
-- Listing 9-1: Creating and filling the 2018 Public Libraries Survey table
|
|
|
|
CREATE TABLE pls_fy2018_libraries (
|
|
stabr text NOT NULL,
|
|
fscskey text CONSTRAINT fscskey_2018_pkey PRIMARY KEY,
|
|
libid text NOT NULL,
|
|
libname text NOT NULL,
|
|
address text NOT NULL,
|
|
city text NOT NULL,
|
|
zip text NOT NULL,
|
|
county text NOT NULL,
|
|
phone text NOT NULL,
|
|
c_relatn text NOT NULL,
|
|
c_legbas text NOT NULL,
|
|
c_admin text NOT NULL,
|
|
c_fscs text NOT NULL,
|
|
geocode text NOT NULL,
|
|
lsabound text NOT NULL,
|
|
startdate text NOT NULL,
|
|
enddate text NOT NULL,
|
|
popu_lsa integer NOT NULL,
|
|
popu_und integer NOT NULL,
|
|
centlib integer NOT NULL,
|
|
branlib integer NOT NULL,
|
|
bkmob integer NOT NULL,
|
|
totstaff numeric(8,2) NOT NULL,
|
|
bkvol integer NOT NULL,
|
|
ebook integer NOT NULL,
|
|
audio_ph integer NOT NULL,
|
|
audio_dl integer NOT NULL,
|
|
video_ph integer NOT NULL,
|
|
video_dl integer NOT NULL,
|
|
ec_lo_ot integer NOT NULL,
|
|
subscrip integer NOT NULL,
|
|
hrs_open integer NOT NULL,
|
|
visits integer NOT NULL,
|
|
reference integer NOT NULL,
|
|
regbor integer NOT NULL,
|
|
totcir integer NOT NULL,
|
|
kidcircl integer NOT NULL,
|
|
totpro integer NOT NULL,
|
|
gpterms integer NOT NULL,
|
|
pitusr integer NOT NULL,
|
|
wifisess integer NOT NULL,
|
|
obereg text NOT NULL,
|
|
statstru text NOT NULL,
|
|
statname text NOT NULL,
|
|
stataddr text NOT NULL,
|
|
longitude numeric(10,7) NOT NULL,
|
|
latitude numeric(10,7) NOT NULL
|
|
);
|
|
|
|
COPY pls_fy2018_libraries
|
|
-- FROM 'C:\YourDirectory\pls_fy2018_libraries.csv'
|
|
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2018_libraries.csv'
|
|
WITH (FORMAT CSV, HEADER);
|
|
|
|
CREATE INDEX libname_2018_idx ON pls_fy2018_libraries (libname);
|
|
|
|
-- Listing 9-2: Creating and filling the 2017 and 2016 Public Libraries Survey tables
|
|
|
|
CREATE TABLE pls_fy2017_libraries (
|
|
stabr text NOT NULL,
|
|
fscskey text CONSTRAINT fscskey_17_pkey PRIMARY KEY,
|
|
libid text NOT NULL,
|
|
libname text NOT NULL,
|
|
address text NOT NULL,
|
|
city text NOT NULL,
|
|
zip text NOT NULL,
|
|
county text NOT NULL,
|
|
phone text NOT NULL,
|
|
c_relatn text NOT NULL,
|
|
c_legbas text NOT NULL,
|
|
c_admin text NOT NULL,
|
|
c_fscs text NOT NULL,
|
|
geocode text NOT NULL,
|
|
lsabound text NOT NULL,
|
|
startdate text NOT NULL,
|
|
enddate text NOT NULL,
|
|
popu_lsa integer NOT NULL,
|
|
popu_und integer NOT NULL,
|
|
centlib integer NOT NULL,
|
|
branlib integer NOT NULL,
|
|
bkmob integer NOT NULL,
|
|
totstaff numeric(8,2) NOT NULL,
|
|
bkvol integer NOT NULL,
|
|
ebook integer NOT NULL,
|
|
audio_ph integer NOT NULL,
|
|
audio_dl integer NOT NULL,
|
|
video_ph integer NOT NULL,
|
|
video_dl integer NOT NULL,
|
|
ec_lo_ot integer NOT NULL,
|
|
subscrip integer NOT NULL,
|
|
hrs_open integer NOT NULL,
|
|
visits integer NOT NULL,
|
|
reference integer NOT NULL,
|
|
regbor integer NOT NULL,
|
|
totcir integer NOT NULL,
|
|
kidcircl integer NOT NULL,
|
|
totpro integer NOT NULL,
|
|
gpterms integer NOT NULL,
|
|
pitusr integer NOT NULL,
|
|
wifisess integer NOT NULL,
|
|
obereg text NOT NULL,
|
|
statstru text NOT NULL,
|
|
statname text NOT NULL,
|
|
stataddr text NOT NULL,
|
|
longitude numeric(10,7) NOT NULL,
|
|
latitude numeric(10,7) NOT NULL
|
|
);
|
|
|
|
CREATE TABLE pls_fy2016_libraries (
|
|
stabr text NOT NULL,
|
|
fscskey text CONSTRAINT fscskey_16_pkey PRIMARY KEY,
|
|
libid text NOT NULL,
|
|
libname text NOT NULL,
|
|
address text NOT NULL,
|
|
city text NOT NULL,
|
|
zip text NOT NULL,
|
|
county text NOT NULL,
|
|
phone text NOT NULL,
|
|
c_relatn text NOT NULL,
|
|
c_legbas text NOT NULL,
|
|
c_admin text NOT NULL,
|
|
c_fscs text NOT NULL,
|
|
geocode text NOT NULL,
|
|
lsabound text NOT NULL,
|
|
startdate text NOT NULL,
|
|
enddate text NOT NULL,
|
|
popu_lsa integer NOT NULL,
|
|
popu_und integer NOT NULL,
|
|
centlib integer NOT NULL,
|
|
branlib integer NOT NULL,
|
|
bkmob integer NOT NULL,
|
|
totstaff numeric(8,2) NOT NULL,
|
|
bkvol integer NOT NULL,
|
|
ebook integer NOT NULL,
|
|
audio_ph integer NOT NULL,
|
|
audio_dl integer NOT NULL,
|
|
video_ph integer NOT NULL,
|
|
video_dl integer NOT NULL,
|
|
ec_lo_ot integer NOT NULL,
|
|
subscrip integer NOT NULL,
|
|
hrs_open integer NOT NULL,
|
|
visits integer NOT NULL,
|
|
reference integer NOT NULL,
|
|
regbor integer NOT NULL,
|
|
totcir integer NOT NULL,
|
|
kidcircl integer NOT NULL,
|
|
totpro integer NOT NULL,
|
|
gpterms integer NOT NULL,
|
|
pitusr integer NOT NULL,
|
|
wifisess integer NOT NULL,
|
|
obereg text NOT NULL,
|
|
statstru text NOT NULL,
|
|
statname text NOT NULL,
|
|
stataddr text NOT NULL,
|
|
longitude numeric(10,7) NOT NULL,
|
|
latitude numeric(10,7) NOT NULL
|
|
);
|
|
|
|
COPY pls_fy2017_libraries
|
|
-- FROM 'C:\YourDirectory\pls_fy2017_libraries.csv'
|
|
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2017_libraries.csv'
|
|
WITH (FORMAT CSV, HEADER);
|
|
|
|
COPY pls_fy2016_libraries
|
|
-- FROM 'C:\YourDirectory\pls_fy2016_libraries.csv'
|
|
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2016_libraries.csv'
|
|
WITH (FORMAT CSV, HEADER);
|
|
|
|
CREATE INDEX libname_2017_idx ON pls_fy2017_libraries (libname);
|
|
CREATE INDEX libname_2016_idx ON pls_fy2016_libraries (libname);
|
|
|
|
|
|
-- Listing 9-3: Using count() for table row counts
|
|
|
|
SELECT count(*)
|
|
FROM pls_fy2018_libraries;
|
|
|
|
SELECT count(*)
|
|
FROM pls_fy2017_libraries;
|
|
|
|
SELECT count(*)
|
|
FROM pls_fy2016_libraries;
|
|
|
|
-- Listing 9-4: Using count() for the number of values in a column
|
|
|
|
SELECT count(phone)
|
|
FROM pls_fy2018_libraries;
|
|
|
|
-- Listing 9-5: Using count() for the number of distinct values in a column
|
|
|
|
SELECT count(libname)
|
|
FROM pls_fy2018_libraries;
|
|
|
|
SELECT count(DISTINCT libname)
|
|
FROM pls_fy2018_libraries;
|
|
|
|
-- Bonus: find duplicate libnames
|
|
SELECT libname, count(libname)
|
|
FROM pls_fy2018_libraries
|
|
GROUP BY libname
|
|
ORDER BY count(libname) DESC;
|
|
|
|
-- Bonus: see location of every Oxford Public Library
|
|
SELECT libname, city, stabr
|
|
FROM pls_fy2018_libraries
|
|
WHERE libname = 'OXFORD PUBLIC LIBRARY';
|
|
|
|
-- Listing 9-6: Finding the most and fewest visits using max() and min()
|
|
SELECT max(visits), min(visits)
|
|
FROM pls_fy2018_libraries;
|
|
|
|
-- Listing 9-7: Using GROUP BY on the stabr column
|
|
|
|
-- There are 55 in 2018.
|
|
SELECT stabr
|
|
FROM pls_fy2018_libraries
|
|
GROUP BY stabr
|
|
ORDER BY stabr;
|
|
|
|
-- Bonus: there are 54 in 2017.
|
|
SELECT stabr
|
|
FROM pls_fy2017_libraries
|
|
GROUP BY stabr
|
|
ORDER BY stabr;
|
|
|
|
-- Listing 9-8: Using GROUP BY on the city and stabr columns
|
|
|
|
SELECT city, stabr
|
|
FROM pls_fy2018_libraries
|
|
GROUP BY city, stabr
|
|
ORDER BY city, stabr;
|
|
|
|
-- Bonus: We can count the combos
|
|
SELECT city, stabr, count(*)
|
|
FROM pls_fy2018_libraries
|
|
GROUP BY city, stabr
|
|
ORDER BY count(*) DESC;
|
|
|
|
-- Listing 9-9: Using GROUP BY with count() on the stabr column
|
|
|
|
SELECT stabr, count(*)
|
|
FROM pls_fy2018_libraries
|
|
GROUP BY stabr
|
|
ORDER BY count(*) DESC;
|
|
|
|
-- Listing 9-10: GROUP BY with count() on the stabr and stataddr columns
|
|
|
|
SELECT stabr, stataddr, count(*)
|
|
FROM pls_fy2018_libraries
|
|
GROUP BY stabr, stataddr
|
|
ORDER BY stabr, stataddr;
|
|
|
|
-- Listing 9-11: Using the sum() aggregate function to total visits to
|
|
-- libraries in 2014 and 2009
|
|
|
|
-- 2018
|
|
SELECT sum(visits) AS visits_2018
|
|
FROM pls_fy2018_libraries
|
|
WHERE visits >= 0;
|
|
|
|
-- 2017
|
|
SELECT sum(visits) AS visits_2017
|
|
FROM pls_fy2017_libraries
|
|
WHERE visits >= 0;
|
|
|
|
-- 2016
|
|
SELECT sum(visits) AS visits_2016
|
|
FROM pls_fy2016_libraries
|
|
WHERE visits >= 0;
|
|
|
|
-- Listing 9-12: Using sum() to total visits on joined 2018, 2017, and 2016 tables
|
|
|
|
SELECT sum(pls18.visits) AS visits_2018,
|
|
sum(pls17.visits) AS visits_2017,
|
|
sum(pls16.visits) AS visits_2016
|
|
FROM pls_fy2018_libraries pls18
|
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
|
WHERE pls18.visits >= 0
|
|
AND pls17.visits >= 0
|
|
AND pls16.visits >= 0;
|
|
|
|
-- Bonus: summing wifi sessions
|
|
SELECT sum(pls18.wifisess) AS wifi_2018,
|
|
sum(pls17.wifisess) AS wifi_2017,
|
|
sum(pls16.wifisess) AS wifi_2016
|
|
FROM pls_fy2018_libraries pls18
|
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
|
WHERE pls18.wifisess >= 0
|
|
AND pls17.wifisess >= 0
|
|
AND pls16.wifisess >= 0;
|
|
|
|
|
|
-- Listing 9-13: Using GROUP BY to track percent change in library visits by state
|
|
|
|
SELECT pls18.stabr,
|
|
sum(pls18.visits) AS visits_2018,
|
|
sum(pls17.visits) AS visits_2017,
|
|
sum(pls16.visits) AS visits_2016,
|
|
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
|
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
|
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
|
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
|
FROM pls_fy2018_libraries pls18
|
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
|
WHERE pls18.visits >= 0
|
|
AND pls17.visits >= 0
|
|
AND pls16.visits >= 0
|
|
GROUP BY pls18.stabr
|
|
ORDER BY chg_2018_17 DESC;
|
|
|
|
-- Listing 9-14: Using HAVING to filter the results of an aggregate query
|
|
|
|
SELECT pls18.stabr,
|
|
sum(pls18.visits) AS visits_2018,
|
|
sum(pls17.visits) AS visits_2017,
|
|
sum(pls16.visits) AS visits_2016,
|
|
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
|
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
|
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
|
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
|
FROM pls_fy2018_libraries pls18
|
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
|
WHERE pls18.visits >= 0
|
|
AND pls17.visits >= 0
|
|
AND pls16.visits >= 0
|
|
GROUP BY pls18.stabr
|
|
HAVING sum(pls18.visits) > 50000000
|
|
ORDER BY chg_2018_17 DESC;
|
|
|