Chapter 9 code and data

This commit is contained in:
anthonydb 2020-09-25 08:27:20 -04:00
parent 498950fa62
commit a29acc084d
2 changed files with 357 additions and 184 deletions

View File

@ -1,305 +1,343 @@
-- FIRST EDITION FILE; IGNORE ---------------------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros -- by Anthony DeBarros
-- Chapter 9 Code Examples -- Chapter 9 Code Examples
-------------------------------------------------------------- ----------------------------------------------------------------------------
-- Listing 9-1: Creating and filling the 2014 Public Libraries Survey table -- Listing 9-1: Creating and filling the 2018 Public Libraries Survey table
CREATE TABLE pls_fy2014_pupld14a ( CREATE TABLE pls_fy2018_libraries (
stabr varchar(2) NOT NULL, stabr text NOT NULL,
fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY, fscskey text CONSTRAINT fscskey_2018_pkey PRIMARY KEY,
libid varchar(20) NOT NULL, libid text NOT NULL,
libname varchar(100) NOT NULL, libname text NOT NULL,
obereg varchar(2) NOT NULL, address text NOT NULL,
rstatus integer NOT NULL, city text NOT NULL,
statstru varchar(2) NOT NULL, zip text NOT NULL,
statname varchar(2) NOT NULL, county text NOT NULL,
stataddr varchar(2) NOT NULL, phone text NOT NULL,
longitud numeric(10,7) NOT NULL, c_relatn text NOT NULL,
latitude numeric(10,7) NOT NULL, c_legbas text NOT NULL,
fipsst varchar(2) NOT NULL, c_admin text NOT NULL,
fipsco varchar(3) NOT NULL, c_fscs text NOT NULL,
address varchar(35) NOT NULL, geocode text NOT NULL,
city varchar(20) NOT NULL, lsabound text NOT NULL,
zip varchar(5) NOT NULL, startdate text NOT NULL,
zip4 varchar(4) NOT NULL, enddate text NOT NULL,
cnty varchar(20) NOT NULL,
phone varchar(10) NOT NULL,
c_relatn varchar(2) NOT NULL,
c_legbas varchar(2) NOT NULL,
c_admin varchar(2) NOT NULL,
geocode varchar(3) NOT NULL,
lsabound varchar(1) NOT NULL,
startdat varchar(10),
enddate varchar(10),
popu_lsa integer NOT NULL, popu_lsa integer NOT NULL,
popu_und integer NOT NULL,
centlib integer NOT NULL, centlib integer NOT NULL,
branlib integer NOT NULL, branlib integer NOT NULL,
bkmob integer NOT NULL, bkmob integer NOT NULL,
master numeric(8,2) NOT NULL,
libraria numeric(8,2) NOT NULL,
totstaff numeric(8,2) NOT NULL, totstaff numeric(8,2) NOT NULL,
locgvt integer NOT NULL,
stgvt integer NOT NULL,
fedgvt integer NOT NULL,
totincm integer NOT NULL,
salaries integer,
benefit integer,
staffexp integer,
prmatexp integer NOT NULL,
elmatexp integer NOT NULL,
totexpco integer NOT NULL,
totopexp integer NOT NULL,
lcap_rev integer NOT NULL,
scap_rev integer NOT NULL,
fcap_rev integer NOT NULL,
cap_rev integer NOT NULL,
capital integer NOT NULL,
bkvol integer NOT NULL, bkvol integer NOT NULL,
ebook integer NOT NULL, ebook integer NOT NULL,
audio_ph integer NOT NULL, audio_ph integer NOT NULL,
audio_dl float NOT NULL, audio_dl integer NOT NULL,
video_ph integer NOT NULL, video_ph integer NOT NULL,
video_dl float NOT NULL, video_dl integer NOT NULL,
databases integer NOT NULL, ec_lo_ot integer NOT NULL,
subscrip integer NOT NULL, subscrip integer NOT NULL,
hrs_open integer NOT NULL, hrs_open integer NOT NULL,
visits integer NOT NULL, visits integer NOT NULL,
referenc integer NOT NULL, reference integer NOT NULL,
regbor integer NOT NULL, regbor integer NOT NULL,
totcir integer NOT NULL, totcir integer NOT NULL,
kidcircl integer NOT NULL, kidcircl integer NOT NULL,
elmatcir integer NOT NULL,
loanto integer NOT NULL,
loanfm integer NOT NULL,
totpro integer NOT NULL, totpro integer NOT NULL,
totatten integer NOT NULL,
gpterms integer NOT NULL, gpterms integer NOT NULL,
pitusr integer NOT NULL, pitusr integer NOT NULL,
wifisess integer NOT NULL, wifisess integer NOT NULL,
yr_sub integer NOT NULL obereg text NOT NULL,
statstru text NOT NULL,
statname text NOT NULL,
stataddr text NOT NULL,
longitude numeric(10,7) NOT NULL,
latitude numeric(10,7) NOT NULL
); );
CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname); COPY pls_fy2018_libraries
CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr); -- FROM 'C:\YourDirectory\pls_fy2018_libraries.csv'
CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city); from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2018_libraries.csv'
CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits);
COPY pls_fy2014_pupld14a
FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv'
WITH (FORMAT CSV, HEADER); WITH (FORMAT CSV, HEADER);
-- Listing 9-2: Creating and filling the 2009 Public Libraries Survey table CREATE INDEX libname_2018_idx ON pls_fy2018_libraries (libname);
CREATE TABLE pls_fy2009_pupld09a ( -- Listing 9-2: Creating and filling the 2017 and 2016 Public Libraries Survey tables
stabr varchar(2) NOT NULL,
fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY, CREATE TABLE pls_fy2017_libraries (
libid varchar(20) NOT NULL, stabr text NOT NULL,
libname varchar(100) NOT NULL, fscskey text CONSTRAINT fscskey_17_pkey PRIMARY KEY,
address varchar(35) NOT NULL, libid text NOT NULL,
city varchar(20) NOT NULL, libname text NOT NULL,
zip varchar(5) NOT NULL, address text NOT NULL,
zip4 varchar(4) NOT NULL, city text NOT NULL,
cnty varchar(20) NOT NULL, zip text NOT NULL,
phone varchar(10) NOT NULL, county text NOT NULL,
c_relatn varchar(2) NOT NULL, phone text NOT NULL,
c_legbas varchar(2) NOT NULL, c_relatn text NOT NULL,
c_admin varchar(2) NOT NULL, c_legbas text NOT NULL,
geocode varchar(3) NOT NULL, c_admin text NOT NULL,
lsabound varchar(1) NOT NULL, c_fscs text NOT NULL,
startdat varchar(10), geocode text NOT NULL,
enddate varchar(10), lsabound text NOT NULL,
startdate text NOT NULL,
enddate text NOT NULL,
popu_lsa integer NOT NULL, popu_lsa integer NOT NULL,
popu_und integer NOT NULL,
centlib integer NOT NULL, centlib integer NOT NULL,
branlib integer NOT NULL, branlib integer NOT NULL,
bkmob integer NOT NULL, bkmob integer NOT NULL,
master numeric(8,2) NOT NULL,
libraria numeric(8,2) NOT NULL,
totstaff numeric(8,2) NOT NULL, totstaff numeric(8,2) NOT NULL,
locgvt integer NOT NULL,
stgvt integer NOT NULL,
fedgvt integer NOT NULL,
totincm integer NOT NULL,
salaries integer,
benefit integer,
staffexp integer,
prmatexp integer NOT NULL,
elmatexp integer NOT NULL,
totexpco integer NOT NULL,
totopexp integer NOT NULL,
lcap_rev integer NOT NULL,
scap_rev integer NOT NULL,
fcap_rev integer NOT NULL,
cap_rev integer NOT NULL,
capital integer NOT NULL,
bkvol integer NOT NULL, bkvol integer NOT NULL,
ebook integer NOT NULL, ebook integer NOT NULL,
audio integer NOT NULL, audio_ph integer NOT NULL,
video integer NOT NULL, audio_dl integer NOT NULL,
databases integer NOT NULL, video_ph integer NOT NULL,
video_dl integer NOT NULL,
ec_lo_ot integer NOT NULL,
subscrip integer NOT NULL, subscrip integer NOT NULL,
hrs_open integer NOT NULL, hrs_open integer NOT NULL,
visits integer NOT NULL, visits integer NOT NULL,
referenc integer NOT NULL, reference integer NOT NULL,
regbor integer NOT NULL, regbor integer NOT NULL,
totcir integer NOT NULL, totcir integer NOT NULL,
kidcircl integer NOT NULL, kidcircl integer NOT NULL,
loanto integer NOT NULL,
loanfm integer NOT NULL,
totpro integer NOT NULL, totpro integer NOT NULL,
totatten integer NOT NULL,
gpterms integer NOT NULL, gpterms integer NOT NULL,
pitusr integer NOT NULL, pitusr integer NOT NULL,
yr_sub integer NOT NULL, wifisess integer NOT NULL,
obereg varchar(2) NOT NULL, obereg text NOT NULL,
rstatus integer NOT NULL, statstru text NOT NULL,
statstru varchar(2) NOT NULL, statname text NOT NULL,
statname varchar(2) NOT NULL, stataddr text NOT NULL,
stataddr varchar(2) NOT NULL, longitude numeric(10,7) NOT NULL,
longitud numeric(10,7) NOT NULL, latitude numeric(10,7) NOT NULL
latitude numeric(10,7) NOT NULL,
fipsst varchar(2) NOT NULL,
fipsco varchar(3) NOT NULL
); );
CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname); CREATE TABLE pls_fy2016_libraries (
CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr); stabr text NOT NULL,
CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city); fscskey text CONSTRAINT fscskey_16_pkey PRIMARY KEY,
CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits); libid text NOT NULL,
libname text NOT NULL,
address text NOT NULL,
city text NOT NULL,
zip text NOT NULL,
county text NOT NULL,
phone text NOT NULL,
c_relatn text NOT NULL,
c_legbas text NOT NULL,
c_admin text NOT NULL,
c_fscs text NOT NULL,
geocode text NOT NULL,
lsabound text NOT NULL,
startdate text NOT NULL,
enddate text NOT NULL,
popu_lsa integer NOT NULL,
popu_und integer NOT NULL,
centlib integer NOT NULL,
branlib integer NOT NULL,
bkmob integer NOT NULL,
totstaff numeric(8,2) NOT NULL,
bkvol integer NOT NULL,
ebook integer NOT NULL,
audio_ph integer NOT NULL,
audio_dl integer NOT NULL,
video_ph integer NOT NULL,
video_dl integer NOT NULL,
ec_lo_ot integer NOT NULL,
subscrip integer NOT NULL,
hrs_open integer NOT NULL,
visits integer NOT NULL,
reference integer NOT NULL,
regbor integer NOT NULL,
totcir integer NOT NULL,
kidcircl integer NOT NULL,
totpro integer NOT NULL,
gpterms integer NOT NULL,
pitusr integer NOT NULL,
wifisess integer NOT NULL,
obereg text NOT NULL,
statstru text NOT NULL,
statname text NOT NULL,
stataddr text NOT NULL,
longitude numeric(10,7) NOT NULL,
latitude numeric(10,7) NOT NULL
);
COPY pls_fy2009_pupld09a COPY pls_fy2017_libraries
FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv' -- FROM 'C:\YourDirectory\pls_fy2017_libraries.csv'
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2017_libraries.csv'
WITH (FORMAT CSV, HEADER); WITH (FORMAT CSV, HEADER);
COPY pls_fy2016_libraries
-- FROM 'C:\YourDirectory\pls_fy2016_libraries.csv'
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2016_libraries.csv'
WITH (FORMAT CSV, HEADER);
CREATE INDEX libname_2017_idx ON pls_fy2017_libraries (libname);
CREATE INDEX libname_2016_idx ON pls_fy2016_libraries (libname);
-- Listing 9-3: Using count() for table row counts -- Listing 9-3: Using count() for table row counts
SELECT count(*) SELECT count(*)
FROM pls_fy2014_pupld14a; FROM pls_fy2018_libraries;
SELECT count(*) SELECT count(*)
FROM pls_fy2009_pupld09a; FROM pls_fy2017_libraries;
SELECT count(*)
FROM pls_fy2016_libraries;
-- Listing 9-4: Using count() for the number of values in a column -- Listing 9-4: Using count() for the number of values in a column
SELECT count(salaries) SELECT count(phone)
FROM pls_fy2014_pupld14a; FROM pls_fy2018_libraries;
-- Listing 9-5: Using count() for the number of distinct values in a column -- Listing 9-5: Using count() for the number of distinct values in a column
SELECT count(libname) SELECT count(libname)
FROM pls_fy2014_pupld14a; FROM pls_fy2018_libraries;
SELECT count(DISTINCT libname) SELECT count(DISTINCT libname)
FROM pls_fy2014_pupld14a; FROM pls_fy2018_libraries;
-- Bonus: find duplicate libnames -- Bonus: find duplicate libnames
SELECT libname, count(libname) SELECT libname, count(libname)
FROM pls_fy2014_pupld14a FROM pls_fy2018_libraries
GROUP BY libname GROUP BY libname
ORDER BY count(libname) DESC; ORDER BY count(libname) DESC;
-- Bonus: see location of every Oxford Public Library -- Bonus: see location of every Oxford Public Library
SELECT libname, city, stabr SELECT libname, city, stabr
FROM pls_fy2014_pupld14a FROM pls_fy2018_libraries
WHERE libname = 'OXFORD PUBLIC LIBRARY'; WHERE libname = 'OXFORD PUBLIC LIBRARY';
-- Listing 9-6: Finding the most and fewest visits using max() and min() -- Listing 9-6: Finding the most and fewest visits using max() and min()
SELECT max(visits), min(visits) SELECT max(visits), min(visits)
FROM pls_fy2014_pupld14a; FROM pls_fy2018_libraries;
-- Listing 9-7: Using GROUP BY on the stabr column -- Listing 9-7: Using GROUP BY on the stabr column
-- There are 56 in 2014. -- There are 55 in 2018.
SELECT stabr SELECT stabr
FROM pls_fy2014_pupld14a FROM pls_fy2018_libraries
GROUP BY stabr GROUP BY stabr
ORDER BY stabr; ORDER BY stabr;
-- Bonus: there are 55 in 2009. -- Bonus: there are 54 in 2017.
SELECT stabr SELECT stabr
FROM pls_fy2009_pupld09a FROM pls_fy2017_libraries
GROUP BY stabr GROUP BY stabr
ORDER BY stabr; ORDER BY stabr;
-- Listing 9-8: Using GROUP BY on the city and stabr columns -- Listing 9-8: Using GROUP BY on the city and stabr columns
SELECT city, stabr SELECT city, stabr
FROM pls_fy2014_pupld14a FROM pls_fy2018_libraries
GROUP BY city, stabr GROUP BY city, stabr
ORDER BY city, stabr; ORDER BY city, stabr;
-- Bonus: We can count some of the combos -- Bonus: We can count the combos
SELECT city, stabr, count(*) SELECT city, stabr, count(*)
FROM pls_fy2014_pupld14a FROM pls_fy2018_libraries
GROUP BY city, stabr GROUP BY city, stabr
ORDER BY count(*) DESC; ORDER BY count(*) DESC;
-- Listing 9-9: GROUP BY with count() on the stabr column -- Listing 9-9: Using GROUP BY with count() on the stabr column
SELECT stabr, count(*) SELECT stabr, count(*)
FROM pls_fy2014_pupld14a FROM pls_fy2018_libraries
GROUP BY stabr GROUP BY stabr
ORDER BY count(*) DESC; ORDER BY count(*) DESC;
-- Listing 9-10: GROUP BY with count() on the stabr and stataddr columns -- Listing 9-10: GROUP BY with count() on the stabr and stataddr columns
SELECT stabr, stataddr, count(*) SELECT stabr, stataddr, count(*)
FROM pls_fy2014_pupld14a FROM pls_fy2018_libraries
GROUP BY stabr, stataddr GROUP BY stabr, stataddr
ORDER BY stabr ASC, count(*) DESC; ORDER BY stabr, stataddr;
-- Listing 9-11: Using the sum() aggregate function to total visits to -- Listing 9-11: Using the sum() aggregate function to total visits to
-- libraries in 2014 and 2009 -- libraries in 2014 and 2009
-- 2014 -- 2018
SELECT sum(visits) AS visits_2014 SELECT sum(visits) AS visits_2018
FROM pls_fy2014_pupld14a FROM pls_fy2018_libraries
WHERE visits >= 0; WHERE visits >= 0;
-- 2009 -- 2017
SELECT sum(visits) AS visits_2009 SELECT sum(visits) AS visits_2017
FROM pls_fy2009_pupld09a FROM pls_fy2017_libraries
WHERE visits >= 0; WHERE visits >= 0;
-- Listing 9-12: Using sum() to total visits on joined 2014 and 2009 library tables -- 2016
SELECT sum(visits) AS visits_2016
FROM pls_fy2016_libraries
WHERE visits >= 0;
-- Listing 9-12: Using sum() to total visits on joined 2018, 2017, and 2016 tables
SELECT sum(pls18.visits) AS visits_2018,
sum(pls17.visits) AS visits_2017,
sum(pls16.visits) AS visits_2016
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.visits >= 0
AND pls17.visits >= 0
AND pls16.visits >= 0;
-- Bonus: summing wifi sessions
SELECT sum(pls18.wifisess) AS wifi_2018,
sum(pls17.wifisess) AS wifi_2017,
sum(pls16.wifisess) AS wifi_2016
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.wifisess >= 0
AND pls17.wifisess >= 0
AND pls16.wifisess >= 0;
SELECT sum(pls14.visits) AS visits_2014,
sum(pls09.visits) AS visits_2009
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0;
-- Listing 9-13: Using GROUP BY to track percent change in library visits by state -- Listing 9-13: Using GROUP BY to track percent change in library visits by state
SELECT pls14.stabr, SELECT pls18.stabr,
sum(pls14.visits) AS visits_2014, sum(pls18.visits) AS visits_2018,
sum(pls09.visits) AS visits_2009, sum(pls17.visits) AS visits_2017,
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) / sum(pls16.visits) AS visits_2016,
sum(pls09.visits) * 100, 2 ) AS pct_change round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09 sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
ON pls14.fscskey = pls09.fscskey round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
WHERE pls14.visits >= 0 AND pls09.visits >= 0 sum(pls16.visits) * 100, 1 ) AS chg_2017_16
GROUP BY pls14.stabr FROM pls_fy2018_libraries pls18
ORDER BY pct_change DESC; JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.visits >= 0
AND pls17.visits >= 0
AND pls16.visits >= 0
GROUP BY pls18.stabr
ORDER BY chg_2018_17 DESC;
-- Listing 9-14: Using HAVING to filter the results of an aggregate query -- Listing 9-14: Using HAVING to filter the results of an aggregate query
SELECT pls14.stabr, SELECT pls18.stabr,
sum(pls14.visits) AS visits_2014, sum(pls18.visits) AS visits_2018,
sum(pls09.visits) AS visits_2009, sum(pls17.visits) AS visits_2017,
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) / sum(pls16.visits) AS visits_2016,
sum(pls09.visits) * 100, 2 ) AS pct_change round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09 sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
ON pls14.fscskey = pls09.fscskey round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
WHERE pls14.visits >= 0 AND pls09.visits >= 0 sum(pls16.visits) * 100, 1 ) AS chg_2017_16
GROUP BY pls14.stabr FROM pls_fy2018_libraries pls18
HAVING sum(pls14.visits) > 50000000 JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
ORDER BY pct_change DESC; JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.visits >= 0
AND pls17.visits >= 0
AND pls16.visits >= 0
GROUP BY pls18.stabr
HAVING sum(pls18.visits) > 50000000
ORDER BY chg_2018_17 DESC;

View File

@ -470,5 +470,140 @@ CREATE TABLE songs (
-- to perform many queries that include date ranges. -- to perform many queries that include date ranges.
----------------------------------------------------------------------------
-- Chapter 9: Extracting Information by Grouping and Summarizing
----------------------------------------------------------------------------
-- 1. We saw that library visits have declined recently in most places. But
-- what is the pattern in library employment? All three library survey tables
-- contain the column totstaff, which is the number of paid full-time equivalent
-- employees. Modify the code in Listings 9-13 and 9-14 to calculate the
-- percent change in the sum of the column over time, examining all states as
-- well as states with the most visitors. Watch out for negative values!
-- Answer (all states):
SELECT pls18.stabr,
sum(pls18.totstaff) AS totstaff_2018,
sum(pls17.totstaff) AS totstaff_2017,
sum(pls16.totstaff) AS totstaff_2016,
round( (sum(pls18.totstaff::numeric) - sum(pls17.totstaff)) /
sum(pls17.totstaff) * 100, 1 ) AS chg_2018_17,
round( (sum(pls17.totstaff::numeric) - sum(pls16.totstaff)) /
sum(pls16.hrs_open) * 100, 1 ) AS chg_2017_16
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.totstaff >= 0
AND pls17.totstaff >= 0
AND pls16.totstaff >= 0
GROUP BY pls18.stabr
ORDER BY chg_2018_17 DESC;
-- Answer (filtered with HAVING):
SELECT pls18.stabr,
sum(pls18.totstaff) AS totstaff_2018,
sum(pls17.totstaff) AS totstaff_2017,
sum(pls16.totstaff) AS totstaff_2016,
round( (sum(pls18.totstaff::numeric) - sum(pls17.totstaff)) /
sum(pls17.totstaff) * 100, 1 ) AS chg_2018_17,
round( (sum(pls17.totstaff::numeric) - sum(pls16.totstaff)) /
sum(pls16.hrs_open) * 100, 1 ) AS chg_2017_16
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.totstaff >= 0
AND pls17.totstaff >= 0
AND pls16.totstaff >= 0
GROUP BY pls18.stabr
HAVING sum(pls18.visits) > 50000000
ORDER BY chg_2018_17 DESC;
-- 2. The library survey tables contain a column called obereg, a two-digit
-- Bureau of Economic Analysis Code that classifies each library agency
-- according to a region of the United States, such as New England, Rocky
-- Mountains, and so on. Just as we calculated the percent change in visits
-- grouped by state, do the same to group percent changes in visits by U.S.
-- region using obereg. Consult the survey documentation to find the meaning
-- of each region code. For a bonus challenge, create a table with the obereg
-- code as the primary key and the region name as text, and join it to the
-- summary query to group by the region name rather than the code.
-- Answer:
-- a) sum() visits by region.
SELECT pls18.obereg,
sum(pls18.visits) AS visits_2018,
sum(pls17.visits) AS visits_2017,
sum(pls16.visits) AS visits_2016,
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls18.visits >= 0
AND pls17.visits >= 0
AND pls16.visits >= 0
GROUP BY pls18.obereg
ORDER BY chg_2018_17 DESC;
-- b) Bonus: creating the regions lookup table and adding it to the query.
CREATE TABLE obereg_codes (
obereg text CONSTRAINT obereg_key PRIMARY KEY,
region text
);
INSERT INTO obereg_codes
VALUES ('01', 'New England (CT ME MA NH RI VT)'),
('02', 'Mid East (DE DC MD NJ NY PA)'),
('03', 'Great Lakes (IL IN MI OH WI)'),
('04', 'Plains (IA KS MN MO NE ND SD)'),
('05', 'Southeast (AL AR FL GA KY LA MS NC SC TN VA WV)'),
('06', 'Soutwest (AZ NM OK TX)'),
('07', 'Rocky Mountains (CO ID MT UT WY)'),
('08', 'Far West (AK CA HI NV OR WA)'),
('09', 'Outlying Areas (AS GU MP PR VI)');
-- sum() visits by region.
SELECT obereg_codes.region,
sum(pls18.visits) AS visits_2018,
sum(pls17.visits) AS visits_2017,
sum(pls16.visits) AS visits_2016,
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
FROM pls_fy2018_libraries pls18
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
JOIN obereg_codes ON pls18.obereg = obereg_codes.obereg
WHERE pls18.visits >= 0
AND pls17.visits >= 0
AND pls16.visits >= 0
GROUP BY obereg_codes.region
ORDER BY chg_2018_17 DESC;
-- 3. Thinking back to the types of joins you learned in Chapter 7,
-- which join type will show you all the rows in all three tables,
-- including those without a match? Write such a query and add an
-- IS NULL filter in a WHERE clause to show agencies not included
-- in one or more of the tables.
-- Answer: a FULL OUTER JOIN will show all rows in both tables.
SELECT pls18.libname, pls18.city, pls18.stabr, pls18.statstru,
pls17.libname, pls17.city, pls17.stabr, pls17.statstru,
pls16.libname, pls16.city, pls16.stabr, pls16.statstru
FROM pls_fy2018_libraries pls18
FULL OUTER JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
FULL OUTER JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
WHERE pls16.fscskey IS NULL OR pls17.fscskey IS NULL;
-- Note: The IS NULL statements in the WHERE clause limit results to those
-- that do not appear in one or more tables.