diff --git a/Chapter_09/Chapter_09.sql b/Chapter_09/Chapter_09.sql index c4e61d3..06eea5d 100644 --- a/Chapter_09/Chapter_09.sql +++ b/Chapter_09/Chapter_09.sql @@ -1,305 +1,343 @@ --- FIRST EDITION FILE; IGNORE - - - - - --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data +--------------------------------------------------------------------------- +-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition -- by Anthony DeBarros -- Chapter 9 Code Examples --------------------------------------------------------------- +---------------------------------------------------------------------------- --- Listing 9-1: Creating and filling the 2014 Public Libraries Survey table +-- Listing 9-1: Creating and filling the 2018 Public Libraries Survey table -CREATE TABLE pls_fy2014_pupld14a ( - stabr varchar(2) NOT NULL, - fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY, - libid varchar(20) NOT NULL, - libname varchar(100) NOT NULL, - obereg varchar(2) NOT NULL, - rstatus integer NOT NULL, - statstru varchar(2) NOT NULL, - statname varchar(2) NOT NULL, - stataddr varchar(2) NOT NULL, - longitud numeric(10,7) NOT NULL, - latitude numeric(10,7) NOT NULL, - fipsst varchar(2) NOT NULL, - fipsco varchar(3) NOT NULL, - address varchar(35) NOT NULL, - city varchar(20) NOT NULL, - zip varchar(5) NOT NULL, - zip4 varchar(4) NOT NULL, - cnty varchar(20) NOT NULL, - phone varchar(10) NOT NULL, - c_relatn varchar(2) NOT NULL, - c_legbas varchar(2) NOT NULL, - c_admin varchar(2) NOT NULL, - geocode varchar(3) NOT NULL, - lsabound varchar(1) NOT NULL, - startdat varchar(10), - enddate varchar(10), +CREATE TABLE pls_fy2018_libraries ( + stabr text NOT NULL, + fscskey text CONSTRAINT fscskey_2018_pkey PRIMARY KEY, + libid text NOT NULL, + libname text NOT NULL, + address text NOT NULL, + city text NOT NULL, + zip text NOT NULL, + county text NOT NULL, + phone text NOT NULL, + c_relatn text NOT NULL, + c_legbas text NOT NULL, + c_admin text NOT NULL, + c_fscs text NOT NULL, + geocode text NOT NULL, + lsabound text NOT NULL, + startdate text NOT NULL, + enddate text NOT NULL, popu_lsa integer NOT NULL, + popu_und integer NOT NULL, centlib integer NOT NULL, branlib integer NOT NULL, bkmob integer NOT NULL, - master numeric(8,2) NOT NULL, - libraria numeric(8,2) NOT NULL, totstaff numeric(8,2) NOT NULL, - locgvt integer NOT NULL, - stgvt integer NOT NULL, - fedgvt integer NOT NULL, - totincm integer NOT NULL, - salaries integer, - benefit integer, - staffexp integer, - prmatexp integer NOT NULL, - elmatexp integer NOT NULL, - totexpco integer NOT NULL, - totopexp integer NOT NULL, - lcap_rev integer NOT NULL, - scap_rev integer NOT NULL, - fcap_rev integer NOT NULL, - cap_rev integer NOT NULL, - capital integer NOT NULL, bkvol integer NOT NULL, ebook integer NOT NULL, audio_ph integer NOT NULL, - audio_dl float NOT NULL, + audio_dl integer NOT NULL, video_ph integer NOT NULL, - video_dl float NOT NULL, - databases integer NOT NULL, + video_dl integer NOT NULL, + ec_lo_ot integer NOT NULL, subscrip integer NOT NULL, hrs_open integer NOT NULL, visits integer NOT NULL, - referenc integer NOT NULL, + reference integer NOT NULL, regbor integer NOT NULL, totcir integer NOT NULL, kidcircl integer NOT NULL, - elmatcir integer NOT NULL, - loanto integer NOT NULL, - loanfm integer NOT NULL, totpro integer NOT NULL, - totatten integer NOT NULL, gpterms integer NOT NULL, pitusr integer NOT NULL, wifisess integer NOT NULL, - yr_sub integer NOT NULL + obereg text NOT NULL, + statstru text NOT NULL, + statname text NOT NULL, + stataddr text NOT NULL, + longitude numeric(10,7) NOT NULL, + latitude numeric(10,7) NOT NULL ); -CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname); -CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr); -CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city); -CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits); - -COPY pls_fy2014_pupld14a -FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv' +COPY pls_fy2018_libraries +-- FROM 'C:\YourDirectory\pls_fy2018_libraries.csv' +from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2018_libraries.csv' WITH (FORMAT CSV, HEADER); --- Listing 9-2: Creating and filling the 2009 Public Libraries Survey table +CREATE INDEX libname_2018_idx ON pls_fy2018_libraries (libname); -CREATE TABLE pls_fy2009_pupld09a ( - stabr varchar(2) NOT NULL, - fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY, - libid varchar(20) NOT NULL, - libname varchar(100) NOT NULL, - address varchar(35) NOT NULL, - city varchar(20) NOT NULL, - zip varchar(5) NOT NULL, - zip4 varchar(4) NOT NULL, - cnty varchar(20) NOT NULL, - phone varchar(10) NOT NULL, - c_relatn varchar(2) NOT NULL, - c_legbas varchar(2) NOT NULL, - c_admin varchar(2) NOT NULL, - geocode varchar(3) NOT NULL, - lsabound varchar(1) NOT NULL, - startdat varchar(10), - enddate varchar(10), +-- Listing 9-2: Creating and filling the 2017 and 2016 Public Libraries Survey tables + +CREATE TABLE pls_fy2017_libraries ( + stabr text NOT NULL, + fscskey text CONSTRAINT fscskey_17_pkey PRIMARY KEY, + libid text NOT NULL, + libname text NOT NULL, + address text NOT NULL, + city text NOT NULL, + zip text NOT NULL, + county text NOT NULL, + phone text NOT NULL, + c_relatn text NOT NULL, + c_legbas text NOT NULL, + c_admin text NOT NULL, + c_fscs text NOT NULL, + geocode text NOT NULL, + lsabound text NOT NULL, + startdate text NOT NULL, + enddate text NOT NULL, popu_lsa integer NOT NULL, + popu_und integer NOT NULL, centlib integer NOT NULL, branlib integer NOT NULL, bkmob integer NOT NULL, - master numeric(8,2) NOT NULL, - libraria numeric(8,2) NOT NULL, totstaff numeric(8,2) NOT NULL, - locgvt integer NOT NULL, - stgvt integer NOT NULL, - fedgvt integer NOT NULL, - totincm integer NOT NULL, - salaries integer, - benefit integer, - staffexp integer, - prmatexp integer NOT NULL, - elmatexp integer NOT NULL, - totexpco integer NOT NULL, - totopexp integer NOT NULL, - lcap_rev integer NOT NULL, - scap_rev integer NOT NULL, - fcap_rev integer NOT NULL, - cap_rev integer NOT NULL, - capital integer NOT NULL, bkvol integer NOT NULL, ebook integer NOT NULL, - audio integer NOT NULL, - video integer NOT NULL, - databases integer NOT NULL, + audio_ph integer NOT NULL, + audio_dl integer NOT NULL, + video_ph integer NOT NULL, + video_dl integer NOT NULL, + ec_lo_ot integer NOT NULL, subscrip integer NOT NULL, hrs_open integer NOT NULL, visits integer NOT NULL, - referenc integer NOT NULL, + reference integer NOT NULL, regbor integer NOT NULL, totcir integer NOT NULL, kidcircl integer NOT NULL, - loanto integer NOT NULL, - loanfm integer NOT NULL, totpro integer NOT NULL, - totatten integer NOT NULL, gpterms integer NOT NULL, pitusr integer NOT NULL, - yr_sub integer NOT NULL, - obereg varchar(2) NOT NULL, - rstatus integer NOT NULL, - statstru varchar(2) NOT NULL, - statname varchar(2) NOT NULL, - stataddr varchar(2) NOT NULL, - longitud numeric(10,7) NOT NULL, - latitude numeric(10,7) NOT NULL, - fipsst varchar(2) NOT NULL, - fipsco varchar(3) NOT NULL + wifisess integer NOT NULL, + obereg text NOT NULL, + statstru text NOT NULL, + statname text NOT NULL, + stataddr text NOT NULL, + longitude numeric(10,7) NOT NULL, + latitude numeric(10,7) NOT NULL ); -CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname); -CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr); -CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city); -CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits); +CREATE TABLE pls_fy2016_libraries ( + stabr text NOT NULL, + fscskey text CONSTRAINT fscskey_16_pkey PRIMARY KEY, + libid text NOT NULL, + libname text NOT NULL, + address text NOT NULL, + city text NOT NULL, + zip text NOT NULL, + county text NOT NULL, + phone text NOT NULL, + c_relatn text NOT NULL, + c_legbas text NOT NULL, + c_admin text NOT NULL, + c_fscs text NOT NULL, + geocode text NOT NULL, + lsabound text NOT NULL, + startdate text NOT NULL, + enddate text NOT NULL, + popu_lsa integer NOT NULL, + popu_und integer NOT NULL, + centlib integer NOT NULL, + branlib integer NOT NULL, + bkmob integer NOT NULL, + totstaff numeric(8,2) NOT NULL, + bkvol integer NOT NULL, + ebook integer NOT NULL, + audio_ph integer NOT NULL, + audio_dl integer NOT NULL, + video_ph integer NOT NULL, + video_dl integer NOT NULL, + ec_lo_ot integer NOT NULL, + subscrip integer NOT NULL, + hrs_open integer NOT NULL, + visits integer NOT NULL, + reference integer NOT NULL, + regbor integer NOT NULL, + totcir integer NOT NULL, + kidcircl integer NOT NULL, + totpro integer NOT NULL, + gpterms integer NOT NULL, + pitusr integer NOT NULL, + wifisess integer NOT NULL, + obereg text NOT NULL, + statstru text NOT NULL, + statname text NOT NULL, + stataddr text NOT NULL, + longitude numeric(10,7) NOT NULL, + latitude numeric(10,7) NOT NULL +); -COPY pls_fy2009_pupld09a -FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv' +COPY pls_fy2017_libraries +-- FROM 'C:\YourDirectory\pls_fy2017_libraries.csv' +from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2017_libraries.csv' WITH (FORMAT CSV, HEADER); +COPY pls_fy2016_libraries +-- FROM 'C:\YourDirectory\pls_fy2016_libraries.csv' +from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2016_libraries.csv' +WITH (FORMAT CSV, HEADER); + +CREATE INDEX libname_2017_idx ON pls_fy2017_libraries (libname); +CREATE INDEX libname_2016_idx ON pls_fy2016_libraries (libname); + + -- Listing 9-3: Using count() for table row counts SELECT count(*) -FROM pls_fy2014_pupld14a; +FROM pls_fy2018_libraries; SELECT count(*) -FROM pls_fy2009_pupld09a; +FROM pls_fy2017_libraries; + +SELECT count(*) +FROM pls_fy2016_libraries; -- Listing 9-4: Using count() for the number of values in a column -SELECT count(salaries) -FROM pls_fy2014_pupld14a; +SELECT count(phone) +FROM pls_fy2018_libraries; -- Listing 9-5: Using count() for the number of distinct values in a column SELECT count(libname) -FROM pls_fy2014_pupld14a; +FROM pls_fy2018_libraries; SELECT count(DISTINCT libname) -FROM pls_fy2014_pupld14a; +FROM pls_fy2018_libraries; -- Bonus: find duplicate libnames SELECT libname, count(libname) -FROM pls_fy2014_pupld14a +FROM pls_fy2018_libraries GROUP BY libname ORDER BY count(libname) DESC; -- Bonus: see location of every Oxford Public Library SELECT libname, city, stabr -FROM pls_fy2014_pupld14a +FROM pls_fy2018_libraries WHERE libname = 'OXFORD PUBLIC LIBRARY'; -- Listing 9-6: Finding the most and fewest visits using max() and min() SELECT max(visits), min(visits) -FROM pls_fy2014_pupld14a; +FROM pls_fy2018_libraries; -- Listing 9-7: Using GROUP BY on the stabr column --- There are 56 in 2014. +-- There are 55 in 2018. SELECT stabr -FROM pls_fy2014_pupld14a +FROM pls_fy2018_libraries GROUP BY stabr ORDER BY stabr; --- Bonus: there are 55 in 2009. +-- Bonus: there are 54 in 2017. SELECT stabr -FROM pls_fy2009_pupld09a +FROM pls_fy2017_libraries GROUP BY stabr ORDER BY stabr; -- Listing 9-8: Using GROUP BY on the city and stabr columns SELECT city, stabr -FROM pls_fy2014_pupld14a +FROM pls_fy2018_libraries GROUP BY city, stabr ORDER BY city, stabr; --- Bonus: We can count some of the combos +-- Bonus: We can count the combos SELECT city, stabr, count(*) -FROM pls_fy2014_pupld14a +FROM pls_fy2018_libraries GROUP BY city, stabr ORDER BY count(*) DESC; --- Listing 9-9: GROUP BY with count() on the stabr column +-- Listing 9-9: Using GROUP BY with count() on the stabr column SELECT stabr, count(*) -FROM pls_fy2014_pupld14a +FROM pls_fy2018_libraries GROUP BY stabr ORDER BY count(*) DESC; -- Listing 9-10: GROUP BY with count() on the stabr and stataddr columns SELECT stabr, stataddr, count(*) -FROM pls_fy2014_pupld14a +FROM pls_fy2018_libraries GROUP BY stabr, stataddr -ORDER BY stabr ASC, count(*) DESC; +ORDER BY stabr, stataddr; -- Listing 9-11: Using the sum() aggregate function to total visits to -- libraries in 2014 and 2009 --- 2014 -SELECT sum(visits) AS visits_2014 -FROM pls_fy2014_pupld14a +-- 2018 +SELECT sum(visits) AS visits_2018 +FROM pls_fy2018_libraries WHERE visits >= 0; --- 2009 -SELECT sum(visits) AS visits_2009 -FROM pls_fy2009_pupld09a +-- 2017 +SELECT sum(visits) AS visits_2017 +FROM pls_fy2017_libraries WHERE visits >= 0; --- Listing 9-12: Using sum() to total visits on joined 2014 and 2009 library tables +-- 2016 +SELECT sum(visits) AS visits_2016 +FROM pls_fy2016_libraries +WHERE visits >= 0; + +-- Listing 9-12: Using sum() to total visits on joined 2018, 2017, and 2016 tables + +SELECT sum(pls18.visits) AS visits_2018, + sum(pls17.visits) AS visits_2017, + sum(pls16.visits) AS visits_2016 +FROM pls_fy2018_libraries pls18 + JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey +WHERE pls18.visits >= 0 + AND pls17.visits >= 0 + AND pls16.visits >= 0; + +-- Bonus: summing wifi sessions +SELECT sum(pls18.wifisess) AS wifi_2018, + sum(pls17.wifisess) AS wifi_2017, + sum(pls16.wifisess) AS wifi_2016 +FROM pls_fy2018_libraries pls18 + JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey +WHERE pls18.wifisess >= 0 + AND pls17.wifisess >= 0 + AND pls16.wifisess >= 0; -SELECT sum(pls14.visits) AS visits_2014, - sum(pls09.visits) AS visits_2009 -FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09 -ON pls14.fscskey = pls09.fscskey -WHERE pls14.visits >= 0 AND pls09.visits >= 0; -- Listing 9-13: Using GROUP BY to track percent change in library visits by state -SELECT pls14.stabr, - sum(pls14.visits) AS visits_2014, - sum(pls09.visits) AS visits_2009, - round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) / - sum(pls09.visits) * 100, 2 ) AS pct_change -FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09 -ON pls14.fscskey = pls09.fscskey -WHERE pls14.visits >= 0 AND pls09.visits >= 0 -GROUP BY pls14.stabr -ORDER BY pct_change DESC; +SELECT pls18.stabr, + sum(pls18.visits) AS visits_2018, + sum(pls17.visits) AS visits_2017, + sum(pls16.visits) AS visits_2016, + round( (sum(pls18.visits::numeric) - sum(pls17.visits)) / + sum(pls17.visits) * 100, 1 ) AS chg_2018_17, + round( (sum(pls17.visits::numeric) - sum(pls16.visits)) / + sum(pls16.visits) * 100, 1 ) AS chg_2017_16 +FROM pls_fy2018_libraries pls18 + JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey +WHERE pls18.visits >= 0 + AND pls17.visits >= 0 + AND pls16.visits >= 0 +GROUP BY pls18.stabr +ORDER BY chg_2018_17 DESC; -- Listing 9-14: Using HAVING to filter the results of an aggregate query -SELECT pls14.stabr, - sum(pls14.visits) AS visits_2014, - sum(pls09.visits) AS visits_2009, - round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) / - sum(pls09.visits) * 100, 2 ) AS pct_change -FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09 -ON pls14.fscskey = pls09.fscskey -WHERE pls14.visits >= 0 AND pls09.visits >= 0 -GROUP BY pls14.stabr -HAVING sum(pls14.visits) > 50000000 -ORDER BY pct_change DESC; +SELECT pls18.stabr, + sum(pls18.visits) AS visits_2018, + sum(pls17.visits) AS visits_2017, + sum(pls16.visits) AS visits_2016, + round( (sum(pls18.visits::numeric) - sum(pls17.visits)) / + sum(pls17.visits) * 100, 1 ) AS chg_2018_17, + round( (sum(pls17.visits::numeric) - sum(pls16.visits)) / + sum(pls16.visits) * 100, 1 ) AS chg_2017_16 +FROM pls_fy2018_libraries pls18 + JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey +WHERE pls18.visits >= 0 + AND pls17.visits >= 0 + AND pls16.visits >= 0 +GROUP BY pls18.stabr +HAVING sum(pls18.visits) > 50000000 +ORDER BY chg_2018_17 DESC; + diff --git a/Try_It_Yourself/Try_It_Yourself.sql b/Try_It_Yourself/Try_It_Yourself.sql index 1eb1350..e33e1f6 100644 --- a/Try_It_Yourself/Try_It_Yourself.sql +++ b/Try_It_Yourself/Try_It_Yourself.sql @@ -470,5 +470,140 @@ CREATE TABLE songs ( -- to perform many queries that include date ranges. +---------------------------------------------------------------------------- +-- Chapter 9: Extracting Information by Grouping and Summarizing +---------------------------------------------------------------------------- +-- 1. We saw that library visits have declined recently in most places. But +-- what is the pattern in library employment? All three library survey tables +-- contain the column totstaff, which is the number of paid full-time equivalent +-- employees. Modify the code in Listings 9-13 and 9-14 to calculate the +-- percent change in the sum of the column over time, examining all states as +-- well as states with the most visitors. Watch out for negative values! +-- Answer (all states): + +SELECT pls18.stabr, + sum(pls18.totstaff) AS totstaff_2018, + sum(pls17.totstaff) AS totstaff_2017, + sum(pls16.totstaff) AS totstaff_2016, + round( (sum(pls18.totstaff::numeric) - sum(pls17.totstaff)) / + sum(pls17.totstaff) * 100, 1 ) AS chg_2018_17, + round( (sum(pls17.totstaff::numeric) - sum(pls16.totstaff)) / + sum(pls16.hrs_open) * 100, 1 ) AS chg_2017_16 +FROM pls_fy2018_libraries pls18 + JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey +WHERE pls18.totstaff >= 0 + AND pls17.totstaff >= 0 + AND pls16.totstaff >= 0 +GROUP BY pls18.stabr +ORDER BY chg_2018_17 DESC; + +-- Answer (filtered with HAVING): + +SELECT pls18.stabr, + sum(pls18.totstaff) AS totstaff_2018, + sum(pls17.totstaff) AS totstaff_2017, + sum(pls16.totstaff) AS totstaff_2016, + round( (sum(pls18.totstaff::numeric) - sum(pls17.totstaff)) / + sum(pls17.totstaff) * 100, 1 ) AS chg_2018_17, + round( (sum(pls17.totstaff::numeric) - sum(pls16.totstaff)) / + sum(pls16.hrs_open) * 100, 1 ) AS chg_2017_16 +FROM pls_fy2018_libraries pls18 + JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey +WHERE pls18.totstaff >= 0 + AND pls17.totstaff >= 0 + AND pls16.totstaff >= 0 +GROUP BY pls18.stabr +HAVING sum(pls18.visits) > 50000000 +ORDER BY chg_2018_17 DESC; + +-- 2. The library survey tables contain a column called obereg, a two-digit +-- Bureau of Economic Analysis Code that classifies each library agency +-- according to a region of the United States, such as New England, Rocky +-- Mountains, and so on. Just as we calculated the percent change in visits +-- grouped by state, do the same to group percent changes in visits by U.S. +-- region using obereg. Consult the survey documentation to find the meaning +-- of each region code. For a bonus challenge, create a table with the obereg +-- code as the primary key and the region name as text, and join it to the +-- summary query to group by the region name rather than the code. + +-- Answer: + +-- a) sum() visits by region. + +SELECT pls18.obereg, + sum(pls18.visits) AS visits_2018, + sum(pls17.visits) AS visits_2017, + sum(pls16.visits) AS visits_2016, + round( (sum(pls18.visits::numeric) - sum(pls17.visits)) / + sum(pls17.visits) * 100, 1 ) AS chg_2018_17, + round( (sum(pls17.visits::numeric) - sum(pls16.visits)) / + sum(pls16.visits) * 100, 1 ) AS chg_2017_16 +FROM pls_fy2018_libraries pls18 + JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey +WHERE pls18.visits >= 0 + AND pls17.visits >= 0 + AND pls16.visits >= 0 +GROUP BY pls18.obereg +ORDER BY chg_2018_17 DESC; + +-- b) Bonus: creating the regions lookup table and adding it to the query. + +CREATE TABLE obereg_codes ( + obereg text CONSTRAINT obereg_key PRIMARY KEY, + region text +); + +INSERT INTO obereg_codes +VALUES ('01', 'New England (CT ME MA NH RI VT)'), + ('02', 'Mid East (DE DC MD NJ NY PA)'), + ('03', 'Great Lakes (IL IN MI OH WI)'), + ('04', 'Plains (IA KS MN MO NE ND SD)'), + ('05', 'Southeast (AL AR FL GA KY LA MS NC SC TN VA WV)'), + ('06', 'Soutwest (AZ NM OK TX)'), + ('07', 'Rocky Mountains (CO ID MT UT WY)'), + ('08', 'Far West (AK CA HI NV OR WA)'), + ('09', 'Outlying Areas (AS GU MP PR VI)'); + +-- sum() visits by region. + +SELECT obereg_codes.region, + sum(pls18.visits) AS visits_2018, + sum(pls17.visits) AS visits_2017, + sum(pls16.visits) AS visits_2016, + round( (sum(pls18.visits::numeric) - sum(pls17.visits)) / + sum(pls17.visits) * 100, 1 ) AS chg_2018_17, + round( (sum(pls17.visits::numeric) - sum(pls16.visits)) / + sum(pls16.visits) * 100, 1 ) AS chg_2017_16 +FROM pls_fy2018_libraries pls18 + JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey + JOIN obereg_codes ON pls18.obereg = obereg_codes.obereg +WHERE pls18.visits >= 0 + AND pls17.visits >= 0 + AND pls16.visits >= 0 +GROUP BY obereg_codes.region +ORDER BY chg_2018_17 DESC; + +-- 3. Thinking back to the types of joins you learned in Chapter 7, +-- which join type will show you all the rows in all three tables, +-- including those without a match? Write such a query and add an +-- IS NULL filter in a WHERE clause to show agencies not included +-- in one or more of the tables. + +-- Answer: a FULL OUTER JOIN will show all rows in both tables. + +SELECT pls18.libname, pls18.city, pls18.stabr, pls18.statstru, + pls17.libname, pls17.city, pls17.stabr, pls17.statstru, + pls16.libname, pls16.city, pls16.stabr, pls16.statstru +FROM pls_fy2018_libraries pls18 + FULL OUTER JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey + FULL OUTER JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey +WHERE pls16.fscskey IS NULL OR pls17.fscskey IS NULL; + +-- Note: The IS NULL statements in the WHERE clause limit results to those +-- that do not appear in one or more tables.