Chapter 9 code and data
This commit is contained in:
parent
498950fa62
commit
a29acc084d
@ -1,305 +1,343 @@
|
||||
-- FIRST EDITION FILE; IGNORE
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
--------------------------------------------------------------
|
||||
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||
---------------------------------------------------------------------------
|
||||
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
|
||||
-- by Anthony DeBarros
|
||||
|
||||
-- Chapter 9 Code Examples
|
||||
--------------------------------------------------------------
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
-- Listing 9-1: Creating and filling the 2014 Public Libraries Survey table
|
||||
-- Listing 9-1: Creating and filling the 2018 Public Libraries Survey table
|
||||
|
||||
CREATE TABLE pls_fy2014_pupld14a (
|
||||
stabr varchar(2) NOT NULL,
|
||||
fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY,
|
||||
libid varchar(20) NOT NULL,
|
||||
libname varchar(100) NOT NULL,
|
||||
obereg varchar(2) NOT NULL,
|
||||
rstatus integer NOT NULL,
|
||||
statstru varchar(2) NOT NULL,
|
||||
statname varchar(2) NOT NULL,
|
||||
stataddr varchar(2) NOT NULL,
|
||||
longitud numeric(10,7) NOT NULL,
|
||||
latitude numeric(10,7) NOT NULL,
|
||||
fipsst varchar(2) NOT NULL,
|
||||
fipsco varchar(3) NOT NULL,
|
||||
address varchar(35) NOT NULL,
|
||||
city varchar(20) NOT NULL,
|
||||
zip varchar(5) NOT NULL,
|
||||
zip4 varchar(4) NOT NULL,
|
||||
cnty varchar(20) NOT NULL,
|
||||
phone varchar(10) NOT NULL,
|
||||
c_relatn varchar(2) NOT NULL,
|
||||
c_legbas varchar(2) NOT NULL,
|
||||
c_admin varchar(2) NOT NULL,
|
||||
geocode varchar(3) NOT NULL,
|
||||
lsabound varchar(1) NOT NULL,
|
||||
startdat varchar(10),
|
||||
enddate varchar(10),
|
||||
CREATE TABLE pls_fy2018_libraries (
|
||||
stabr text NOT NULL,
|
||||
fscskey text CONSTRAINT fscskey_2018_pkey PRIMARY KEY,
|
||||
libid text NOT NULL,
|
||||
libname text NOT NULL,
|
||||
address text NOT NULL,
|
||||
city text NOT NULL,
|
||||
zip text NOT NULL,
|
||||
county text NOT NULL,
|
||||
phone text NOT NULL,
|
||||
c_relatn text NOT NULL,
|
||||
c_legbas text NOT NULL,
|
||||
c_admin text NOT NULL,
|
||||
c_fscs text NOT NULL,
|
||||
geocode text NOT NULL,
|
||||
lsabound text NOT NULL,
|
||||
startdate text NOT NULL,
|
||||
enddate text NOT NULL,
|
||||
popu_lsa integer NOT NULL,
|
||||
popu_und integer NOT NULL,
|
||||
centlib integer NOT NULL,
|
||||
branlib integer NOT NULL,
|
||||
bkmob integer NOT NULL,
|
||||
master numeric(8,2) NOT NULL,
|
||||
libraria numeric(8,2) NOT NULL,
|
||||
totstaff numeric(8,2) NOT NULL,
|
||||
locgvt integer NOT NULL,
|
||||
stgvt integer NOT NULL,
|
||||
fedgvt integer NOT NULL,
|
||||
totincm integer NOT NULL,
|
||||
salaries integer,
|
||||
benefit integer,
|
||||
staffexp integer,
|
||||
prmatexp integer NOT NULL,
|
||||
elmatexp integer NOT NULL,
|
||||
totexpco integer NOT NULL,
|
||||
totopexp integer NOT NULL,
|
||||
lcap_rev integer NOT NULL,
|
||||
scap_rev integer NOT NULL,
|
||||
fcap_rev integer NOT NULL,
|
||||
cap_rev integer NOT NULL,
|
||||
capital integer NOT NULL,
|
||||
bkvol integer NOT NULL,
|
||||
ebook integer NOT NULL,
|
||||
audio_ph integer NOT NULL,
|
||||
audio_dl float NOT NULL,
|
||||
audio_dl integer NOT NULL,
|
||||
video_ph integer NOT NULL,
|
||||
video_dl float NOT NULL,
|
||||
databases integer NOT NULL,
|
||||
video_dl integer NOT NULL,
|
||||
ec_lo_ot integer NOT NULL,
|
||||
subscrip integer NOT NULL,
|
||||
hrs_open integer NOT NULL,
|
||||
visits integer NOT NULL,
|
||||
referenc integer NOT NULL,
|
||||
reference integer NOT NULL,
|
||||
regbor integer NOT NULL,
|
||||
totcir integer NOT NULL,
|
||||
kidcircl integer NOT NULL,
|
||||
elmatcir integer NOT NULL,
|
||||
loanto integer NOT NULL,
|
||||
loanfm integer NOT NULL,
|
||||
totpro integer NOT NULL,
|
||||
totatten integer NOT NULL,
|
||||
gpterms integer NOT NULL,
|
||||
pitusr integer NOT NULL,
|
||||
wifisess integer NOT NULL,
|
||||
yr_sub integer NOT NULL
|
||||
obereg text NOT NULL,
|
||||
statstru text NOT NULL,
|
||||
statname text NOT NULL,
|
||||
stataddr text NOT NULL,
|
||||
longitude numeric(10,7) NOT NULL,
|
||||
latitude numeric(10,7) NOT NULL
|
||||
);
|
||||
|
||||
CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname);
|
||||
CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr);
|
||||
CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city);
|
||||
CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits);
|
||||
|
||||
COPY pls_fy2014_pupld14a
|
||||
FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv'
|
||||
COPY pls_fy2018_libraries
|
||||
-- FROM 'C:\YourDirectory\pls_fy2018_libraries.csv'
|
||||
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2018_libraries.csv'
|
||||
WITH (FORMAT CSV, HEADER);
|
||||
|
||||
-- Listing 9-2: Creating and filling the 2009 Public Libraries Survey table
|
||||
CREATE INDEX libname_2018_idx ON pls_fy2018_libraries (libname);
|
||||
|
||||
CREATE TABLE pls_fy2009_pupld09a (
|
||||
stabr varchar(2) NOT NULL,
|
||||
fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY,
|
||||
libid varchar(20) NOT NULL,
|
||||
libname varchar(100) NOT NULL,
|
||||
address varchar(35) NOT NULL,
|
||||
city varchar(20) NOT NULL,
|
||||
zip varchar(5) NOT NULL,
|
||||
zip4 varchar(4) NOT NULL,
|
||||
cnty varchar(20) NOT NULL,
|
||||
phone varchar(10) NOT NULL,
|
||||
c_relatn varchar(2) NOT NULL,
|
||||
c_legbas varchar(2) NOT NULL,
|
||||
c_admin varchar(2) NOT NULL,
|
||||
geocode varchar(3) NOT NULL,
|
||||
lsabound varchar(1) NOT NULL,
|
||||
startdat varchar(10),
|
||||
enddate varchar(10),
|
||||
-- Listing 9-2: Creating and filling the 2017 and 2016 Public Libraries Survey tables
|
||||
|
||||
CREATE TABLE pls_fy2017_libraries (
|
||||
stabr text NOT NULL,
|
||||
fscskey text CONSTRAINT fscskey_17_pkey PRIMARY KEY,
|
||||
libid text NOT NULL,
|
||||
libname text NOT NULL,
|
||||
address text NOT NULL,
|
||||
city text NOT NULL,
|
||||
zip text NOT NULL,
|
||||
county text NOT NULL,
|
||||
phone text NOT NULL,
|
||||
c_relatn text NOT NULL,
|
||||
c_legbas text NOT NULL,
|
||||
c_admin text NOT NULL,
|
||||
c_fscs text NOT NULL,
|
||||
geocode text NOT NULL,
|
||||
lsabound text NOT NULL,
|
||||
startdate text NOT NULL,
|
||||
enddate text NOT NULL,
|
||||
popu_lsa integer NOT NULL,
|
||||
popu_und integer NOT NULL,
|
||||
centlib integer NOT NULL,
|
||||
branlib integer NOT NULL,
|
||||
bkmob integer NOT NULL,
|
||||
master numeric(8,2) NOT NULL,
|
||||
libraria numeric(8,2) NOT NULL,
|
||||
totstaff numeric(8,2) NOT NULL,
|
||||
locgvt integer NOT NULL,
|
||||
stgvt integer NOT NULL,
|
||||
fedgvt integer NOT NULL,
|
||||
totincm integer NOT NULL,
|
||||
salaries integer,
|
||||
benefit integer,
|
||||
staffexp integer,
|
||||
prmatexp integer NOT NULL,
|
||||
elmatexp integer NOT NULL,
|
||||
totexpco integer NOT NULL,
|
||||
totopexp integer NOT NULL,
|
||||
lcap_rev integer NOT NULL,
|
||||
scap_rev integer NOT NULL,
|
||||
fcap_rev integer NOT NULL,
|
||||
cap_rev integer NOT NULL,
|
||||
capital integer NOT NULL,
|
||||
bkvol integer NOT NULL,
|
||||
ebook integer NOT NULL,
|
||||
audio integer NOT NULL,
|
||||
video integer NOT NULL,
|
||||
databases integer NOT NULL,
|
||||
audio_ph integer NOT NULL,
|
||||
audio_dl integer NOT NULL,
|
||||
video_ph integer NOT NULL,
|
||||
video_dl integer NOT NULL,
|
||||
ec_lo_ot integer NOT NULL,
|
||||
subscrip integer NOT NULL,
|
||||
hrs_open integer NOT NULL,
|
||||
visits integer NOT NULL,
|
||||
referenc integer NOT NULL,
|
||||
reference integer NOT NULL,
|
||||
regbor integer NOT NULL,
|
||||
totcir integer NOT NULL,
|
||||
kidcircl integer NOT NULL,
|
||||
loanto integer NOT NULL,
|
||||
loanfm integer NOT NULL,
|
||||
totpro integer NOT NULL,
|
||||
totatten integer NOT NULL,
|
||||
gpterms integer NOT NULL,
|
||||
pitusr integer NOT NULL,
|
||||
yr_sub integer NOT NULL,
|
||||
obereg varchar(2) NOT NULL,
|
||||
rstatus integer NOT NULL,
|
||||
statstru varchar(2) NOT NULL,
|
||||
statname varchar(2) NOT NULL,
|
||||
stataddr varchar(2) NOT NULL,
|
||||
longitud numeric(10,7) NOT NULL,
|
||||
latitude numeric(10,7) NOT NULL,
|
||||
fipsst varchar(2) NOT NULL,
|
||||
fipsco varchar(3) NOT NULL
|
||||
wifisess integer NOT NULL,
|
||||
obereg text NOT NULL,
|
||||
statstru text NOT NULL,
|
||||
statname text NOT NULL,
|
||||
stataddr text NOT NULL,
|
||||
longitude numeric(10,7) NOT NULL,
|
||||
latitude numeric(10,7) NOT NULL
|
||||
);
|
||||
|
||||
CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname);
|
||||
CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr);
|
||||
CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city);
|
||||
CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits);
|
||||
CREATE TABLE pls_fy2016_libraries (
|
||||
stabr text NOT NULL,
|
||||
fscskey text CONSTRAINT fscskey_16_pkey PRIMARY KEY,
|
||||
libid text NOT NULL,
|
||||
libname text NOT NULL,
|
||||
address text NOT NULL,
|
||||
city text NOT NULL,
|
||||
zip text NOT NULL,
|
||||
county text NOT NULL,
|
||||
phone text NOT NULL,
|
||||
c_relatn text NOT NULL,
|
||||
c_legbas text NOT NULL,
|
||||
c_admin text NOT NULL,
|
||||
c_fscs text NOT NULL,
|
||||
geocode text NOT NULL,
|
||||
lsabound text NOT NULL,
|
||||
startdate text NOT NULL,
|
||||
enddate text NOT NULL,
|
||||
popu_lsa integer NOT NULL,
|
||||
popu_und integer NOT NULL,
|
||||
centlib integer NOT NULL,
|
||||
branlib integer NOT NULL,
|
||||
bkmob integer NOT NULL,
|
||||
totstaff numeric(8,2) NOT NULL,
|
||||
bkvol integer NOT NULL,
|
||||
ebook integer NOT NULL,
|
||||
audio_ph integer NOT NULL,
|
||||
audio_dl integer NOT NULL,
|
||||
video_ph integer NOT NULL,
|
||||
video_dl integer NOT NULL,
|
||||
ec_lo_ot integer NOT NULL,
|
||||
subscrip integer NOT NULL,
|
||||
hrs_open integer NOT NULL,
|
||||
visits integer NOT NULL,
|
||||
reference integer NOT NULL,
|
||||
regbor integer NOT NULL,
|
||||
totcir integer NOT NULL,
|
||||
kidcircl integer NOT NULL,
|
||||
totpro integer NOT NULL,
|
||||
gpterms integer NOT NULL,
|
||||
pitusr integer NOT NULL,
|
||||
wifisess integer NOT NULL,
|
||||
obereg text NOT NULL,
|
||||
statstru text NOT NULL,
|
||||
statname text NOT NULL,
|
||||
stataddr text NOT NULL,
|
||||
longitude numeric(10,7) NOT NULL,
|
||||
latitude numeric(10,7) NOT NULL
|
||||
);
|
||||
|
||||
COPY pls_fy2009_pupld09a
|
||||
FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv'
|
||||
COPY pls_fy2017_libraries
|
||||
-- FROM 'C:\YourDirectory\pls_fy2017_libraries.csv'
|
||||
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2017_libraries.csv'
|
||||
WITH (FORMAT CSV, HEADER);
|
||||
|
||||
COPY pls_fy2016_libraries
|
||||
-- FROM 'C:\YourDirectory\pls_fy2016_libraries.csv'
|
||||
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2016_libraries.csv'
|
||||
WITH (FORMAT CSV, HEADER);
|
||||
|
||||
CREATE INDEX libname_2017_idx ON pls_fy2017_libraries (libname);
|
||||
CREATE INDEX libname_2016_idx ON pls_fy2016_libraries (libname);
|
||||
|
||||
|
||||
-- Listing 9-3: Using count() for table row counts
|
||||
|
||||
SELECT count(*)
|
||||
FROM pls_fy2014_pupld14a;
|
||||
FROM pls_fy2018_libraries;
|
||||
|
||||
SELECT count(*)
|
||||
FROM pls_fy2009_pupld09a;
|
||||
FROM pls_fy2017_libraries;
|
||||
|
||||
SELECT count(*)
|
||||
FROM pls_fy2016_libraries;
|
||||
|
||||
-- Listing 9-4: Using count() for the number of values in a column
|
||||
|
||||
SELECT count(salaries)
|
||||
FROM pls_fy2014_pupld14a;
|
||||
SELECT count(phone)
|
||||
FROM pls_fy2018_libraries;
|
||||
|
||||
-- Listing 9-5: Using count() for the number of distinct values in a column
|
||||
|
||||
SELECT count(libname)
|
||||
FROM pls_fy2014_pupld14a;
|
||||
FROM pls_fy2018_libraries;
|
||||
|
||||
SELECT count(DISTINCT libname)
|
||||
FROM pls_fy2014_pupld14a;
|
||||
FROM pls_fy2018_libraries;
|
||||
|
||||
-- Bonus: find duplicate libnames
|
||||
SELECT libname, count(libname)
|
||||
FROM pls_fy2014_pupld14a
|
||||
FROM pls_fy2018_libraries
|
||||
GROUP BY libname
|
||||
ORDER BY count(libname) DESC;
|
||||
|
||||
-- Bonus: see location of every Oxford Public Library
|
||||
SELECT libname, city, stabr
|
||||
FROM pls_fy2014_pupld14a
|
||||
FROM pls_fy2018_libraries
|
||||
WHERE libname = 'OXFORD PUBLIC LIBRARY';
|
||||
|
||||
-- Listing 9-6: Finding the most and fewest visits using max() and min()
|
||||
SELECT max(visits), min(visits)
|
||||
FROM pls_fy2014_pupld14a;
|
||||
FROM pls_fy2018_libraries;
|
||||
|
||||
-- Listing 9-7: Using GROUP BY on the stabr column
|
||||
|
||||
-- There are 56 in 2014.
|
||||
-- There are 55 in 2018.
|
||||
SELECT stabr
|
||||
FROM pls_fy2014_pupld14a
|
||||
FROM pls_fy2018_libraries
|
||||
GROUP BY stabr
|
||||
ORDER BY stabr;
|
||||
|
||||
-- Bonus: there are 55 in 2009.
|
||||
-- Bonus: there are 54 in 2017.
|
||||
SELECT stabr
|
||||
FROM pls_fy2009_pupld09a
|
||||
FROM pls_fy2017_libraries
|
||||
GROUP BY stabr
|
||||
ORDER BY stabr;
|
||||
|
||||
-- Listing 9-8: Using GROUP BY on the city and stabr columns
|
||||
|
||||
SELECT city, stabr
|
||||
FROM pls_fy2014_pupld14a
|
||||
FROM pls_fy2018_libraries
|
||||
GROUP BY city, stabr
|
||||
ORDER BY city, stabr;
|
||||
|
||||
-- Bonus: We can count some of the combos
|
||||
-- Bonus: We can count the combos
|
||||
SELECT city, stabr, count(*)
|
||||
FROM pls_fy2014_pupld14a
|
||||
FROM pls_fy2018_libraries
|
||||
GROUP BY city, stabr
|
||||
ORDER BY count(*) DESC;
|
||||
|
||||
-- Listing 9-9: GROUP BY with count() on the stabr column
|
||||
-- Listing 9-9: Using GROUP BY with count() on the stabr column
|
||||
|
||||
SELECT stabr, count(*)
|
||||
FROM pls_fy2014_pupld14a
|
||||
FROM pls_fy2018_libraries
|
||||
GROUP BY stabr
|
||||
ORDER BY count(*) DESC;
|
||||
|
||||
-- Listing 9-10: GROUP BY with count() on the stabr and stataddr columns
|
||||
|
||||
SELECT stabr, stataddr, count(*)
|
||||
FROM pls_fy2014_pupld14a
|
||||
FROM pls_fy2018_libraries
|
||||
GROUP BY stabr, stataddr
|
||||
ORDER BY stabr ASC, count(*) DESC;
|
||||
ORDER BY stabr, stataddr;
|
||||
|
||||
-- Listing 9-11: Using the sum() aggregate function to total visits to
|
||||
-- libraries in 2014 and 2009
|
||||
|
||||
-- 2014
|
||||
SELECT sum(visits) AS visits_2014
|
||||
FROM pls_fy2014_pupld14a
|
||||
-- 2018
|
||||
SELECT sum(visits) AS visits_2018
|
||||
FROM pls_fy2018_libraries
|
||||
WHERE visits >= 0;
|
||||
|
||||
-- 2009
|
||||
SELECT sum(visits) AS visits_2009
|
||||
FROM pls_fy2009_pupld09a
|
||||
-- 2017
|
||||
SELECT sum(visits) AS visits_2017
|
||||
FROM pls_fy2017_libraries
|
||||
WHERE visits >= 0;
|
||||
|
||||
-- Listing 9-12: Using sum() to total visits on joined 2014 and 2009 library tables
|
||||
-- 2016
|
||||
SELECT sum(visits) AS visits_2016
|
||||
FROM pls_fy2016_libraries
|
||||
WHERE visits >= 0;
|
||||
|
||||
-- Listing 9-12: Using sum() to total visits on joined 2018, 2017, and 2016 tables
|
||||
|
||||
SELECT sum(pls18.visits) AS visits_2018,
|
||||
sum(pls17.visits) AS visits_2017,
|
||||
sum(pls16.visits) AS visits_2016
|
||||
FROM pls_fy2018_libraries pls18
|
||||
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
WHERE pls18.visits >= 0
|
||||
AND pls17.visits >= 0
|
||||
AND pls16.visits >= 0;
|
||||
|
||||
-- Bonus: summing wifi sessions
|
||||
SELECT sum(pls18.wifisess) AS wifi_2018,
|
||||
sum(pls17.wifisess) AS wifi_2017,
|
||||
sum(pls16.wifisess) AS wifi_2016
|
||||
FROM pls_fy2018_libraries pls18
|
||||
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
WHERE pls18.wifisess >= 0
|
||||
AND pls17.wifisess >= 0
|
||||
AND pls16.wifisess >= 0;
|
||||
|
||||
SELECT sum(pls14.visits) AS visits_2014,
|
||||
sum(pls09.visits) AS visits_2009
|
||||
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
||||
ON pls14.fscskey = pls09.fscskey
|
||||
WHERE pls14.visits >= 0 AND pls09.visits >= 0;
|
||||
|
||||
-- Listing 9-13: Using GROUP BY to track percent change in library visits by state
|
||||
|
||||
SELECT pls14.stabr,
|
||||
sum(pls14.visits) AS visits_2014,
|
||||
sum(pls09.visits) AS visits_2009,
|
||||
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
|
||||
sum(pls09.visits) * 100, 2 ) AS pct_change
|
||||
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
||||
ON pls14.fscskey = pls09.fscskey
|
||||
WHERE pls14.visits >= 0 AND pls09.visits >= 0
|
||||
GROUP BY pls14.stabr
|
||||
ORDER BY pct_change DESC;
|
||||
SELECT pls18.stabr,
|
||||
sum(pls18.visits) AS visits_2018,
|
||||
sum(pls17.visits) AS visits_2017,
|
||||
sum(pls16.visits) AS visits_2016,
|
||||
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
||||
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
||||
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
||||
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
||||
FROM pls_fy2018_libraries pls18
|
||||
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
WHERE pls18.visits >= 0
|
||||
AND pls17.visits >= 0
|
||||
AND pls16.visits >= 0
|
||||
GROUP BY pls18.stabr
|
||||
ORDER BY chg_2018_17 DESC;
|
||||
|
||||
-- Listing 9-14: Using HAVING to filter the results of an aggregate query
|
||||
|
||||
SELECT pls14.stabr,
|
||||
sum(pls14.visits) AS visits_2014,
|
||||
sum(pls09.visits) AS visits_2009,
|
||||
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
|
||||
sum(pls09.visits) * 100, 2 ) AS pct_change
|
||||
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
||||
ON pls14.fscskey = pls09.fscskey
|
||||
WHERE pls14.visits >= 0 AND pls09.visits >= 0
|
||||
GROUP BY pls14.stabr
|
||||
HAVING sum(pls14.visits) > 50000000
|
||||
ORDER BY pct_change DESC;
|
||||
SELECT pls18.stabr,
|
||||
sum(pls18.visits) AS visits_2018,
|
||||
sum(pls17.visits) AS visits_2017,
|
||||
sum(pls16.visits) AS visits_2016,
|
||||
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
||||
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
||||
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
||||
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
||||
FROM pls_fy2018_libraries pls18
|
||||
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
WHERE pls18.visits >= 0
|
||||
AND pls17.visits >= 0
|
||||
AND pls16.visits >= 0
|
||||
GROUP BY pls18.stabr
|
||||
HAVING sum(pls18.visits) > 50000000
|
||||
ORDER BY chg_2018_17 DESC;
|
||||
|
||||
|
||||
@ -470,5 +470,140 @@ CREATE TABLE songs (
|
||||
-- to perform many queries that include date ranges.
|
||||
|
||||
|
||||
----------------------------------------------------------------------------
|
||||
-- Chapter 9: Extracting Information by Grouping and Summarizing
|
||||
----------------------------------------------------------------------------
|
||||
|
||||
-- 1. We saw that library visits have declined recently in most places. But
|
||||
-- what is the pattern in library employment? All three library survey tables
|
||||
-- contain the column totstaff, which is the number of paid full-time equivalent
|
||||
-- employees. Modify the code in Listings 9-13 and 9-14 to calculate the
|
||||
-- percent change in the sum of the column over time, examining all states as
|
||||
-- well as states with the most visitors. Watch out for negative values!
|
||||
|
||||
-- Answer (all states):
|
||||
|
||||
SELECT pls18.stabr,
|
||||
sum(pls18.totstaff) AS totstaff_2018,
|
||||
sum(pls17.totstaff) AS totstaff_2017,
|
||||
sum(pls16.totstaff) AS totstaff_2016,
|
||||
round( (sum(pls18.totstaff::numeric) - sum(pls17.totstaff)) /
|
||||
sum(pls17.totstaff) * 100, 1 ) AS chg_2018_17,
|
||||
round( (sum(pls17.totstaff::numeric) - sum(pls16.totstaff)) /
|
||||
sum(pls16.hrs_open) * 100, 1 ) AS chg_2017_16
|
||||
FROM pls_fy2018_libraries pls18
|
||||
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
WHERE pls18.totstaff >= 0
|
||||
AND pls17.totstaff >= 0
|
||||
AND pls16.totstaff >= 0
|
||||
GROUP BY pls18.stabr
|
||||
ORDER BY chg_2018_17 DESC;
|
||||
|
||||
-- Answer (filtered with HAVING):
|
||||
|
||||
SELECT pls18.stabr,
|
||||
sum(pls18.totstaff) AS totstaff_2018,
|
||||
sum(pls17.totstaff) AS totstaff_2017,
|
||||
sum(pls16.totstaff) AS totstaff_2016,
|
||||
round( (sum(pls18.totstaff::numeric) - sum(pls17.totstaff)) /
|
||||
sum(pls17.totstaff) * 100, 1 ) AS chg_2018_17,
|
||||
round( (sum(pls17.totstaff::numeric) - sum(pls16.totstaff)) /
|
||||
sum(pls16.hrs_open) * 100, 1 ) AS chg_2017_16
|
||||
FROM pls_fy2018_libraries pls18
|
||||
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
WHERE pls18.totstaff >= 0
|
||||
AND pls17.totstaff >= 0
|
||||
AND pls16.totstaff >= 0
|
||||
GROUP BY pls18.stabr
|
||||
HAVING sum(pls18.visits) > 50000000
|
||||
ORDER BY chg_2018_17 DESC;
|
||||
|
||||
-- 2. The library survey tables contain a column called obereg, a two-digit
|
||||
-- Bureau of Economic Analysis Code that classifies each library agency
|
||||
-- according to a region of the United States, such as New England, Rocky
|
||||
-- Mountains, and so on. Just as we calculated the percent change in visits
|
||||
-- grouped by state, do the same to group percent changes in visits by U.S.
|
||||
-- region using obereg. Consult the survey documentation to find the meaning
|
||||
-- of each region code. For a bonus challenge, create a table with the obereg
|
||||
-- code as the primary key and the region name as text, and join it to the
|
||||
-- summary query to group by the region name rather than the code.
|
||||
|
||||
-- Answer:
|
||||
|
||||
-- a) sum() visits by region.
|
||||
|
||||
SELECT pls18.obereg,
|
||||
sum(pls18.visits) AS visits_2018,
|
||||
sum(pls17.visits) AS visits_2017,
|
||||
sum(pls16.visits) AS visits_2016,
|
||||
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
||||
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
||||
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
||||
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
||||
FROM pls_fy2018_libraries pls18
|
||||
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
WHERE pls18.visits >= 0
|
||||
AND pls17.visits >= 0
|
||||
AND pls16.visits >= 0
|
||||
GROUP BY pls18.obereg
|
||||
ORDER BY chg_2018_17 DESC;
|
||||
|
||||
-- b) Bonus: creating the regions lookup table and adding it to the query.
|
||||
|
||||
CREATE TABLE obereg_codes (
|
||||
obereg text CONSTRAINT obereg_key PRIMARY KEY,
|
||||
region text
|
||||
);
|
||||
|
||||
INSERT INTO obereg_codes
|
||||
VALUES ('01', 'New England (CT ME MA NH RI VT)'),
|
||||
('02', 'Mid East (DE DC MD NJ NY PA)'),
|
||||
('03', 'Great Lakes (IL IN MI OH WI)'),
|
||||
('04', 'Plains (IA KS MN MO NE ND SD)'),
|
||||
('05', 'Southeast (AL AR FL GA KY LA MS NC SC TN VA WV)'),
|
||||
('06', 'Soutwest (AZ NM OK TX)'),
|
||||
('07', 'Rocky Mountains (CO ID MT UT WY)'),
|
||||
('08', 'Far West (AK CA HI NV OR WA)'),
|
||||
('09', 'Outlying Areas (AS GU MP PR VI)');
|
||||
|
||||
-- sum() visits by region.
|
||||
|
||||
SELECT obereg_codes.region,
|
||||
sum(pls18.visits) AS visits_2018,
|
||||
sum(pls17.visits) AS visits_2017,
|
||||
sum(pls16.visits) AS visits_2016,
|
||||
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
||||
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
||||
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
||||
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
||||
FROM pls_fy2018_libraries pls18
|
||||
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
JOIN obereg_codes ON pls18.obereg = obereg_codes.obereg
|
||||
WHERE pls18.visits >= 0
|
||||
AND pls17.visits >= 0
|
||||
AND pls16.visits >= 0
|
||||
GROUP BY obereg_codes.region
|
||||
ORDER BY chg_2018_17 DESC;
|
||||
|
||||
-- 3. Thinking back to the types of joins you learned in Chapter 7,
|
||||
-- which join type will show you all the rows in all three tables,
|
||||
-- including those without a match? Write such a query and add an
|
||||
-- IS NULL filter in a WHERE clause to show agencies not included
|
||||
-- in one or more of the tables.
|
||||
|
||||
-- Answer: a FULL OUTER JOIN will show all rows in both tables.
|
||||
|
||||
SELECT pls18.libname, pls18.city, pls18.stabr, pls18.statstru,
|
||||
pls17.libname, pls17.city, pls17.stabr, pls17.statstru,
|
||||
pls16.libname, pls16.city, pls16.stabr, pls16.statstru
|
||||
FROM pls_fy2018_libraries pls18
|
||||
FULL OUTER JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||
FULL OUTER JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||
WHERE pls16.fscskey IS NULL OR pls17.fscskey IS NULL;
|
||||
|
||||
-- Note: The IS NULL statements in the WHERE clause limit results to those
|
||||
-- that do not appear in one or more tables.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user