Chapter 9 code and data
This commit is contained in:
parent
498950fa62
commit
a29acc084d
@ -1,305 +1,343 @@
|
|||||||
-- FIRST EDITION FILE; IGNORE
|
---------------------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------
|
|
||||||
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
|
||||||
-- by Anthony DeBarros
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
-- Chapter 9 Code Examples
|
-- Chapter 9 Code Examples
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
-- Listing 9-1: Creating and filling the 2014 Public Libraries Survey table
|
-- Listing 9-1: Creating and filling the 2018 Public Libraries Survey table
|
||||||
|
|
||||||
CREATE TABLE pls_fy2014_pupld14a (
|
CREATE TABLE pls_fy2018_libraries (
|
||||||
stabr varchar(2) NOT NULL,
|
stabr text NOT NULL,
|
||||||
fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY,
|
fscskey text CONSTRAINT fscskey_2018_pkey PRIMARY KEY,
|
||||||
libid varchar(20) NOT NULL,
|
libid text NOT NULL,
|
||||||
libname varchar(100) NOT NULL,
|
libname text NOT NULL,
|
||||||
obereg varchar(2) NOT NULL,
|
address text NOT NULL,
|
||||||
rstatus integer NOT NULL,
|
city text NOT NULL,
|
||||||
statstru varchar(2) NOT NULL,
|
zip text NOT NULL,
|
||||||
statname varchar(2) NOT NULL,
|
county text NOT NULL,
|
||||||
stataddr varchar(2) NOT NULL,
|
phone text NOT NULL,
|
||||||
longitud numeric(10,7) NOT NULL,
|
c_relatn text NOT NULL,
|
||||||
latitude numeric(10,7) NOT NULL,
|
c_legbas text NOT NULL,
|
||||||
fipsst varchar(2) NOT NULL,
|
c_admin text NOT NULL,
|
||||||
fipsco varchar(3) NOT NULL,
|
c_fscs text NOT NULL,
|
||||||
address varchar(35) NOT NULL,
|
geocode text NOT NULL,
|
||||||
city varchar(20) NOT NULL,
|
lsabound text NOT NULL,
|
||||||
zip varchar(5) NOT NULL,
|
startdate text NOT NULL,
|
||||||
zip4 varchar(4) NOT NULL,
|
enddate text NOT NULL,
|
||||||
cnty varchar(20) NOT NULL,
|
|
||||||
phone varchar(10) NOT NULL,
|
|
||||||
c_relatn varchar(2) NOT NULL,
|
|
||||||
c_legbas varchar(2) NOT NULL,
|
|
||||||
c_admin varchar(2) NOT NULL,
|
|
||||||
geocode varchar(3) NOT NULL,
|
|
||||||
lsabound varchar(1) NOT NULL,
|
|
||||||
startdat varchar(10),
|
|
||||||
enddate varchar(10),
|
|
||||||
popu_lsa integer NOT NULL,
|
popu_lsa integer NOT NULL,
|
||||||
|
popu_und integer NOT NULL,
|
||||||
centlib integer NOT NULL,
|
centlib integer NOT NULL,
|
||||||
branlib integer NOT NULL,
|
branlib integer NOT NULL,
|
||||||
bkmob integer NOT NULL,
|
bkmob integer NOT NULL,
|
||||||
master numeric(8,2) NOT NULL,
|
|
||||||
libraria numeric(8,2) NOT NULL,
|
|
||||||
totstaff numeric(8,2) NOT NULL,
|
totstaff numeric(8,2) NOT NULL,
|
||||||
locgvt integer NOT NULL,
|
|
||||||
stgvt integer NOT NULL,
|
|
||||||
fedgvt integer NOT NULL,
|
|
||||||
totincm integer NOT NULL,
|
|
||||||
salaries integer,
|
|
||||||
benefit integer,
|
|
||||||
staffexp integer,
|
|
||||||
prmatexp integer NOT NULL,
|
|
||||||
elmatexp integer NOT NULL,
|
|
||||||
totexpco integer NOT NULL,
|
|
||||||
totopexp integer NOT NULL,
|
|
||||||
lcap_rev integer NOT NULL,
|
|
||||||
scap_rev integer NOT NULL,
|
|
||||||
fcap_rev integer NOT NULL,
|
|
||||||
cap_rev integer NOT NULL,
|
|
||||||
capital integer NOT NULL,
|
|
||||||
bkvol integer NOT NULL,
|
bkvol integer NOT NULL,
|
||||||
ebook integer NOT NULL,
|
ebook integer NOT NULL,
|
||||||
audio_ph integer NOT NULL,
|
audio_ph integer NOT NULL,
|
||||||
audio_dl float NOT NULL,
|
audio_dl integer NOT NULL,
|
||||||
video_ph integer NOT NULL,
|
video_ph integer NOT NULL,
|
||||||
video_dl float NOT NULL,
|
video_dl integer NOT NULL,
|
||||||
databases integer NOT NULL,
|
ec_lo_ot integer NOT NULL,
|
||||||
subscrip integer NOT NULL,
|
subscrip integer NOT NULL,
|
||||||
hrs_open integer NOT NULL,
|
hrs_open integer NOT NULL,
|
||||||
visits integer NOT NULL,
|
visits integer NOT NULL,
|
||||||
referenc integer NOT NULL,
|
reference integer NOT NULL,
|
||||||
regbor integer NOT NULL,
|
regbor integer NOT NULL,
|
||||||
totcir integer NOT NULL,
|
totcir integer NOT NULL,
|
||||||
kidcircl integer NOT NULL,
|
kidcircl integer NOT NULL,
|
||||||
elmatcir integer NOT NULL,
|
|
||||||
loanto integer NOT NULL,
|
|
||||||
loanfm integer NOT NULL,
|
|
||||||
totpro integer NOT NULL,
|
totpro integer NOT NULL,
|
||||||
totatten integer NOT NULL,
|
|
||||||
gpterms integer NOT NULL,
|
gpterms integer NOT NULL,
|
||||||
pitusr integer NOT NULL,
|
pitusr integer NOT NULL,
|
||||||
wifisess integer NOT NULL,
|
wifisess integer NOT NULL,
|
||||||
yr_sub integer NOT NULL
|
obereg text NOT NULL,
|
||||||
|
statstru text NOT NULL,
|
||||||
|
statname text NOT NULL,
|
||||||
|
stataddr text NOT NULL,
|
||||||
|
longitude numeric(10,7) NOT NULL,
|
||||||
|
latitude numeric(10,7) NOT NULL
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname);
|
COPY pls_fy2018_libraries
|
||||||
CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr);
|
-- FROM 'C:\YourDirectory\pls_fy2018_libraries.csv'
|
||||||
CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city);
|
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2018_libraries.csv'
|
||||||
CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits);
|
|
||||||
|
|
||||||
COPY pls_fy2014_pupld14a
|
|
||||||
FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv'
|
|
||||||
WITH (FORMAT CSV, HEADER);
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
-- Listing 9-2: Creating and filling the 2009 Public Libraries Survey table
|
CREATE INDEX libname_2018_idx ON pls_fy2018_libraries (libname);
|
||||||
|
|
||||||
CREATE TABLE pls_fy2009_pupld09a (
|
-- Listing 9-2: Creating and filling the 2017 and 2016 Public Libraries Survey tables
|
||||||
stabr varchar(2) NOT NULL,
|
|
||||||
fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY,
|
CREATE TABLE pls_fy2017_libraries (
|
||||||
libid varchar(20) NOT NULL,
|
stabr text NOT NULL,
|
||||||
libname varchar(100) NOT NULL,
|
fscskey text CONSTRAINT fscskey_17_pkey PRIMARY KEY,
|
||||||
address varchar(35) NOT NULL,
|
libid text NOT NULL,
|
||||||
city varchar(20) NOT NULL,
|
libname text NOT NULL,
|
||||||
zip varchar(5) NOT NULL,
|
address text NOT NULL,
|
||||||
zip4 varchar(4) NOT NULL,
|
city text NOT NULL,
|
||||||
cnty varchar(20) NOT NULL,
|
zip text NOT NULL,
|
||||||
phone varchar(10) NOT NULL,
|
county text NOT NULL,
|
||||||
c_relatn varchar(2) NOT NULL,
|
phone text NOT NULL,
|
||||||
c_legbas varchar(2) NOT NULL,
|
c_relatn text NOT NULL,
|
||||||
c_admin varchar(2) NOT NULL,
|
c_legbas text NOT NULL,
|
||||||
geocode varchar(3) NOT NULL,
|
c_admin text NOT NULL,
|
||||||
lsabound varchar(1) NOT NULL,
|
c_fscs text NOT NULL,
|
||||||
startdat varchar(10),
|
geocode text NOT NULL,
|
||||||
enddate varchar(10),
|
lsabound text NOT NULL,
|
||||||
|
startdate text NOT NULL,
|
||||||
|
enddate text NOT NULL,
|
||||||
popu_lsa integer NOT NULL,
|
popu_lsa integer NOT NULL,
|
||||||
|
popu_und integer NOT NULL,
|
||||||
centlib integer NOT NULL,
|
centlib integer NOT NULL,
|
||||||
branlib integer NOT NULL,
|
branlib integer NOT NULL,
|
||||||
bkmob integer NOT NULL,
|
bkmob integer NOT NULL,
|
||||||
master numeric(8,2) NOT NULL,
|
|
||||||
libraria numeric(8,2) NOT NULL,
|
|
||||||
totstaff numeric(8,2) NOT NULL,
|
totstaff numeric(8,2) NOT NULL,
|
||||||
locgvt integer NOT NULL,
|
|
||||||
stgvt integer NOT NULL,
|
|
||||||
fedgvt integer NOT NULL,
|
|
||||||
totincm integer NOT NULL,
|
|
||||||
salaries integer,
|
|
||||||
benefit integer,
|
|
||||||
staffexp integer,
|
|
||||||
prmatexp integer NOT NULL,
|
|
||||||
elmatexp integer NOT NULL,
|
|
||||||
totexpco integer NOT NULL,
|
|
||||||
totopexp integer NOT NULL,
|
|
||||||
lcap_rev integer NOT NULL,
|
|
||||||
scap_rev integer NOT NULL,
|
|
||||||
fcap_rev integer NOT NULL,
|
|
||||||
cap_rev integer NOT NULL,
|
|
||||||
capital integer NOT NULL,
|
|
||||||
bkvol integer NOT NULL,
|
bkvol integer NOT NULL,
|
||||||
ebook integer NOT NULL,
|
ebook integer NOT NULL,
|
||||||
audio integer NOT NULL,
|
audio_ph integer NOT NULL,
|
||||||
video integer NOT NULL,
|
audio_dl integer NOT NULL,
|
||||||
databases integer NOT NULL,
|
video_ph integer NOT NULL,
|
||||||
|
video_dl integer NOT NULL,
|
||||||
|
ec_lo_ot integer NOT NULL,
|
||||||
subscrip integer NOT NULL,
|
subscrip integer NOT NULL,
|
||||||
hrs_open integer NOT NULL,
|
hrs_open integer NOT NULL,
|
||||||
visits integer NOT NULL,
|
visits integer NOT NULL,
|
||||||
referenc integer NOT NULL,
|
reference integer NOT NULL,
|
||||||
regbor integer NOT NULL,
|
regbor integer NOT NULL,
|
||||||
totcir integer NOT NULL,
|
totcir integer NOT NULL,
|
||||||
kidcircl integer NOT NULL,
|
kidcircl integer NOT NULL,
|
||||||
loanto integer NOT NULL,
|
|
||||||
loanfm integer NOT NULL,
|
|
||||||
totpro integer NOT NULL,
|
totpro integer NOT NULL,
|
||||||
totatten integer NOT NULL,
|
|
||||||
gpterms integer NOT NULL,
|
gpterms integer NOT NULL,
|
||||||
pitusr integer NOT NULL,
|
pitusr integer NOT NULL,
|
||||||
yr_sub integer NOT NULL,
|
wifisess integer NOT NULL,
|
||||||
obereg varchar(2) NOT NULL,
|
obereg text NOT NULL,
|
||||||
rstatus integer NOT NULL,
|
statstru text NOT NULL,
|
||||||
statstru varchar(2) NOT NULL,
|
statname text NOT NULL,
|
||||||
statname varchar(2) NOT NULL,
|
stataddr text NOT NULL,
|
||||||
stataddr varchar(2) NOT NULL,
|
longitude numeric(10,7) NOT NULL,
|
||||||
longitud numeric(10,7) NOT NULL,
|
latitude numeric(10,7) NOT NULL
|
||||||
latitude numeric(10,7) NOT NULL,
|
|
||||||
fipsst varchar(2) NOT NULL,
|
|
||||||
fipsco varchar(3) NOT NULL
|
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname);
|
CREATE TABLE pls_fy2016_libraries (
|
||||||
CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr);
|
stabr text NOT NULL,
|
||||||
CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city);
|
fscskey text CONSTRAINT fscskey_16_pkey PRIMARY KEY,
|
||||||
CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits);
|
libid text NOT NULL,
|
||||||
|
libname text NOT NULL,
|
||||||
|
address text NOT NULL,
|
||||||
|
city text NOT NULL,
|
||||||
|
zip text NOT NULL,
|
||||||
|
county text NOT NULL,
|
||||||
|
phone text NOT NULL,
|
||||||
|
c_relatn text NOT NULL,
|
||||||
|
c_legbas text NOT NULL,
|
||||||
|
c_admin text NOT NULL,
|
||||||
|
c_fscs text NOT NULL,
|
||||||
|
geocode text NOT NULL,
|
||||||
|
lsabound text NOT NULL,
|
||||||
|
startdate text NOT NULL,
|
||||||
|
enddate text NOT NULL,
|
||||||
|
popu_lsa integer NOT NULL,
|
||||||
|
popu_und integer NOT NULL,
|
||||||
|
centlib integer NOT NULL,
|
||||||
|
branlib integer NOT NULL,
|
||||||
|
bkmob integer NOT NULL,
|
||||||
|
totstaff numeric(8,2) NOT NULL,
|
||||||
|
bkvol integer NOT NULL,
|
||||||
|
ebook integer NOT NULL,
|
||||||
|
audio_ph integer NOT NULL,
|
||||||
|
audio_dl integer NOT NULL,
|
||||||
|
video_ph integer NOT NULL,
|
||||||
|
video_dl integer NOT NULL,
|
||||||
|
ec_lo_ot integer NOT NULL,
|
||||||
|
subscrip integer NOT NULL,
|
||||||
|
hrs_open integer NOT NULL,
|
||||||
|
visits integer NOT NULL,
|
||||||
|
reference integer NOT NULL,
|
||||||
|
regbor integer NOT NULL,
|
||||||
|
totcir integer NOT NULL,
|
||||||
|
kidcircl integer NOT NULL,
|
||||||
|
totpro integer NOT NULL,
|
||||||
|
gpterms integer NOT NULL,
|
||||||
|
pitusr integer NOT NULL,
|
||||||
|
wifisess integer NOT NULL,
|
||||||
|
obereg text NOT NULL,
|
||||||
|
statstru text NOT NULL,
|
||||||
|
statname text NOT NULL,
|
||||||
|
stataddr text NOT NULL,
|
||||||
|
longitude numeric(10,7) NOT NULL,
|
||||||
|
latitude numeric(10,7) NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
COPY pls_fy2009_pupld09a
|
COPY pls_fy2017_libraries
|
||||||
FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv'
|
-- FROM 'C:\YourDirectory\pls_fy2017_libraries.csv'
|
||||||
|
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2017_libraries.csv'
|
||||||
WITH (FORMAT CSV, HEADER);
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
COPY pls_fy2016_libraries
|
||||||
|
-- FROM 'C:\YourDirectory\pls_fy2016_libraries.csv'
|
||||||
|
from '/Users/adebarros/Dropbox/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_09/pls_fy2016_libraries.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
CREATE INDEX libname_2017_idx ON pls_fy2017_libraries (libname);
|
||||||
|
CREATE INDEX libname_2016_idx ON pls_fy2016_libraries (libname);
|
||||||
|
|
||||||
|
|
||||||
-- Listing 9-3: Using count() for table row counts
|
-- Listing 9-3: Using count() for table row counts
|
||||||
|
|
||||||
SELECT count(*)
|
SELECT count(*)
|
||||||
FROM pls_fy2014_pupld14a;
|
FROM pls_fy2018_libraries;
|
||||||
|
|
||||||
SELECT count(*)
|
SELECT count(*)
|
||||||
FROM pls_fy2009_pupld09a;
|
FROM pls_fy2017_libraries;
|
||||||
|
|
||||||
|
SELECT count(*)
|
||||||
|
FROM pls_fy2016_libraries;
|
||||||
|
|
||||||
-- Listing 9-4: Using count() for the number of values in a column
|
-- Listing 9-4: Using count() for the number of values in a column
|
||||||
|
|
||||||
SELECT count(salaries)
|
SELECT count(phone)
|
||||||
FROM pls_fy2014_pupld14a;
|
FROM pls_fy2018_libraries;
|
||||||
|
|
||||||
-- Listing 9-5: Using count() for the number of distinct values in a column
|
-- Listing 9-5: Using count() for the number of distinct values in a column
|
||||||
|
|
||||||
SELECT count(libname)
|
SELECT count(libname)
|
||||||
FROM pls_fy2014_pupld14a;
|
FROM pls_fy2018_libraries;
|
||||||
|
|
||||||
SELECT count(DISTINCT libname)
|
SELECT count(DISTINCT libname)
|
||||||
FROM pls_fy2014_pupld14a;
|
FROM pls_fy2018_libraries;
|
||||||
|
|
||||||
-- Bonus: find duplicate libnames
|
-- Bonus: find duplicate libnames
|
||||||
SELECT libname, count(libname)
|
SELECT libname, count(libname)
|
||||||
FROM pls_fy2014_pupld14a
|
FROM pls_fy2018_libraries
|
||||||
GROUP BY libname
|
GROUP BY libname
|
||||||
ORDER BY count(libname) DESC;
|
ORDER BY count(libname) DESC;
|
||||||
|
|
||||||
-- Bonus: see location of every Oxford Public Library
|
-- Bonus: see location of every Oxford Public Library
|
||||||
SELECT libname, city, stabr
|
SELECT libname, city, stabr
|
||||||
FROM pls_fy2014_pupld14a
|
FROM pls_fy2018_libraries
|
||||||
WHERE libname = 'OXFORD PUBLIC LIBRARY';
|
WHERE libname = 'OXFORD PUBLIC LIBRARY';
|
||||||
|
|
||||||
-- Listing 9-6: Finding the most and fewest visits using max() and min()
|
-- Listing 9-6: Finding the most and fewest visits using max() and min()
|
||||||
SELECT max(visits), min(visits)
|
SELECT max(visits), min(visits)
|
||||||
FROM pls_fy2014_pupld14a;
|
FROM pls_fy2018_libraries;
|
||||||
|
|
||||||
-- Listing 9-7: Using GROUP BY on the stabr column
|
-- Listing 9-7: Using GROUP BY on the stabr column
|
||||||
|
|
||||||
-- There are 56 in 2014.
|
-- There are 55 in 2018.
|
||||||
SELECT stabr
|
SELECT stabr
|
||||||
FROM pls_fy2014_pupld14a
|
FROM pls_fy2018_libraries
|
||||||
GROUP BY stabr
|
GROUP BY stabr
|
||||||
ORDER BY stabr;
|
ORDER BY stabr;
|
||||||
|
|
||||||
-- Bonus: there are 55 in 2009.
|
-- Bonus: there are 54 in 2017.
|
||||||
SELECT stabr
|
SELECT stabr
|
||||||
FROM pls_fy2009_pupld09a
|
FROM pls_fy2017_libraries
|
||||||
GROUP BY stabr
|
GROUP BY stabr
|
||||||
ORDER BY stabr;
|
ORDER BY stabr;
|
||||||
|
|
||||||
-- Listing 9-8: Using GROUP BY on the city and stabr columns
|
-- Listing 9-8: Using GROUP BY on the city and stabr columns
|
||||||
|
|
||||||
SELECT city, stabr
|
SELECT city, stabr
|
||||||
FROM pls_fy2014_pupld14a
|
FROM pls_fy2018_libraries
|
||||||
GROUP BY city, stabr
|
GROUP BY city, stabr
|
||||||
ORDER BY city, stabr;
|
ORDER BY city, stabr;
|
||||||
|
|
||||||
-- Bonus: We can count some of the combos
|
-- Bonus: We can count the combos
|
||||||
SELECT city, stabr, count(*)
|
SELECT city, stabr, count(*)
|
||||||
FROM pls_fy2014_pupld14a
|
FROM pls_fy2018_libraries
|
||||||
GROUP BY city, stabr
|
GROUP BY city, stabr
|
||||||
ORDER BY count(*) DESC;
|
ORDER BY count(*) DESC;
|
||||||
|
|
||||||
-- Listing 9-9: GROUP BY with count() on the stabr column
|
-- Listing 9-9: Using GROUP BY with count() on the stabr column
|
||||||
|
|
||||||
SELECT stabr, count(*)
|
SELECT stabr, count(*)
|
||||||
FROM pls_fy2014_pupld14a
|
FROM pls_fy2018_libraries
|
||||||
GROUP BY stabr
|
GROUP BY stabr
|
||||||
ORDER BY count(*) DESC;
|
ORDER BY count(*) DESC;
|
||||||
|
|
||||||
-- Listing 9-10: GROUP BY with count() on the stabr and stataddr columns
|
-- Listing 9-10: GROUP BY with count() on the stabr and stataddr columns
|
||||||
|
|
||||||
SELECT stabr, stataddr, count(*)
|
SELECT stabr, stataddr, count(*)
|
||||||
FROM pls_fy2014_pupld14a
|
FROM pls_fy2018_libraries
|
||||||
GROUP BY stabr, stataddr
|
GROUP BY stabr, stataddr
|
||||||
ORDER BY stabr ASC, count(*) DESC;
|
ORDER BY stabr, stataddr;
|
||||||
|
|
||||||
-- Listing 9-11: Using the sum() aggregate function to total visits to
|
-- Listing 9-11: Using the sum() aggregate function to total visits to
|
||||||
-- libraries in 2014 and 2009
|
-- libraries in 2014 and 2009
|
||||||
|
|
||||||
-- 2014
|
-- 2018
|
||||||
SELECT sum(visits) AS visits_2014
|
SELECT sum(visits) AS visits_2018
|
||||||
FROM pls_fy2014_pupld14a
|
FROM pls_fy2018_libraries
|
||||||
WHERE visits >= 0;
|
WHERE visits >= 0;
|
||||||
|
|
||||||
-- 2009
|
-- 2017
|
||||||
SELECT sum(visits) AS visits_2009
|
SELECT sum(visits) AS visits_2017
|
||||||
FROM pls_fy2009_pupld09a
|
FROM pls_fy2017_libraries
|
||||||
WHERE visits >= 0;
|
WHERE visits >= 0;
|
||||||
|
|
||||||
-- Listing 9-12: Using sum() to total visits on joined 2014 and 2009 library tables
|
-- 2016
|
||||||
|
SELECT sum(visits) AS visits_2016
|
||||||
|
FROM pls_fy2016_libraries
|
||||||
|
WHERE visits >= 0;
|
||||||
|
|
||||||
|
-- Listing 9-12: Using sum() to total visits on joined 2018, 2017, and 2016 tables
|
||||||
|
|
||||||
|
SELECT sum(pls18.visits) AS visits_2018,
|
||||||
|
sum(pls17.visits) AS visits_2017,
|
||||||
|
sum(pls16.visits) AS visits_2016
|
||||||
|
FROM pls_fy2018_libraries pls18
|
||||||
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
WHERE pls18.visits >= 0
|
||||||
|
AND pls17.visits >= 0
|
||||||
|
AND pls16.visits >= 0;
|
||||||
|
|
||||||
|
-- Bonus: summing wifi sessions
|
||||||
|
SELECT sum(pls18.wifisess) AS wifi_2018,
|
||||||
|
sum(pls17.wifisess) AS wifi_2017,
|
||||||
|
sum(pls16.wifisess) AS wifi_2016
|
||||||
|
FROM pls_fy2018_libraries pls18
|
||||||
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
WHERE pls18.wifisess >= 0
|
||||||
|
AND pls17.wifisess >= 0
|
||||||
|
AND pls16.wifisess >= 0;
|
||||||
|
|
||||||
SELECT sum(pls14.visits) AS visits_2014,
|
|
||||||
sum(pls09.visits) AS visits_2009
|
|
||||||
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
|
||||||
ON pls14.fscskey = pls09.fscskey
|
|
||||||
WHERE pls14.visits >= 0 AND pls09.visits >= 0;
|
|
||||||
|
|
||||||
-- Listing 9-13: Using GROUP BY to track percent change in library visits by state
|
-- Listing 9-13: Using GROUP BY to track percent change in library visits by state
|
||||||
|
|
||||||
SELECT pls14.stabr,
|
SELECT pls18.stabr,
|
||||||
sum(pls14.visits) AS visits_2014,
|
sum(pls18.visits) AS visits_2018,
|
||||||
sum(pls09.visits) AS visits_2009,
|
sum(pls17.visits) AS visits_2017,
|
||||||
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
|
sum(pls16.visits) AS visits_2016,
|
||||||
sum(pls09.visits) * 100, 2 ) AS pct_change
|
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
||||||
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
||||||
ON pls14.fscskey = pls09.fscskey
|
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
||||||
WHERE pls14.visits >= 0 AND pls09.visits >= 0
|
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
||||||
GROUP BY pls14.stabr
|
FROM pls_fy2018_libraries pls18
|
||||||
ORDER BY pct_change DESC;
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
WHERE pls18.visits >= 0
|
||||||
|
AND pls17.visits >= 0
|
||||||
|
AND pls16.visits >= 0
|
||||||
|
GROUP BY pls18.stabr
|
||||||
|
ORDER BY chg_2018_17 DESC;
|
||||||
|
|
||||||
-- Listing 9-14: Using HAVING to filter the results of an aggregate query
|
-- Listing 9-14: Using HAVING to filter the results of an aggregate query
|
||||||
|
|
||||||
SELECT pls14.stabr,
|
SELECT pls18.stabr,
|
||||||
sum(pls14.visits) AS visits_2014,
|
sum(pls18.visits) AS visits_2018,
|
||||||
sum(pls09.visits) AS visits_2009,
|
sum(pls17.visits) AS visits_2017,
|
||||||
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
|
sum(pls16.visits) AS visits_2016,
|
||||||
sum(pls09.visits) * 100, 2 ) AS pct_change
|
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
||||||
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
||||||
ON pls14.fscskey = pls09.fscskey
|
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
||||||
WHERE pls14.visits >= 0 AND pls09.visits >= 0
|
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
||||||
GROUP BY pls14.stabr
|
FROM pls_fy2018_libraries pls18
|
||||||
HAVING sum(pls14.visits) > 50000000
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
ORDER BY pct_change DESC;
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
WHERE pls18.visits >= 0
|
||||||
|
AND pls17.visits >= 0
|
||||||
|
AND pls16.visits >= 0
|
||||||
|
GROUP BY pls18.stabr
|
||||||
|
HAVING sum(pls18.visits) > 50000000
|
||||||
|
ORDER BY chg_2018_17 DESC;
|
||||||
|
|
||||||
|
|||||||
@ -470,5 +470,140 @@ CREATE TABLE songs (
|
|||||||
-- to perform many queries that include date ranges.
|
-- to perform many queries that include date ranges.
|
||||||
|
|
||||||
|
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
-- Chapter 9: Extracting Information by Grouping and Summarizing
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- 1. We saw that library visits have declined recently in most places. But
|
||||||
|
-- what is the pattern in library employment? All three library survey tables
|
||||||
|
-- contain the column totstaff, which is the number of paid full-time equivalent
|
||||||
|
-- employees. Modify the code in Listings 9-13 and 9-14 to calculate the
|
||||||
|
-- percent change in the sum of the column over time, examining all states as
|
||||||
|
-- well as states with the most visitors. Watch out for negative values!
|
||||||
|
|
||||||
|
-- Answer (all states):
|
||||||
|
|
||||||
|
SELECT pls18.stabr,
|
||||||
|
sum(pls18.totstaff) AS totstaff_2018,
|
||||||
|
sum(pls17.totstaff) AS totstaff_2017,
|
||||||
|
sum(pls16.totstaff) AS totstaff_2016,
|
||||||
|
round( (sum(pls18.totstaff::numeric) - sum(pls17.totstaff)) /
|
||||||
|
sum(pls17.totstaff) * 100, 1 ) AS chg_2018_17,
|
||||||
|
round( (sum(pls17.totstaff::numeric) - sum(pls16.totstaff)) /
|
||||||
|
sum(pls16.hrs_open) * 100, 1 ) AS chg_2017_16
|
||||||
|
FROM pls_fy2018_libraries pls18
|
||||||
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
WHERE pls18.totstaff >= 0
|
||||||
|
AND pls17.totstaff >= 0
|
||||||
|
AND pls16.totstaff >= 0
|
||||||
|
GROUP BY pls18.stabr
|
||||||
|
ORDER BY chg_2018_17 DESC;
|
||||||
|
|
||||||
|
-- Answer (filtered with HAVING):
|
||||||
|
|
||||||
|
SELECT pls18.stabr,
|
||||||
|
sum(pls18.totstaff) AS totstaff_2018,
|
||||||
|
sum(pls17.totstaff) AS totstaff_2017,
|
||||||
|
sum(pls16.totstaff) AS totstaff_2016,
|
||||||
|
round( (sum(pls18.totstaff::numeric) - sum(pls17.totstaff)) /
|
||||||
|
sum(pls17.totstaff) * 100, 1 ) AS chg_2018_17,
|
||||||
|
round( (sum(pls17.totstaff::numeric) - sum(pls16.totstaff)) /
|
||||||
|
sum(pls16.hrs_open) * 100, 1 ) AS chg_2017_16
|
||||||
|
FROM pls_fy2018_libraries pls18
|
||||||
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
WHERE pls18.totstaff >= 0
|
||||||
|
AND pls17.totstaff >= 0
|
||||||
|
AND pls16.totstaff >= 0
|
||||||
|
GROUP BY pls18.stabr
|
||||||
|
HAVING sum(pls18.visits) > 50000000
|
||||||
|
ORDER BY chg_2018_17 DESC;
|
||||||
|
|
||||||
|
-- 2. The library survey tables contain a column called obereg, a two-digit
|
||||||
|
-- Bureau of Economic Analysis Code that classifies each library agency
|
||||||
|
-- according to a region of the United States, such as New England, Rocky
|
||||||
|
-- Mountains, and so on. Just as we calculated the percent change in visits
|
||||||
|
-- grouped by state, do the same to group percent changes in visits by U.S.
|
||||||
|
-- region using obereg. Consult the survey documentation to find the meaning
|
||||||
|
-- of each region code. For a bonus challenge, create a table with the obereg
|
||||||
|
-- code as the primary key and the region name as text, and join it to the
|
||||||
|
-- summary query to group by the region name rather than the code.
|
||||||
|
|
||||||
|
-- Answer:
|
||||||
|
|
||||||
|
-- a) sum() visits by region.
|
||||||
|
|
||||||
|
SELECT pls18.obereg,
|
||||||
|
sum(pls18.visits) AS visits_2018,
|
||||||
|
sum(pls17.visits) AS visits_2017,
|
||||||
|
sum(pls16.visits) AS visits_2016,
|
||||||
|
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
||||||
|
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
||||||
|
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
||||||
|
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
||||||
|
FROM pls_fy2018_libraries pls18
|
||||||
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
WHERE pls18.visits >= 0
|
||||||
|
AND pls17.visits >= 0
|
||||||
|
AND pls16.visits >= 0
|
||||||
|
GROUP BY pls18.obereg
|
||||||
|
ORDER BY chg_2018_17 DESC;
|
||||||
|
|
||||||
|
-- b) Bonus: creating the regions lookup table and adding it to the query.
|
||||||
|
|
||||||
|
CREATE TABLE obereg_codes (
|
||||||
|
obereg text CONSTRAINT obereg_key PRIMARY KEY,
|
||||||
|
region text
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO obereg_codes
|
||||||
|
VALUES ('01', 'New England (CT ME MA NH RI VT)'),
|
||||||
|
('02', 'Mid East (DE DC MD NJ NY PA)'),
|
||||||
|
('03', 'Great Lakes (IL IN MI OH WI)'),
|
||||||
|
('04', 'Plains (IA KS MN MO NE ND SD)'),
|
||||||
|
('05', 'Southeast (AL AR FL GA KY LA MS NC SC TN VA WV)'),
|
||||||
|
('06', 'Soutwest (AZ NM OK TX)'),
|
||||||
|
('07', 'Rocky Mountains (CO ID MT UT WY)'),
|
||||||
|
('08', 'Far West (AK CA HI NV OR WA)'),
|
||||||
|
('09', 'Outlying Areas (AS GU MP PR VI)');
|
||||||
|
|
||||||
|
-- sum() visits by region.
|
||||||
|
|
||||||
|
SELECT obereg_codes.region,
|
||||||
|
sum(pls18.visits) AS visits_2018,
|
||||||
|
sum(pls17.visits) AS visits_2017,
|
||||||
|
sum(pls16.visits) AS visits_2016,
|
||||||
|
round( (sum(pls18.visits::numeric) - sum(pls17.visits)) /
|
||||||
|
sum(pls17.visits) * 100, 1 ) AS chg_2018_17,
|
||||||
|
round( (sum(pls17.visits::numeric) - sum(pls16.visits)) /
|
||||||
|
sum(pls16.visits) * 100, 1 ) AS chg_2017_16
|
||||||
|
FROM pls_fy2018_libraries pls18
|
||||||
|
JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
|
JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
JOIN obereg_codes ON pls18.obereg = obereg_codes.obereg
|
||||||
|
WHERE pls18.visits >= 0
|
||||||
|
AND pls17.visits >= 0
|
||||||
|
AND pls16.visits >= 0
|
||||||
|
GROUP BY obereg_codes.region
|
||||||
|
ORDER BY chg_2018_17 DESC;
|
||||||
|
|
||||||
|
-- 3. Thinking back to the types of joins you learned in Chapter 7,
|
||||||
|
-- which join type will show you all the rows in all three tables,
|
||||||
|
-- including those without a match? Write such a query and add an
|
||||||
|
-- IS NULL filter in a WHERE clause to show agencies not included
|
||||||
|
-- in one or more of the tables.
|
||||||
|
|
||||||
|
-- Answer: a FULL OUTER JOIN will show all rows in both tables.
|
||||||
|
|
||||||
|
SELECT pls18.libname, pls18.city, pls18.stabr, pls18.statstru,
|
||||||
|
pls17.libname, pls17.city, pls17.stabr, pls17.statstru,
|
||||||
|
pls16.libname, pls16.city, pls16.stabr, pls16.statstru
|
||||||
|
FROM pls_fy2018_libraries pls18
|
||||||
|
FULL OUTER JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey
|
||||||
|
FULL OUTER JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey
|
||||||
|
WHERE pls16.fscskey IS NULL OR pls17.fscskey IS NULL;
|
||||||
|
|
||||||
|
-- Note: The IS NULL statements in the WHERE clause limit results to those
|
||||||
|
-- that do not appear in one or more tables.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user