practical-sql-2/Chapter_09/Chapter_09.sql
2020-02-26 21:43:21 -05:00

300 lines
8.7 KiB
SQL

--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
-- by Anthony DeBarros
-- Chapter 8 Code Examples
--------------------------------------------------------------
-- Listing 8-1: Creating and filling the 2014 Public Libraries Survey table
CREATE TABLE pls_fy2014_pupld14a (
stabr varchar(2) NOT NULL,
fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY,
libid varchar(20) NOT NULL,
libname varchar(100) NOT NULL,
obereg varchar(2) NOT NULL,
rstatus integer NOT NULL,
statstru varchar(2) NOT NULL,
statname varchar(2) NOT NULL,
stataddr varchar(2) NOT NULL,
longitud numeric(10,7) NOT NULL,
latitude numeric(10,7) NOT NULL,
fipsst varchar(2) NOT NULL,
fipsco varchar(3) NOT NULL,
address varchar(35) NOT NULL,
city varchar(20) NOT NULL,
zip varchar(5) NOT NULL,
zip4 varchar(4) NOT NULL,
cnty varchar(20) NOT NULL,
phone varchar(10) NOT NULL,
c_relatn varchar(2) NOT NULL,
c_legbas varchar(2) NOT NULL,
c_admin varchar(2) NOT NULL,
geocode varchar(3) NOT NULL,
lsabound varchar(1) NOT NULL,
startdat varchar(10),
enddate varchar(10),
popu_lsa integer NOT NULL,
centlib integer NOT NULL,
branlib integer NOT NULL,
bkmob integer NOT NULL,
master numeric(8,2) NOT NULL,
libraria numeric(8,2) NOT NULL,
totstaff numeric(8,2) NOT NULL,
locgvt integer NOT NULL,
stgvt integer NOT NULL,
fedgvt integer NOT NULL,
totincm integer NOT NULL,
salaries integer,
benefit integer,
staffexp integer,
prmatexp integer NOT NULL,
elmatexp integer NOT NULL,
totexpco integer NOT NULL,
totopexp integer NOT NULL,
lcap_rev integer NOT NULL,
scap_rev integer NOT NULL,
fcap_rev integer NOT NULL,
cap_rev integer NOT NULL,
capital integer NOT NULL,
bkvol integer NOT NULL,
ebook integer NOT NULL,
audio_ph integer NOT NULL,
audio_dl float NOT NULL,
video_ph integer NOT NULL,
video_dl float NOT NULL,
databases integer NOT NULL,
subscrip integer NOT NULL,
hrs_open integer NOT NULL,
visits integer NOT NULL,
referenc integer NOT NULL,
regbor integer NOT NULL,
totcir integer NOT NULL,
kidcircl integer NOT NULL,
elmatcir integer NOT NULL,
loanto integer NOT NULL,
loanfm integer NOT NULL,
totpro integer NOT NULL,
totatten integer NOT NULL,
gpterms integer NOT NULL,
pitusr integer NOT NULL,
wifisess integer NOT NULL,
yr_sub integer NOT NULL
);
CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname);
CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr);
CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city);
CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits);
COPY pls_fy2014_pupld14a
FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv'
WITH (FORMAT CSV, HEADER);
-- Listing 8-2: Creating and filling the 2009 Public Libraries Survey table
CREATE TABLE pls_fy2009_pupld09a (
stabr varchar(2) NOT NULL,
fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY,
libid varchar(20) NOT NULL,
libname varchar(100) NOT NULL,
address varchar(35) NOT NULL,
city varchar(20) NOT NULL,
zip varchar(5) NOT NULL,
zip4 varchar(4) NOT NULL,
cnty varchar(20) NOT NULL,
phone varchar(10) NOT NULL,
c_relatn varchar(2) NOT NULL,
c_legbas varchar(2) NOT NULL,
c_admin varchar(2) NOT NULL,
geocode varchar(3) NOT NULL,
lsabound varchar(1) NOT NULL,
startdat varchar(10),
enddate varchar(10),
popu_lsa integer NOT NULL,
centlib integer NOT NULL,
branlib integer NOT NULL,
bkmob integer NOT NULL,
master numeric(8,2) NOT NULL,
libraria numeric(8,2) NOT NULL,
totstaff numeric(8,2) NOT NULL,
locgvt integer NOT NULL,
stgvt integer NOT NULL,
fedgvt integer NOT NULL,
totincm integer NOT NULL,
salaries integer,
benefit integer,
staffexp integer,
prmatexp integer NOT NULL,
elmatexp integer NOT NULL,
totexpco integer NOT NULL,
totopexp integer NOT NULL,
lcap_rev integer NOT NULL,
scap_rev integer NOT NULL,
fcap_rev integer NOT NULL,
cap_rev integer NOT NULL,
capital integer NOT NULL,
bkvol integer NOT NULL,
ebook integer NOT NULL,
audio integer NOT NULL,
video integer NOT NULL,
databases integer NOT NULL,
subscrip integer NOT NULL,
hrs_open integer NOT NULL,
visits integer NOT NULL,
referenc integer NOT NULL,
regbor integer NOT NULL,
totcir integer NOT NULL,
kidcircl integer NOT NULL,
loanto integer NOT NULL,
loanfm integer NOT NULL,
totpro integer NOT NULL,
totatten integer NOT NULL,
gpterms integer NOT NULL,
pitusr integer NOT NULL,
yr_sub integer NOT NULL,
obereg varchar(2) NOT NULL,
rstatus integer NOT NULL,
statstru varchar(2) NOT NULL,
statname varchar(2) NOT NULL,
stataddr varchar(2) NOT NULL,
longitud numeric(10,7) NOT NULL,
latitude numeric(10,7) NOT NULL,
fipsst varchar(2) NOT NULL,
fipsco varchar(3) NOT NULL
);
CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname);
CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr);
CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city);
CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits);
COPY pls_fy2009_pupld09a
FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv'
WITH (FORMAT CSV, HEADER);
-- Listing 8-3: Using count() for table row counts
SELECT count(*)
FROM pls_fy2014_pupld14a;
SELECT count(*)
FROM pls_fy2009_pupld09a;
-- Listing 8-4: Using count() for the number of values in a column
SELECT count(salaries)
FROM pls_fy2014_pupld14a;
-- Listing 8-5: Using count() for the number of distinct values in a column
SELECT count(libname)
FROM pls_fy2014_pupld14a;
SELECT count(DISTINCT libname)
FROM pls_fy2014_pupld14a;
-- Bonus: find duplicate libnames
SELECT libname, count(libname)
FROM pls_fy2014_pupld14a
GROUP BY libname
ORDER BY count(libname) DESC;
-- Bonus: see location of every Oxford Public Library
SELECT libname, city, stabr
FROM pls_fy2014_pupld14a
WHERE libname = 'OXFORD PUBLIC LIBRARY';
-- Listing 8-6: Finding the most and fewest visits using max() and min()
SELECT max(visits), min(visits)
FROM pls_fy2014_pupld14a;
-- Listing 8-7: Using GROUP BY on the stabr column
-- There are 56 in 2014.
SELECT stabr
FROM pls_fy2014_pupld14a
GROUP BY stabr
ORDER BY stabr;
-- Bonus: there are 55 in 2009.
SELECT stabr
FROM pls_fy2009_pupld09a
GROUP BY stabr
ORDER BY stabr;
-- Listing 8-8: Using GROUP BY on the city and stabr columns
SELECT city, stabr
FROM pls_fy2014_pupld14a
GROUP BY city, stabr
ORDER BY city, stabr;
-- Bonus: We can count some of the combos
SELECT city, stabr, count(*)
FROM pls_fy2014_pupld14a
GROUP BY city, stabr
ORDER BY count(*) DESC;
-- Listing 8-9: GROUP BY with count() on the stabr column
SELECT stabr, count(*)
FROM pls_fy2014_pupld14a
GROUP BY stabr
ORDER BY count(*) DESC;
-- Listing 8-10: GROUP BY with count() on the stabr and stataddr columns
SELECT stabr, stataddr, count(*)
FROM pls_fy2014_pupld14a
GROUP BY stabr, stataddr
ORDER BY stabr ASC, count(*) DESC;
-- Listing 8-11: Using the sum() aggregate function to total visits to
-- libraries in 2014 and 2009
-- 2014
SELECT sum(visits) AS visits_2014
FROM pls_fy2014_pupld14a
WHERE visits >= 0;
-- 2009
SELECT sum(visits) AS visits_2009
FROM pls_fy2009_pupld09a
WHERE visits >= 0;
-- Listing 8-12: Using sum() to total visits on joined 2014 and 2009 library tables
SELECT sum(pls14.visits) AS visits_2014,
sum(pls09.visits) AS visits_2009
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0;
-- Listing 8-13: Using GROUP BY to track percent change in library visits by state
SELECT pls14.stabr,
sum(pls14.visits) AS visits_2014,
sum(pls09.visits) AS visits_2009,
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
sum(pls09.visits) * 100, 2 ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0
GROUP BY pls14.stabr
ORDER BY pct_change DESC;
-- Listing 8-14: Using HAVING to filter the results of an aggregate query
SELECT pls14.stabr,
sum(pls14.visits) AS visits_2014,
sum(pls09.visits) AS visits_2009,
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
sum(pls09.visits) * 100, 2 ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0
GROUP BY pls14.stabr
HAVING sum(pls14.visits) > 50000000
ORDER BY pct_change DESC;