--------------------------------------------------------------------------- -- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition -- by Anthony DeBarros -- Chapter 9 Code Examples ---------------------------------------------------------------------------- -- Listing 9-1: Creating and filling the 2018 Public Libraries Survey table CREATE TABLE pls_fy2018_libraries ( stabr text NOT NULL, fscskey text CONSTRAINT fscskey_2018_pkey PRIMARY KEY, libid text NOT NULL, libname text NOT NULL, address text NOT NULL, city text NOT NULL, zip text NOT NULL, county text NOT NULL, phone text NOT NULL, c_relatn text NOT NULL, c_legbas text NOT NULL, c_admin text NOT NULL, c_fscs text NOT NULL, geocode text NOT NULL, lsabound text NOT NULL, startdate text NOT NULL, enddate text NOT NULL, popu_lsa integer NOT NULL, popu_und integer NOT NULL, centlib integer NOT NULL, branlib integer NOT NULL, bkmob integer NOT NULL, totstaff numeric(8,2) NOT NULL, bkvol integer NOT NULL, ebook integer NOT NULL, audio_ph integer NOT NULL, audio_dl integer NOT NULL, video_ph integer NOT NULL, video_dl integer NOT NULL, ec_lo_ot integer NOT NULL, subscrip integer NOT NULL, hrs_open integer NOT NULL, visits integer NOT NULL, reference integer NOT NULL, regbor integer NOT NULL, totcir integer NOT NULL, kidcircl integer NOT NULL, totpro integer NOT NULL, gpterms integer NOT NULL, pitusr integer NOT NULL, wifisess integer NOT NULL, obereg text NOT NULL, statstru text NOT NULL, statname text NOT NULL, stataddr text NOT NULL, longitude numeric(10,7) NOT NULL, latitude numeric(10,7) NOT NULL ); COPY pls_fy2018_libraries FROM 'C:\YourDirectory\pls_fy2018_libraries.csv' WITH (FORMAT CSV, HEADER); CREATE INDEX libname_2018_idx ON pls_fy2018_libraries (libname); -- Listing 9-2: Creating and filling the 2017 and 2016 Public Libraries Survey tables CREATE TABLE pls_fy2017_libraries ( stabr text NOT NULL, fscskey text CONSTRAINT fscskey_17_pkey PRIMARY KEY, libid text NOT NULL, libname text NOT NULL, address text NOT NULL, city text NOT NULL, zip text NOT NULL, county text NOT NULL, phone text NOT NULL, c_relatn text NOT NULL, c_legbas text NOT NULL, c_admin text NOT NULL, c_fscs text NOT NULL, geocode text NOT NULL, lsabound text NOT NULL, startdate text NOT NULL, enddate text NOT NULL, popu_lsa integer NOT NULL, popu_und integer NOT NULL, centlib integer NOT NULL, branlib integer NOT NULL, bkmob integer NOT NULL, totstaff numeric(8,2) NOT NULL, bkvol integer NOT NULL, ebook integer NOT NULL, audio_ph integer NOT NULL, audio_dl integer NOT NULL, video_ph integer NOT NULL, video_dl integer NOT NULL, ec_lo_ot integer NOT NULL, subscrip integer NOT NULL, hrs_open integer NOT NULL, visits integer NOT NULL, reference integer NOT NULL, regbor integer NOT NULL, totcir integer NOT NULL, kidcircl integer NOT NULL, totpro integer NOT NULL, gpterms integer NOT NULL, pitusr integer NOT NULL, wifisess integer NOT NULL, obereg text NOT NULL, statstru text NOT NULL, statname text NOT NULL, stataddr text NOT NULL, longitude numeric(10,7) NOT NULL, latitude numeric(10,7) NOT NULL ); CREATE TABLE pls_fy2016_libraries ( stabr text NOT NULL, fscskey text CONSTRAINT fscskey_16_pkey PRIMARY KEY, libid text NOT NULL, libname text NOT NULL, address text NOT NULL, city text NOT NULL, zip text NOT NULL, county text NOT NULL, phone text NOT NULL, c_relatn text NOT NULL, c_legbas text NOT NULL, c_admin text NOT NULL, c_fscs text NOT NULL, geocode text NOT NULL, lsabound text NOT NULL, startdate text NOT NULL, enddate text NOT NULL, popu_lsa integer NOT NULL, popu_und integer NOT NULL, centlib integer NOT NULL, branlib integer NOT NULL, bkmob integer NOT NULL, totstaff numeric(8,2) NOT NULL, bkvol integer NOT NULL, ebook integer NOT NULL, audio_ph integer NOT NULL, audio_dl integer NOT NULL, video_ph integer NOT NULL, video_dl integer NOT NULL, ec_lo_ot integer NOT NULL, subscrip integer NOT NULL, hrs_open integer NOT NULL, visits integer NOT NULL, reference integer NOT NULL, regbor integer NOT NULL, totcir integer NOT NULL, kidcircl integer NOT NULL, totpro integer NOT NULL, gpterms integer NOT NULL, pitusr integer NOT NULL, wifisess integer NOT NULL, obereg text NOT NULL, statstru text NOT NULL, statname text NOT NULL, stataddr text NOT NULL, longitude numeric(10,7) NOT NULL, latitude numeric(10,7) NOT NULL ); COPY pls_fy2017_libraries FROM 'C:\YourDirectory\pls_fy2017_libraries.csv' WITH (FORMAT CSV, HEADER); COPY pls_fy2016_libraries FROM 'C:\YourDirectory\pls_fy2016_libraries.csv' WITH (FORMAT CSV, HEADER); CREATE INDEX libname_2017_idx ON pls_fy2017_libraries (libname); CREATE INDEX libname_2016_idx ON pls_fy2016_libraries (libname); -- Listing 9-3: Using count() for table row counts SELECT count(*) FROM pls_fy2018_libraries; SELECT count(*) FROM pls_fy2017_libraries; SELECT count(*) FROM pls_fy2016_libraries; -- Listing 9-4: Using count() for the number of values in a column SELECT count(phone) FROM pls_fy2018_libraries; -- Listing 9-5: Using count() for the number of distinct values in a column SELECT count(libname) FROM pls_fy2018_libraries; SELECT count(DISTINCT libname) FROM pls_fy2018_libraries; -- Bonus: find duplicate libnames SELECT libname, count(libname) FROM pls_fy2018_libraries GROUP BY libname ORDER BY count(libname) DESC; -- Bonus: see location of every Oxford Public Library SELECT libname, city, stabr FROM pls_fy2018_libraries WHERE libname = 'OXFORD PUBLIC LIBRARY'; -- Listing 9-6: Finding the most and fewest visits using max() and min() SELECT max(visits), min(visits) FROM pls_fy2018_libraries; -- Listing 9-7: Using GROUP BY on the stabr column -- There are 55 in 2018. SELECT stabr FROM pls_fy2018_libraries GROUP BY stabr ORDER BY stabr; -- Bonus: there are 54 in 2017. SELECT stabr FROM pls_fy2017_libraries GROUP BY stabr ORDER BY stabr; -- Listing 9-8: Using GROUP BY on the city and stabr columns SELECT city, stabr FROM pls_fy2018_libraries GROUP BY city, stabr ORDER BY city, stabr; -- Bonus: We can count the combos SELECT city, stabr, count(*) FROM pls_fy2018_libraries GROUP BY city, stabr ORDER BY count(*) DESC; -- Listing 9-9: Using GROUP BY with count() on the stabr column SELECT stabr, count(*) FROM pls_fy2018_libraries GROUP BY stabr ORDER BY count(*) DESC; -- Listing 9-10: GROUP BY with count() on the stabr and stataddr columns SELECT stabr, stataddr, count(*) FROM pls_fy2018_libraries GROUP BY stabr, stataddr ORDER BY stabr, stataddr; -- Listing 9-11: Using the sum() aggregate function to total visits to -- libraries in 2014 and 2009 -- 2018 SELECT sum(visits) AS visits_2018 FROM pls_fy2018_libraries WHERE visits >= 0; -- 2017 SELECT sum(visits) AS visits_2017 FROM pls_fy2017_libraries WHERE visits >= 0; -- 2016 SELECT sum(visits) AS visits_2016 FROM pls_fy2016_libraries WHERE visits >= 0; -- Listing 9-12: Using sum() to total visits on joined 2018, 2017, and 2016 tables SELECT sum(pls18.visits) AS visits_2018, sum(pls17.visits) AS visits_2017, sum(pls16.visits) AS visits_2016 FROM pls_fy2018_libraries pls18 JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey WHERE pls18.visits >= 0 AND pls17.visits >= 0 AND pls16.visits >= 0; -- Bonus: summing wifi sessions SELECT sum(pls18.wifisess) AS wifi_2018, sum(pls17.wifisess) AS wifi_2017, sum(pls16.wifisess) AS wifi_2016 FROM pls_fy2018_libraries pls18 JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey WHERE pls18.wifisess >= 0 AND pls17.wifisess >= 0 AND pls16.wifisess >= 0; -- Listing 9-13: Using GROUP BY to track percent change in library visits by state SELECT pls18.stabr, sum(pls18.visits) AS visits_2018, sum(pls17.visits) AS visits_2017, sum(pls16.visits) AS visits_2016, round( (sum(pls18.visits::numeric) - sum(pls17.visits)) / sum(pls17.visits) * 100, 1 ) AS chg_2018_17, round( (sum(pls17.visits::numeric) - sum(pls16.visits)) / sum(pls16.visits) * 100, 1 ) AS chg_2017_16 FROM pls_fy2018_libraries pls18 JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey WHERE pls18.visits >= 0 AND pls17.visits >= 0 AND pls16.visits >= 0 GROUP BY pls18.stabr ORDER BY chg_2018_17 DESC; -- Listing 9-14: Using HAVING to filter the results of an aggregate query SELECT pls18.stabr, sum(pls18.visits) AS visits_2018, sum(pls17.visits) AS visits_2017, sum(pls16.visits) AS visits_2016, round( (sum(pls18.visits::numeric) - sum(pls17.visits)) / sum(pls17.visits) * 100, 1 ) AS chg_2018_17, round( (sum(pls17.visits::numeric) - sum(pls16.visits)) / sum(pls16.visits) * 100, 1 ) AS chg_2017_16 FROM pls_fy2018_libraries pls18 JOIN pls_fy2017_libraries pls17 ON pls18.fscskey = pls17.fscskey JOIN pls_fy2016_libraries pls16 ON pls18.fscskey = pls16.fscskey WHERE pls18.visits >= 0 AND pls17.visits >= 0 AND pls16.visits >= 0 GROUP BY pls18.stabr HAVING sum(pls18.visits) > 50000000 ORDER BY chg_2018_17 DESC;