Chapter 7 data and code updates

This commit is contained in:
anthonydb 2020-09-04 18:23:11 -04:00
parent a7e51bbb39
commit 7c60ce3b9c
4 changed files with 3315 additions and 80 deletions

View File

@ -2,7 +2,7 @@
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition -- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
-- by Anthony DeBarros -- by Anthony DeBarros
-- Chapter 7 Code Examples -- Chapter 7 Code Examples
---------------------------------------------------------------------------- ----------------------------------------------------------------------------
@ -46,152 +46,183 @@ ON employees.dept_id = departments.dept_id;
-- Listing 7-3: Creating two tables to explore JOIN types -- Listing 7-3: Creating two tables to explore JOIN types
CREATE TABLE schools_left ( CREATE TABLE district_2020 (
id integer CONSTRAINT left_id_key PRIMARY KEY, id integer CONSTRAINT id_key_2020 PRIMARY KEY,
left_school varchar(30) school_2020 text
); );
CREATE TABLE schools_right ( CREATE TABLE district_2035 (
id integer CONSTRAINT right_id_key PRIMARY KEY, id integer CONSTRAINT id_key_2035 PRIMARY KEY,
right_school varchar(30) school_2035 text
); );
INSERT INTO schools_left (id, left_school) VALUES INSERT INTO district_2020 VALUES
(1, 'Oak Street School'), (1, 'Oak Street School'),
(2, 'Roosevelt High School'), (2, 'Roosevelt High School'),
(5, 'Washington Middle School'), (5, 'Dover Middle School'),
(6, 'Jefferson High School'); (6, 'Webutuck High School');
INSERT INTO schools_right (id, right_school) VALUES INSERT INTO district_2035 VALUES
(1, 'Oak Street School'), (1, 'Oak Street School'),
(2, 'Roosevelt High School'), (2, 'Roosevelt High School'),
(3, 'Morrison Elementary'), (3, 'Morrison Elementary'),
(4, 'Chase Magnet Academy'), (4, 'Chase Magnet Academy'),
(6, 'Jefferson High School'); (6, 'Webutuck High School');
-- Listing 7-4: Using JOIN -- Listing 7-4: Using JOIN
SELECT * SELECT *
FROM schools_left JOIN schools_right FROM district_2020 JOIN district_2035
ON schools_left.id = schools_right.id; ON district_2020.id = district_2035.id;
-- Bonus: Also can be specified as INNER JOIN -- Bonus: Also can be specified as INNER JOIN
SELECT * SELECT *
FROM schools_left INNER JOIN schools_right FROM district_2020 INNER JOIN district_2035
ON schools_left.id = schools_right.id; ON district_2020.id = district_2035.id;
-- Listing 7-5: Using LEFT JOIN -- Listing 7-5: Using LEFT JOIN
SELECT * SELECT *
FROM schools_left LEFT JOIN schools_right FROM district_2020 LEFT JOIN district_2035
ON schools_left.id = schools_right.id; ON district_2020.id = district_2035.id;
-- Listing 7-6: Using RIGHT JOIN -- Listing 7-6: Using RIGHT JOIN
SELECT * SELECT *
FROM schools_left RIGHT JOIN schools_right FROM district_2020 RIGHT JOIN district_2035
ON schools_left.id = schools_right.id; ON district_2020.id = district_2035.id;
-- Listing 7-7: Using FULL OUTER JOIN -- Listing 7-7: Using FULL OUTER JOIN
SELECT * SELECT *
FROM schools_left FULL OUTER JOIN schools_right FROM district_2020 FULL OUTER JOIN district_2035
ON schools_left.id = schools_right.id; ON district_2020.id = district_2035.id;
-- Listing 7-8: Using CROSS JOIN -- Listing 7-8: Using CROSS JOIN
SELECT * SELECT *
FROM schools_left CROSS JOIN schools_right; FROM district_2020 CROSS JOIN district_2035;
-- Listing 7-9: Filtering to show missing values with IS NULL -- Listing 7-9: Filtering to show missing values with IS NULL
SELECT * SELECT *
FROM schools_left LEFT JOIN schools_right FROM district_2020 LEFT JOIN district_2035
ON schools_left.id = schools_right.id ON district_2020.id = district_2035.id
WHERE schools_right.id IS NULL; WHERE district_2035.id IS NULL;
SELECT *
FROM district_2020 RIGHT JOIN district_2035
ON district_2020.id = district_2035.id
WHERE district_2020.id IS NULL;
-- Listing 7-10: Querying specific columns in a join -- Listing 7-10: Querying specific columns in a join
SELECT schools_left.id, SELECT district_2020.id,
schools_left.left_school, district_2020.school_2020,
schools_right.right_school district_2035.school_2035
FROM schools_left LEFT JOIN schools_right FROM district_2020 LEFT JOIN district_2035
ON schools_left.id = schools_right.id; ON district_2020.id = district_2035.id;
-- Listing 7-11: Simplifying code with table aliases -- Listing 7-11: Simplifying code with table aliases
SELECT lt.id, SELECT d20.id,
lt.left_school, d20.school_2020,
rt.right_school d35.school_2035
FROM schools_left AS lt LEFT JOIN schools_right AS rt FROM district_2020 AS d20 LEFT JOIN district_2035 AS d35
ON lt.id = rt.id; ON d20.id = d35.id;
-- Listing 7-12: Joining multiple tables -- Listing 7-12: Joining multiple tables
CREATE TABLE schools_enrollment ( CREATE TABLE district_2020_enrollment (
id integer, id integer,
enrollment integer enrollment integer
); );
CREATE TABLE schools_grades ( CREATE TABLE district_2020_grades (
id integer, id integer,
grades varchar(10) grades varchar(10)
); );
INSERT INTO schools_enrollment (id, enrollment) INSERT INTO district_2020_enrollment
VALUES VALUES
(1, 360), (1, 360),
(2, 1001), (2, 1001),
(5, 450), (5, 450),
(6, 927); (6, 927);
INSERT INTO schools_grades (id, grades) INSERT INTO district_2020_grades
VALUES VALUES
(1, 'K-3'), (1, 'K-3'),
(2, '9-12'), (2, '9-12'),
(5, '6-8'), (5, '6-8'),
(6, '9-12'); (6, '9-12');
SELECT lt.id, lt.left_school, en.enrollment, gr.grades SELECT d20.id,
FROM schools_left AS lt LEFT JOIN schools_enrollment AS en d20.school_2020,
ON lt.id = en.id en.enrollment,
LEFT JOIN schools_grades AS gr gr.grades
ON lt.id = gr.id; FROM district_2020 AS d20 LEFT JOIN district_2020_enrollment AS en
ON d20.id = en.id
LEFT JOIN district_2020_grades AS gr
ON d20.id = gr.id;
-- Listing 7-13: Performing math on joined Census tables -- Listing 7-13: Combining query results with UNION
-- Decennial Census 2000. Full data dictionary at https://www.census.gov/prod/cen2000/doc/pl94-171.pdf SELECT * FROM district_2020
-- Note: Some non-number columns have been given more descriptive names UNION
SELECT * FROM district_2035
ORDER BY id;
CREATE TABLE us_counties_2000 ( -- Listing 7-14: Combining query results with UNION ALL
geo_name varchar(90), -- County/state name, SELECT * FROM district_2020
state_us_abbreviation varchar(2), -- State/U.S. abbreviation UNION ALL
state_fips varchar(2), -- State FIPS code SELECT * FROM district_2035
county_fips varchar(3), -- County code ORDER BY id;
p0010001 integer, -- Total population
p0010002 integer, -- Population of one race: -- Listing 7-15: Customizing a UNION query
p0010003 integer, -- White Alone SELECT '2020' AS year,
p0010004 integer, -- Black or African American alone school_2020 AS school
p0010005 integer, -- American Indian and Alaska Native alone FROM district_2020
p0010006 integer, -- Asian alone
p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone UNION
p0010008 integer, -- Some Other Race alone
p0010009 integer, -- Population of two or more races SELECT '2035' AS year,
p0010010 integer, -- Population of two races school_2035
p0020002 integer, -- Hispanic or Latino FROM district_2035
p0020003 integer -- Not Hispanic or Latino: ORDER BY school, year;
-- Listing 7-16: Combining query results with INTERSECT and EXCEPT
SELECT * FROM district_2020
INTERSECT
SELECT * FROM district_2035
ORDER BY id;
SELECT * FROM district_2020
EXCEPT
SELECT * FROM district_2035
ORDER BY id;
-- Listing 7-17: Performing math on joined Census population estimates tables
CREATE TABLE us_counties_pop_est_2010 (
state_fips text, -- State FIPS code
county_fips text, -- County FIPS code
region smallint, -- Region
state_name text, -- State name
county_name text, -- County name
estimates_base_2010 integer, -- 4/1/2010 resident total population estimates base
CONSTRAINT counties_2010_key PRIMARY KEY (state_fips, county_fips)
); );
COPY us_counties_2000 COPY us_counties_pop_est_2010
FROM 'C:\YourDirectory\us_counties_2000.csv' FROM 'C:\YourDirectory\us_counties_pop_est_2010.csv'
WITH (FORMAT CSV, HEADER); WITH (FORMAT CSV, HEADER);
SELECT c2010.geo_name, SELECT c2019.county_name,
c2010.state_us_abbreviation AS state, c2019.state_name,
c2010.p0010001 AS pop_2010, c2019.pop_est_2019 AS pop_2019,
c2000.p0010001 AS pop_2000, c2010.estimates_base_2010 AS pop_2010,
c2010.p0010001 - c2000.p0010001 AS raw_change, c2019.pop_est_2019 - c2010.estimates_base_2010 AS raw_change,
round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001) round( (c2019.pop_est_2019::numeric - c2010.estimates_base_2010)
/ c2000.p0010001 * 100, 1 ) AS pct_change / c2010.estimates_base_2010 * 100, 1 ) AS pct_change
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000 FROM us_counties_pop_est_2019 AS c2019
ON c2010.state_fips = c2000.state_fips JOIN us_counties_pop_est_2010 AS c2010
AND c2010.county_fips = c2000.county_fips ON c2019.state_fips = c2010.state_fips
AND c2010.p0010001 <> c2000.p0010001 AND c2019.county_fips = c2010.county_fips
ORDER BY pct_change DESC; ORDER BY pct_change DESC;

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

View File

@ -306,7 +306,69 @@ FROM us_counties_pop_est_2019
GROUP BY state_name; GROUP BY state_name;
----------------------------------------------------------------------------
-- Chapter 7: Joining Tables in a Relational Database
----------------------------------------------------------------------------
-- 1. According to the Census population estimates, which county had the
-- greatest percentage loss of population between 2010 and 2019? Try
-- an Internet search to find out what happened. (Hint: The loss is related
-- to a particular type of facility.)
-- Answer:
-- Concho County, Texas, lost 33 percent of its population from 2010 to
-- 2019, the result of the closure of Eden Detention Center.
-- https://www.texasstandard.org/stories/after-edens-prison-closes-what-comes-next-for-this-small-texas-town/
-- Simply use ASC in the ORDER BY clause to re-order the results, like this:
SELECT c2019.county_name,
c2019.state_name,
c2019.pop_est_2019 AS pop_2019,
c2010.estimates_base_2010 AS pop_2010,
c2019.pop_est_2019 - c2010.estimates_base_2010 AS raw_change,
round( (c2019.pop_est_2019::numeric - c2010.estimates_base_2010)
/ c2010.estimates_base_2010 * 100, 1 ) AS pct_change
FROM us_counties_pop_est_2019 AS c2019
JOIN us_counties_pop_est_2010 AS c2010
ON c2019.state_fips = c2010.state_fips
AND c2019.county_fips = c2010.county_fips
ORDER BY pct_change ASC;
-- 2.
SELECT '2010' AS year,
state_fips,
county_fips,
county_name,
state_name,
estimates_base_2010 AS estimate
FROM us_counties_pop_est_2010
UNION
SELECT '2019' AS year,
state_fips,
county_fips,
county_name,
state_name,
pop_est_2019 AS estimate
FROM us_counties_pop_est_2019
ORDER BY state_fips, county_fips, year;
-- 3. Using the percentile_cont() function from Chapter 6,
-- determine the median of the percent change in estimated county
-- population between 2010 and 2019.
-- Answer: -0.5%
SELECT percentile_cont(.5)
WITHIN GROUP (ORDER BY round( (c2019.pop_est_2019::numeric - c2010.estimates_base_2010)
/ c2010.estimates_base_2010 * 100, 1 )) AS percentile_50th
FROM us_counties_pop_est_2019 AS c2019
JOIN us_counties_pop_est_2010 AS c2010
ON c2019.state_fips = c2010.state_fips
AND c2019.county_fips = c2010.county_fips;