Chapter 7 data and code updates
This commit is contained in:
parent
a7e51bbb39
commit
7c60ce3b9c
@ -46,152 +46,183 @@ ON employees.dept_id = departments.dept_id;
|
|||||||
|
|
||||||
-- Listing 7-3: Creating two tables to explore JOIN types
|
-- Listing 7-3: Creating two tables to explore JOIN types
|
||||||
|
|
||||||
CREATE TABLE schools_left (
|
CREATE TABLE district_2020 (
|
||||||
id integer CONSTRAINT left_id_key PRIMARY KEY,
|
id integer CONSTRAINT id_key_2020 PRIMARY KEY,
|
||||||
left_school varchar(30)
|
school_2020 text
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE schools_right (
|
CREATE TABLE district_2035 (
|
||||||
id integer CONSTRAINT right_id_key PRIMARY KEY,
|
id integer CONSTRAINT id_key_2035 PRIMARY KEY,
|
||||||
right_school varchar(30)
|
school_2035 text
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT INTO schools_left (id, left_school) VALUES
|
INSERT INTO district_2020 VALUES
|
||||||
(1, 'Oak Street School'),
|
(1, 'Oak Street School'),
|
||||||
(2, 'Roosevelt High School'),
|
(2, 'Roosevelt High School'),
|
||||||
(5, 'Washington Middle School'),
|
(5, 'Dover Middle School'),
|
||||||
(6, 'Jefferson High School');
|
(6, 'Webutuck High School');
|
||||||
|
|
||||||
INSERT INTO schools_right (id, right_school) VALUES
|
INSERT INTO district_2035 VALUES
|
||||||
(1, 'Oak Street School'),
|
(1, 'Oak Street School'),
|
||||||
(2, 'Roosevelt High School'),
|
(2, 'Roosevelt High School'),
|
||||||
(3, 'Morrison Elementary'),
|
(3, 'Morrison Elementary'),
|
||||||
(4, 'Chase Magnet Academy'),
|
(4, 'Chase Magnet Academy'),
|
||||||
(6, 'Jefferson High School');
|
(6, 'Webutuck High School');
|
||||||
|
|
||||||
-- Listing 7-4: Using JOIN
|
-- Listing 7-4: Using JOIN
|
||||||
|
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM schools_left JOIN schools_right
|
FROM district_2020 JOIN district_2035
|
||||||
ON schools_left.id = schools_right.id;
|
ON district_2020.id = district_2035.id;
|
||||||
|
|
||||||
-- Bonus: Also can be specified as INNER JOIN
|
-- Bonus: Also can be specified as INNER JOIN
|
||||||
|
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM schools_left INNER JOIN schools_right
|
FROM district_2020 INNER JOIN district_2035
|
||||||
ON schools_left.id = schools_right.id;
|
ON district_2020.id = district_2035.id;
|
||||||
|
|
||||||
-- Listing 7-5: Using LEFT JOIN
|
-- Listing 7-5: Using LEFT JOIN
|
||||||
|
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM schools_left LEFT JOIN schools_right
|
FROM district_2020 LEFT JOIN district_2035
|
||||||
ON schools_left.id = schools_right.id;
|
ON district_2020.id = district_2035.id;
|
||||||
|
|
||||||
-- Listing 7-6: Using RIGHT JOIN
|
-- Listing 7-6: Using RIGHT JOIN
|
||||||
|
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM schools_left RIGHT JOIN schools_right
|
FROM district_2020 RIGHT JOIN district_2035
|
||||||
ON schools_left.id = schools_right.id;
|
ON district_2020.id = district_2035.id;
|
||||||
|
|
||||||
-- Listing 7-7: Using FULL OUTER JOIN
|
-- Listing 7-7: Using FULL OUTER JOIN
|
||||||
|
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM schools_left FULL OUTER JOIN schools_right
|
FROM district_2020 FULL OUTER JOIN district_2035
|
||||||
ON schools_left.id = schools_right.id;
|
ON district_2020.id = district_2035.id;
|
||||||
|
|
||||||
-- Listing 7-8: Using CROSS JOIN
|
-- Listing 7-8: Using CROSS JOIN
|
||||||
|
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM schools_left CROSS JOIN schools_right;
|
FROM district_2020 CROSS JOIN district_2035;
|
||||||
|
|
||||||
-- Listing 7-9: Filtering to show missing values with IS NULL
|
-- Listing 7-9: Filtering to show missing values with IS NULL
|
||||||
|
|
||||||
SELECT *
|
SELECT *
|
||||||
FROM schools_left LEFT JOIN schools_right
|
FROM district_2020 LEFT JOIN district_2035
|
||||||
ON schools_left.id = schools_right.id
|
ON district_2020.id = district_2035.id
|
||||||
WHERE schools_right.id IS NULL;
|
WHERE district_2035.id IS NULL;
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM district_2020 RIGHT JOIN district_2035
|
||||||
|
ON district_2020.id = district_2035.id
|
||||||
|
WHERE district_2020.id IS NULL;
|
||||||
|
|
||||||
-- Listing 7-10: Querying specific columns in a join
|
-- Listing 7-10: Querying specific columns in a join
|
||||||
SELECT schools_left.id,
|
SELECT district_2020.id,
|
||||||
schools_left.left_school,
|
district_2020.school_2020,
|
||||||
schools_right.right_school
|
district_2035.school_2035
|
||||||
FROM schools_left LEFT JOIN schools_right
|
FROM district_2020 LEFT JOIN district_2035
|
||||||
ON schools_left.id = schools_right.id;
|
ON district_2020.id = district_2035.id;
|
||||||
|
|
||||||
-- Listing 7-11: Simplifying code with table aliases
|
-- Listing 7-11: Simplifying code with table aliases
|
||||||
SELECT lt.id,
|
SELECT d20.id,
|
||||||
lt.left_school,
|
d20.school_2020,
|
||||||
rt.right_school
|
d35.school_2035
|
||||||
FROM schools_left AS lt LEFT JOIN schools_right AS rt
|
FROM district_2020 AS d20 LEFT JOIN district_2035 AS d35
|
||||||
ON lt.id = rt.id;
|
ON d20.id = d35.id;
|
||||||
|
|
||||||
-- Listing 7-12: Joining multiple tables
|
-- Listing 7-12: Joining multiple tables
|
||||||
CREATE TABLE schools_enrollment (
|
CREATE TABLE district_2020_enrollment (
|
||||||
id integer,
|
id integer,
|
||||||
enrollment integer
|
enrollment integer
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE schools_grades (
|
CREATE TABLE district_2020_grades (
|
||||||
id integer,
|
id integer,
|
||||||
grades varchar(10)
|
grades varchar(10)
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT INTO schools_enrollment (id, enrollment)
|
INSERT INTO district_2020_enrollment
|
||||||
VALUES
|
VALUES
|
||||||
(1, 360),
|
(1, 360),
|
||||||
(2, 1001),
|
(2, 1001),
|
||||||
(5, 450),
|
(5, 450),
|
||||||
(6, 927);
|
(6, 927);
|
||||||
|
|
||||||
INSERT INTO schools_grades (id, grades)
|
INSERT INTO district_2020_grades
|
||||||
VALUES
|
VALUES
|
||||||
(1, 'K-3'),
|
(1, 'K-3'),
|
||||||
(2, '9-12'),
|
(2, '9-12'),
|
||||||
(5, '6-8'),
|
(5, '6-8'),
|
||||||
(6, '9-12');
|
(6, '9-12');
|
||||||
|
|
||||||
SELECT lt.id, lt.left_school, en.enrollment, gr.grades
|
SELECT d20.id,
|
||||||
FROM schools_left AS lt LEFT JOIN schools_enrollment AS en
|
d20.school_2020,
|
||||||
ON lt.id = en.id
|
en.enrollment,
|
||||||
LEFT JOIN schools_grades AS gr
|
gr.grades
|
||||||
ON lt.id = gr.id;
|
FROM district_2020 AS d20 LEFT JOIN district_2020_enrollment AS en
|
||||||
|
ON d20.id = en.id
|
||||||
|
LEFT JOIN district_2020_grades AS gr
|
||||||
|
ON d20.id = gr.id;
|
||||||
|
|
||||||
-- Listing 7-13: Performing math on joined Census tables
|
-- Listing 7-13: Combining query results with UNION
|
||||||
-- Decennial Census 2000. Full data dictionary at https://www.census.gov/prod/cen2000/doc/pl94-171.pdf
|
SELECT * FROM district_2020
|
||||||
-- Note: Some non-number columns have been given more descriptive names
|
UNION
|
||||||
|
SELECT * FROM district_2035
|
||||||
|
ORDER BY id;
|
||||||
|
|
||||||
CREATE TABLE us_counties_2000 (
|
-- Listing 7-14: Combining query results with UNION ALL
|
||||||
geo_name varchar(90), -- County/state name,
|
SELECT * FROM district_2020
|
||||||
state_us_abbreviation varchar(2), -- State/U.S. abbreviation
|
UNION ALL
|
||||||
state_fips varchar(2), -- State FIPS code
|
SELECT * FROM district_2035
|
||||||
county_fips varchar(3), -- County code
|
ORDER BY id;
|
||||||
p0010001 integer, -- Total population
|
|
||||||
p0010002 integer, -- Population of one race:
|
-- Listing 7-15: Customizing a UNION query
|
||||||
p0010003 integer, -- White Alone
|
SELECT '2020' AS year,
|
||||||
p0010004 integer, -- Black or African American alone
|
school_2020 AS school
|
||||||
p0010005 integer, -- American Indian and Alaska Native alone
|
FROM district_2020
|
||||||
p0010006 integer, -- Asian alone
|
|
||||||
p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone
|
UNION
|
||||||
p0010008 integer, -- Some Other Race alone
|
|
||||||
p0010009 integer, -- Population of two or more races
|
SELECT '2035' AS year,
|
||||||
p0010010 integer, -- Population of two races
|
school_2035
|
||||||
p0020002 integer, -- Hispanic or Latino
|
FROM district_2035
|
||||||
p0020003 integer -- Not Hispanic or Latino:
|
ORDER BY school, year;
|
||||||
|
|
||||||
|
-- Listing 7-16: Combining query results with INTERSECT and EXCEPT
|
||||||
|
SELECT * FROM district_2020
|
||||||
|
INTERSECT
|
||||||
|
SELECT * FROM district_2035
|
||||||
|
ORDER BY id;
|
||||||
|
|
||||||
|
SELECT * FROM district_2020
|
||||||
|
EXCEPT
|
||||||
|
SELECT * FROM district_2035
|
||||||
|
ORDER BY id;
|
||||||
|
|
||||||
|
-- Listing 7-17: Performing math on joined Census population estimates tables
|
||||||
|
CREATE TABLE us_counties_pop_est_2010 (
|
||||||
|
state_fips text, -- State FIPS code
|
||||||
|
county_fips text, -- County FIPS code
|
||||||
|
region smallint, -- Region
|
||||||
|
state_name text, -- State name
|
||||||
|
county_name text, -- County name
|
||||||
|
estimates_base_2010 integer, -- 4/1/2010 resident total population estimates base
|
||||||
|
CONSTRAINT counties_2010_key PRIMARY KEY (state_fips, county_fips)
|
||||||
);
|
);
|
||||||
|
|
||||||
COPY us_counties_2000
|
COPY us_counties_pop_est_2010
|
||||||
FROM 'C:\YourDirectory\us_counties_2000.csv'
|
FROM 'C:\YourDirectory\us_counties_pop_est_2010.csv'
|
||||||
WITH (FORMAT CSV, HEADER);
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
SELECT c2010.geo_name,
|
SELECT c2019.county_name,
|
||||||
c2010.state_us_abbreviation AS state,
|
c2019.state_name,
|
||||||
c2010.p0010001 AS pop_2010,
|
c2019.pop_est_2019 AS pop_2019,
|
||||||
c2000.p0010001 AS pop_2000,
|
c2010.estimates_base_2010 AS pop_2010,
|
||||||
c2010.p0010001 - c2000.p0010001 AS raw_change,
|
c2019.pop_est_2019 - c2010.estimates_base_2010 AS raw_change,
|
||||||
round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
|
round( (c2019.pop_est_2019::numeric - c2010.estimates_base_2010)
|
||||||
/ c2000.p0010001 * 100, 1 ) AS pct_change
|
/ c2010.estimates_base_2010 * 100, 1 ) AS pct_change
|
||||||
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
|
FROM us_counties_pop_est_2019 AS c2019
|
||||||
ON c2010.state_fips = c2000.state_fips
|
JOIN us_counties_pop_est_2010 AS c2010
|
||||||
AND c2010.county_fips = c2000.county_fips
|
ON c2019.state_fips = c2010.state_fips
|
||||||
AND c2010.p0010001 <> c2000.p0010001
|
AND c2019.county_fips = c2010.county_fips
|
||||||
ORDER BY pct_change DESC;
|
ORDER BY pct_change DESC;
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
3143
Chapter_07/us_counties_pop_est_2010.csv
Normal file
3143
Chapter_07/us_counties_pop_est_2010.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -306,7 +306,69 @@ FROM us_counties_pop_est_2019
|
|||||||
GROUP BY state_name;
|
GROUP BY state_name;
|
||||||
|
|
||||||
|
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
-- Chapter 7: Joining Tables in a Relational Database
|
||||||
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
-- 1. According to the Census population estimates, which county had the
|
||||||
|
-- greatest percentage loss of population between 2010 and 2019? Try
|
||||||
|
-- an Internet search to find out what happened. (Hint: The loss is related
|
||||||
|
-- to a particular type of facility.)
|
||||||
|
|
||||||
|
-- Answer:
|
||||||
|
|
||||||
|
-- Concho County, Texas, lost 33 percent of its population from 2010 to
|
||||||
|
-- 2019, the result of the closure of Eden Detention Center.
|
||||||
|
-- https://www.texasstandard.org/stories/after-edens-prison-closes-what-comes-next-for-this-small-texas-town/
|
||||||
|
|
||||||
|
-- Simply use ASC in the ORDER BY clause to re-order the results, like this:
|
||||||
|
SELECT c2019.county_name,
|
||||||
|
c2019.state_name,
|
||||||
|
c2019.pop_est_2019 AS pop_2019,
|
||||||
|
c2010.estimates_base_2010 AS pop_2010,
|
||||||
|
c2019.pop_est_2019 - c2010.estimates_base_2010 AS raw_change,
|
||||||
|
round( (c2019.pop_est_2019::numeric - c2010.estimates_base_2010)
|
||||||
|
/ c2010.estimates_base_2010 * 100, 1 ) AS pct_change
|
||||||
|
FROM us_counties_pop_est_2019 AS c2019
|
||||||
|
JOIN us_counties_pop_est_2010 AS c2010
|
||||||
|
ON c2019.state_fips = c2010.state_fips
|
||||||
|
AND c2019.county_fips = c2010.county_fips
|
||||||
|
ORDER BY pct_change ASC;
|
||||||
|
|
||||||
|
|
||||||
|
-- 2.
|
||||||
|
|
||||||
|
SELECT '2010' AS year,
|
||||||
|
state_fips,
|
||||||
|
county_fips,
|
||||||
|
county_name,
|
||||||
|
state_name,
|
||||||
|
estimates_base_2010 AS estimate
|
||||||
|
FROM us_counties_pop_est_2010
|
||||||
|
UNION
|
||||||
|
SELECT '2019' AS year,
|
||||||
|
state_fips,
|
||||||
|
county_fips,
|
||||||
|
county_name,
|
||||||
|
state_name,
|
||||||
|
pop_est_2019 AS estimate
|
||||||
|
FROM us_counties_pop_est_2019
|
||||||
|
ORDER BY state_fips, county_fips, year;
|
||||||
|
|
||||||
|
|
||||||
|
-- 3. Using the percentile_cont() function from Chapter 6,
|
||||||
|
-- determine the median of the percent change in estimated county
|
||||||
|
-- population between 2010 and 2019.
|
||||||
|
|
||||||
|
-- Answer: -0.5%
|
||||||
|
|
||||||
|
SELECT percentile_cont(.5)
|
||||||
|
WITHIN GROUP (ORDER BY round( (c2019.pop_est_2019::numeric - c2010.estimates_base_2010)
|
||||||
|
/ c2010.estimates_base_2010 * 100, 1 )) AS percentile_50th
|
||||||
|
FROM us_counties_pop_est_2019 AS c2019
|
||||||
|
JOIN us_counties_pop_est_2010 AS c2010
|
||||||
|
ON c2019.state_fips = c2010.state_fips
|
||||||
|
AND c2019.county_fips = c2010.county_fips;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user