Updates for Chapters 4 and 5, new Census data

This commit is contained in:
anthonydb 2020-06-18 11:03:38 -04:00
parent b1c0a13dfc
commit aec9b05875
6 changed files with 3229 additions and 3305 deletions

View File

@ -1,15 +1,15 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
---------------------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
-- by Anthony DeBarros
-- Chapter 4 Code Examples
--------------------------------------------------------------
----------------------------------------------------------------------------
-- Listing 4-1: Character data types in action
CREATE TABLE char_data_types (
varchar_column varchar(10),
char_column char(10),
varchar_column varchar(10),
text_column text
);
@ -42,8 +42,8 @@ SELECT * FROM number_data_types;
-- Assumes table created and loaded with Listing 4-2
SELECT
numeric_column * 10000000 AS "Fixed",
real_column * 10000000 AS "Float"
numeric_column * 10000000 AS fixed,
real_column * 10000000 AS floating
FROM number_data_types
WHERE numeric_column = .7;
@ -56,9 +56,9 @@ CREATE TABLE date_time_types (
INSERT INTO date_time_types
VALUES
('2018-12-31 01:00 EST','2 days'),
('2018-12-31 01:00 PST','1 month'),
('2018-12-31 01:00 Australia/Melbourne','1 century'),
('2022-12-31 01:00 EST','2 days'),
('2022-12-31 01:00 PST','1 month'),
('2022-12-31 01:00 Australia/Melbourne','1 century'),
(now(),'1 week');
SELECT * FROM date_time_types;
@ -79,7 +79,7 @@ FROM date_time_types;
SELECT numeric_column,
CAST(numeric_column AS integer),
CAST(numeric_column AS varchar(6))
CAST(numeric_column AS text)
FROM number_data_types;
-- Does not work:

View File

@ -1,9 +1,9 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
---------------------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
-- by Anthony DeBarros
-- Chapter 5 Code Examples
--------------------------------------------------------------
----------------------------------------------------------------------------
-- Listing 5-1: Using COPY for data import
-- This is example syntax only; running it will produce an error
@ -13,137 +13,54 @@ FROM 'C:\YourDirectory\your_file.csv'
WITH (FORMAT CSV, HEADER);
-- Listing 5-2: A CREATE TABLE statement for Census county data
-- Full data dictionary available at: http://www.census.gov/prod/cen2010/doc/pl94-171.pdf
-- Listing 5-2: A CREATE TABLE statement for Census county population estimates
-- Data dictionary for estimates available at: https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2010-2019/co-est2019-alldata.pdf
-- Data dictionary for additional columns at: http://www.census.gov/prod/cen2010/doc/pl94-171.pdf
-- Note: Some columns have been given more descriptive names
CREATE TABLE us_counties_2010 (
geo_name varchar(90), -- Name of the geography
state_us_abbreviation varchar(2), -- State/U.S. abbreviation
summary_level varchar(3), -- Summary Level
CREATE TABLE us_counties_pop_est_2019 (
state_fips text, -- State FIPS code
county_fips text, -- County FIPS code
region smallint, -- Region
division smallint, -- Division
state_fips varchar(2), -- State FIPS code
county_fips varchar(3), -- County code
state_name text, -- State name
county_name text, -- County name
area_land bigint, -- Area (Land) in square meters
area_water bigint, -- Area (Water) in square meters
population_count_100_percent integer, -- Population count (100%)
housing_unit_count_100_percent integer, -- Housing Unit count (100%)
area_water bigint, -- Area (Water) in square meters
internal_point_lat numeric(10,7), -- Internal point (latitude)
internal_point_lon numeric(10,7), -- Internal point (longitude)
-- This section is referred to as P1. Race:
p0010001 integer, -- Total population
p0010002 integer, -- Population of one race:
p0010003 integer, -- White Alone
p0010004 integer, -- Black or African American alone
p0010005 integer, -- American Indian and Alaska Native alone
p0010006 integer, -- Asian alone
p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone
p0010008 integer, -- Some Other Race alone
p0010009 integer, -- Population of two or more races
p0010010 integer, -- Population of two races:
p0010011 integer, -- White; Black or African American
p0010012 integer, -- White; American Indian and Alaska Native
p0010013 integer, -- White; Asian
p0010014 integer, -- White; Native Hawaiian and Other Pacific Islander
p0010015 integer, -- White; Some Other Race
p0010016 integer, -- Black or African American; American Indian and Alaska Native
p0010017 integer, -- Black or African American; Asian
p0010018 integer, -- Black or African American; Native Hawaiian and Other Pacific Islander
p0010019 integer, -- Black or African American; Some Other Race
p0010020 integer, -- American Indian and Alaska Native; Asian
p0010021 integer, -- American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander
p0010022 integer, -- American Indian and Alaska Native; Some Other Race
p0010023 integer, -- Asian; Native Hawaiian and Other Pacific Islander
p0010024 integer, -- Asian; Some Other Race
p0010025 integer, -- Native Hawaiian and Other Pacific Islander; Some Other Race
p0010026 integer, -- Population of three races
p0010047 integer, -- Population of four races
p0010063 integer, -- Population of five races
p0010070 integer, -- Population of six races
-- This section is referred to as P2. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
p0020001 integer, -- Total
p0020002 integer, -- Hispanic or Latino
p0020003 integer, -- Not Hispanic or Latino:
p0020004 integer, -- Population of one race:
p0020005 integer, -- White Alone
p0020006 integer, -- Black or African American alone
p0020007 integer, -- American Indian and Alaska Native alone
p0020008 integer, -- Asian alone
p0020009 integer, -- Native Hawaiian and Other Pacific Islander alone
p0020010 integer, -- Some Other Race alone
p0020011 integer, -- Two or More Races
p0020012 integer, -- Population of two races
p0020028 integer, -- Population of three races
p0020049 integer, -- Population of four races
p0020065 integer, -- Population of five races
p0020072 integer, -- Population of six races
-- This section is referred to as P3. RACE FOR THE POPULATION 18 YEARS AND OVER
p0030001 integer, -- Total
p0030002 integer, -- Population of one race:
p0030003 integer, -- White alone
p0030004 integer, -- Black or African American alone
p0030005 integer, -- American Indian and Alaska Native alone
p0030006 integer, -- Asian alone
p0030007 integer, -- Native Hawaiian and Other Pacific Islander alone
p0030008 integer, -- Some Other Race alone
p0030009 integer, -- Two or More Races
p0030010 integer, -- Population of two races
p0030026 integer, -- Population of three races
p0030047 integer, -- Population of four races
p0030063 integer, -- Population of five races
p0030070 integer, -- Population of six races
-- This section is referred to as P4. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
-- FOR THE POPULATION 18 YEARS AND OVER
p0040001 integer, -- Total
p0040002 integer, -- Hispanic or Latino
p0040003 integer, -- Not Hispanic or Latino:
p0040004 integer, -- Population of one race:
p0040005 integer, -- White alone
p0040006 integer, -- Black or African American alone
p0040007 integer, -- American Indian and Alaska Native alone
p0040008 integer, -- Asian alone
p0040009 integer, -- Native Hawaiian and Other Pacific Islander alone
p0040010 integer, -- Some Other Race alone
p0040011 integer, -- Two or More Races
p0040012 integer, -- Population of two races
p0040028 integer, -- Population of three races
p0040049 integer, -- Population of four races
p0040065 integer, -- Population of five races
p0040072 integer, -- Population of six races
-- This section is referred to as H1. OCCUPANCY STATUS
h0010001 integer, -- Total housing units
h0010002 integer, -- Occupied
h0010003 integer -- Vacant
pop_est_2018 integer, -- 2018-07-01 resident total population estimate
pop_est_2019 integer, -- 2019-07-01 resident total population estimate
births_2019 integer, -- Births from 2018-07-01 to 2019-06-30
deaths_2019 integer, -- Deaths from 2018-07-01 to 2019-06-30
international_migr_2019 integer, -- Net international migration from 2018-07-01 to 2019-06-30
domestic_migr_2019 integer, -- Net domestic migration from 2018-07-01 to 2019-06-30
residual_2019 integer, -- Residual for 2018-07-01 to 2019-06-30
CONSTRAINT counties_2019_key PRIMARY KEY (state_fips, county_fips)
);
SELECT * FROM us_counties_2010;
SELECT * FROM us_counties_pop_est_2019;
-- Listing 5-3: Importing Census data using COPY
-- Note! If you run into an import error here, be sure you downloaded the code and
-- data for the book according to the steps listed on page xxvii in the Introduction.
-- Windows users: Please check the Note on page xxvii as well.
-- data for the book according to the steps listed in Chapter 1.
-- Windows users: Please check the Note on PAGE XXXXXX as well.
COPY us_counties_2010
FROM 'C:\YourDirectory\us_counties_2010.csv'
COPY us_counties_pop_est_2019
-- FROM 'C:\YourDirectory\us_counties_2010.csv'
FROM '/Users/DeBarrosA/Dropbox (Personal)/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_05/us_counties_pop_est_2019.csv'
WITH (FORMAT CSV, HEADER);
-- Checking the data
SELECT * FROM us_counties_2010;
SELECT * FROM us_counties_pop_est_2019;
SELECT geo_name, state_us_abbreviation, area_land
FROM us_counties_2010
SELECT county_name, state_name, area_land
FROM us_counties_pop_est_2019
ORDER BY area_land DESC
LIMIT 3;
SELECT geo_name, state_us_abbreviation, internal_point_lon
FROM us_counties_2010
SELECT county_name, state_name, internal_point_lon
FROM us_counties_pop_est_2019
ORDER BY internal_point_lon DESC
LIMIT 5;
@ -151,18 +68,20 @@ LIMIT 5;
-- Listing 5-4: Creating a table to track supervisor salaries
CREATE TABLE supervisor_salaries (
town varchar(30),
county varchar(30),
supervisor varchar(30),
start_date date,
salary money,
benefits money
id integer GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
town text,
county text,
supervisor text,
start_date text,
salary numeric(10,2),
benefits numeric(10,2)
);
-- Listing 5-5: Importing salaries data from CSV to three table columns
COPY supervisor_salaries (town, supervisor, salary)
FROM 'C:\YourDirectory\supervisor_salaries.csv'
-- FROM 'C:\YourDirectory\supervisor_salaries.csv'
FROM '/Users/DeBarrosA/Dropbox (Personal)/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_05/supervisor_salaries.csv'
WITH (FORMAT CSV, HEADER);
-- Check the data
@ -173,14 +92,16 @@ SELECT * FROM supervisor_salaries LIMIT 2;
DELETE FROM supervisor_salaries;
CREATE TEMPORARY TABLE supervisor_salaries_temp (LIKE supervisor_salaries);
CREATE TEMPORARY TABLE supervisor_salaries_temp
(LIKE supervisor_salaries INCLUDING ALL);
COPY supervisor_salaries_temp (town, supervisor, salary)
FROM 'C:\YourDirectory\supervisor_salaries.csv'
-- FROM 'C:\YourDirectory\supervisor_salaries.csv'
FROM '/Users/DeBarrosA/Dropbox (Personal)/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_05/supervisor_salaries.csv'
WITH (FORMAT CSV, HEADER);
INSERT INTO supervisor_salaries (town, county, supervisor, salary)
SELECT town, 'Some County', supervisor, salary
SELECT town, 'My County', supervisor, salary
FROM supervisor_salaries_temp;
DROP TABLE supervisor_salaries_temp;
@ -188,25 +109,30 @@ DROP TABLE supervisor_salaries_temp;
-- Check the data
SELECT * FROM supervisor_salaries LIMIT 2;
-- Listing 5-7: Export an entire table with COPY
COPY us_counties_2010
TO 'C:\YourDirectory\us_counties_export.txt'
COPY us_counties_pop_est_2019
-- TO 'C:\YourDirectory\us_counties_export.txt'
TO '/Users/DeBarrosA/Dropbox (Personal)/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_05/us_counties_export.txt'
WITH (FORMAT CSV, HEADER, DELIMITER '|');
-- Listing 5-8: Exporting selected columns from a table with COPY
COPY us_counties_2010 (geo_name, internal_point_lat, internal_point_lon)
TO 'C:\YourDirectory\us_counties_latlon_export.txt'
COPY us_counties_pop_est_2019
(county_name, internal_point_lat, internal_point_lon)
-- TO 'C:\YourDirectory\us_counties_latlon_export.txt'
TO '/Users/DeBarrosA/Dropbox (Personal)/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_05/us_counties_latlon_export.csv'
WITH (FORMAT CSV, HEADER, DELIMITER '|');
-- Listing 5-9: Exporting query results with COPY
COPY (
SELECT geo_name, state_us_abbreviation
FROM us_counties_2010
WHERE geo_name ILIKE '%mill%'
SELECT county_name, state_name
FROM us_counties_pop_est_2019
WHERE county_name ILIKE '%mill%'
)
TO 'C:\YourDirectory\us_counties_mill_export.txt'
WITH (FORMAT CSV, HEADER, DELIMITER '|');
-- TO 'C:\YourDirectory\us_counties_mill_export.csv'
TO '/Users/DeBarrosA/Dropbox (Personal)/DataMonky/Book-Writing/PracticalSQL_2e/Code-Repo/Chapter_05/us_counties_mill_export.csv'
WITH (FORMAT CSV, HEADER);

View File

@ -1,6 +1,6 @@
town,supervisor,salary
Anytown,Jones,27000
Bumblyburg,Baker,24999
Moetown,Smith,32100
Bigville,Kao,31500
New Brillig,Carroll,72690
Anytown,Jones,67000
Bumblyburg,Baker,74999
Moetown,Smith,52100
Bigville,Kao,81500
New Brillig,Carroll,102690

1 town supervisor salary
2 Anytown Jones 27000 67000
3 Bumblyburg Baker 24999 74999
4 Moetown Smith 32100 52100
5 Bigville Kao 31500 81500
6 New Brillig Carroll 72690 102690

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,13 +1,12 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
---------------------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
-- by Anthony DeBarros
-- Try It Yourself Questions and Answers
--------------------------------------------------------------
----------------------------------------------------------------------------
--------------------------------------------------------------
-- Chapter 1: Creating Your First Database and Table
-- Chapter 2: Creating Your First Database and Table
--------------------------------------------------------------
-- 1. Imagine you're building a database to catalog all the animals at your
@ -75,7 +74,7 @@ INSERT INTO animal_types (common_name, scientific_name, conservation_status)
VALUES ('Javan Rhino', 'Rhinoceros sondaicus' 'Critically Endangered');
--------------------------------------------------------------
-- Chapter 2: Beginning Data Exploration with SELECT
-- Chapter 3: Beginning Data Exploration with SELECT
--------------------------------------------------------------
-- 1. The school district superintendent asks for a list of teachers in each
@ -109,7 +108,7 @@ ORDER BY salary DESC;
--------------------------------------------------------------
-- Chapter 3: Understanding Data Types
-- Chapter 4: Understanding Data Types
--------------------------------------------------------------
-- 1. Your company delivers fruit and vegetables to local grocery stores, and
@ -150,11 +149,11 @@ varchar(50)
-- accepted date/time formats will result in an error. You can see this with
-- the below example, which tries to cast the string as a timestamp.
SELECT CAST('4//2017' AS timestamp with time zone);
SELECT CAST('4//2021' AS timestamp with time zone);
--------------------------------------------------------------
-- Chapter 4: Importing and Exporting Data
-- Chapter 5: Importing and Exporting Data
--------------------------------------------------------------
-- 1. Write a WITH statement to include with COPY to handle the import of an
@ -182,7 +181,7 @@ CREATE TABLE actors (
-- Note: You may never encounter a file that uses a colon as a delimiter and
-- and pound sign for quoting, but anything is possible.
-- 2. Using the table us_counties_2010 you created and filled in this chapter,
-- 2. Using the table us_counties_pop_est_2019 you created and filled in this chapter,
-- export to a CSV file the 20 counties in the United States that have the most
-- housing units. Make sure you export only each county's name, state, and
-- number of housing units. (Hint: Housing units are totaled for each county in
@ -191,8 +190,8 @@ CREATE TABLE actors (
-- Answer:
COPY (
SELECT geo_name, state_us_abbreviation, housing_unit_count_100_percent
FROM us_counties_2010 ORDER BY housing_unit_count_100_percent DESC LIMIT 20
SELECT county_name, state_name, births_2019
FROM us_counties_pop_est_2019 ORDER BY births_2019 DESC LIMIT 20
)
TO 'C:\YourDirectory\us_counties_housing_export.txt'
WITH (FORMAT CSV, HEADER);