Updates to Chapter 6 and Try It Yourself
This commit is contained in:
parent
b0f23d78a4
commit
360bade5a2
@ -1,9 +1,9 @@
|
|||||||
--------------------------------------------------------------
|
---------------------------------------------------------------------------
|
||||||
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
|
||||||
-- by Anthony DeBarros
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
-- Chapter 6 Code Examples
|
-- Chapter 6 Code Examples
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
-- Listing 6-1: Basic addition, subtraction and multiplication with SQL
|
-- Listing 6-1: Basic addition, subtraction and multiplication with SQL
|
||||||
|
|
||||||
@ -34,79 +34,81 @@ SELECT (7 + 8) * 9; -- answer: 135
|
|||||||
SELECT 3 ^ 3 - 1; -- answer: 26
|
SELECT 3 ^ 3 - 1; -- answer: 26
|
||||||
SELECT 3 ^ (3 - 1); -- answer: 9
|
SELECT 3 ^ (3 - 1); -- answer: 9
|
||||||
|
|
||||||
-- Listing 6-4: Selecting Census population columns by race with aliases
|
-- Listing 6-4: Selecting Census population estimate columns with aliases
|
||||||
|
|
||||||
SELECT geo_name,
|
SELECT county_name AS county,
|
||||||
state_us_abbreviation AS "st",
|
state_name AS state,
|
||||||
p0010001 AS "Total Population",
|
pop_est_2019 AS pop,
|
||||||
p0010003 AS "White Alone",
|
births_2019 AS births,
|
||||||
p0010004 AS "Black or African American Alone",
|
deaths_2019 AS deaths,
|
||||||
p0010005 AS "Am Indian/Alaska Native Alone",
|
international_migr_2019 AS int_migr,
|
||||||
p0010006 AS "Asian Alone",
|
domestic_migr_2019 AS dom_migr,
|
||||||
p0010007 AS "Native Hawaiian and Other Pacific Islander Alone",
|
residual_2019 AS residual
|
||||||
p0010008 AS "Some Other Race Alone",
|
FROM us_counties_pop_est_2019;
|
||||||
p0010009 AS "Two or More Races"
|
|
||||||
FROM us_counties_2010;
|
|
||||||
|
|
||||||
-- Listing 6-5: Adding two columns in us_counties_2010
|
SELECT * FROM us_counties_pop_est_2019;
|
||||||
|
|
||||||
SELECT geo_name,
|
-- Listing 6-5: Subtracting two columns in us_counties_pop_est_2019
|
||||||
state_us_abbreviation AS "st",
|
|
||||||
p0010003 AS "White Alone",
|
SELECT county_name AS county,
|
||||||
p0010004 AS "Black Alone",
|
state_name AS state,
|
||||||
p0010003 + p0010004 AS "Total White and Black"
|
births_2019 AS births,
|
||||||
FROM us_counties_2010;
|
deaths_2019 AS deaths,
|
||||||
|
births_2019 - deaths_2019 AS natural_increase
|
||||||
|
FROM us_counties_pop_est_2019;
|
||||||
|
|
||||||
-- Listing 6-6: Checking Census data totals
|
-- Listing 6-6: Checking Census data totals
|
||||||
|
|
||||||
SELECT geo_name,
|
SELECT county_name AS county,
|
||||||
state_us_abbreviation AS "st",
|
state_name AS state,
|
||||||
p0010001 AS "Total",
|
pop_est_2019 AS pop,
|
||||||
p0010003 + p0010004 + p0010005 + p0010006 + p0010007
|
pop_est_2018 + births_2019 - deaths_2019 +
|
||||||
+ p0010008 + p0010009 AS "All Races",
|
international_migr_2019 + domestic_migr_2019 +
|
||||||
(p0010003 + p0010004 + p0010005 + p0010006 + p0010007
|
residual_2019 AS components_total,
|
||||||
+ p0010008 + p0010009) - p0010001 AS "Difference"
|
pop_est_2019 - (pop_est_2018 + births_2019 - deaths_2019 +
|
||||||
FROM us_counties_2010
|
international_migr_2019 + domestic_migr_2019 +
|
||||||
ORDER BY "Difference" DESC;
|
residual_2019) AS difference
|
||||||
|
FROM us_counties_pop_est_2019
|
||||||
|
ORDER BY difference DESC;
|
||||||
|
|
||||||
-- Listing 6-7: Calculating the percent of the population that is
|
-- Listing 6-7: Calculating the percent of a county's area that is water
|
||||||
-- Asian by county (percent of the whole)
|
|
||||||
|
SELECT county_name AS county,
|
||||||
|
state_name AS state,
|
||||||
|
area_water::numeric / (area_land + area_water) * 100 AS pct_water
|
||||||
|
FROM us_counties_pop_est_2019
|
||||||
|
ORDER BY pct_water DESC;
|
||||||
|
|
||||||
SELECT geo_name,
|
|
||||||
state_us_abbreviation AS "st",
|
|
||||||
(CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian"
|
|
||||||
FROM us_counties_2010
|
|
||||||
ORDER BY "pct_asian" DESC;
|
|
||||||
|
|
||||||
-- Listing 6-8: Calculating percent change
|
-- Listing 6-8: Calculating percent change
|
||||||
|
|
||||||
CREATE TABLE percent_change (
|
CREATE TABLE percent_change (
|
||||||
department varchar(20),
|
department text,
|
||||||
spend_2014 numeric(10,2),
|
spend_2019 numeric(10,2),
|
||||||
spend_2017 numeric(10,2)
|
spend_2022 numeric(10,2)
|
||||||
);
|
);
|
||||||
|
|
||||||
INSERT INTO percent_change
|
INSERT INTO percent_change
|
||||||
VALUES
|
VALUES
|
||||||
('Building', 250000, 289000),
|
|
||||||
('Assessor', 178556, 179500),
|
('Assessor', 178556, 179500),
|
||||||
('Library', 87777, 90001),
|
('Building', 250000, 289000),
|
||||||
('Clerk', 451980, 650000),
|
('Clerk', 451980, 650000),
|
||||||
('Police', 250000, 223000),
|
('Library', 87777, 90001),
|
||||||
('Recreation', 199000, 195000);
|
('Parks', 250000, 223000),
|
||||||
|
('Water', 199000, 195000);
|
||||||
|
|
||||||
SELECT department,
|
SELECT department,
|
||||||
spend_2014,
|
spend_2019,
|
||||||
spend_2017,
|
spend_2022,
|
||||||
round( (spend_2017 - spend_2014) /
|
round( (spend_2022 - spend_2019) /
|
||||||
spend_2014 * 100, 1 ) AS "pct_change"
|
spend_2019 * 100, 1 ) AS pct_change
|
||||||
FROM percent_change;
|
FROM percent_change;
|
||||||
|
|
||||||
-- Listing 6-9: Using sum() and avg() aggregate functions
|
-- Listing 6-9: Using sum() and avg() aggregate functions
|
||||||
|
|
||||||
SELECT sum(p0010001) AS "County Sum",
|
SELECT sum(pop_est_2019) AS county_sum,
|
||||||
round(avg(p0010001), 0) AS "County Average"
|
round(avg(pop_est_2019), 0) AS county_average
|
||||||
FROM us_counties_2010;
|
FROM us_counties_pop_est_2019;
|
||||||
|
|
||||||
-- Listing 6-10: Testing SQL percentile functions
|
-- Listing 6-10: Testing SQL percentile functions
|
||||||
|
|
||||||
@ -126,37 +128,37 @@ FROM percentile_test;
|
|||||||
|
|
||||||
-- Listing 6-11: Using sum(), avg(), and percentile_cont() aggregate functions
|
-- Listing 6-11: Using sum(), avg(), and percentile_cont() aggregate functions
|
||||||
|
|
||||||
SELECT sum(p0010001) AS "County Sum",
|
SELECT sum(pop_est_2019) AS county_sum,
|
||||||
round(avg(p0010001), 0) AS "County Average",
|
round(avg(pop_est_2019), 0) AS county_average,
|
||||||
percentile_cont(.5)
|
percentile_cont(.5)
|
||||||
WITHIN GROUP (ORDER BY p0010001) AS "County Median"
|
WITHIN GROUP (ORDER BY pop_est_2019) AS county_median
|
||||||
FROM us_counties_2010;
|
FROM us_counties_pop_est_2019;
|
||||||
|
select * from us_counties_pop_est_2019 order by pop_est_2019 asc;
|
||||||
-- Listing 6-12: Passing an array of values to percentile_cont()
|
-- Listing 6-12: Passing an array of values to percentile_cont()
|
||||||
|
|
||||||
-- quartiles
|
-- quartiles
|
||||||
SELECT percentile_cont(array[.25,.5,.75])
|
SELECT percentile_cont(array[.25,.5,.75])
|
||||||
WITHIN GROUP (ORDER BY p0010001) AS "quartiles"
|
WITHIN GROUP (ORDER BY pop_est_2019) AS quartiles
|
||||||
FROM us_counties_2010;
|
FROM us_counties_pop_est_2019;
|
||||||
|
|
||||||
-- Extra:
|
-- Extra:
|
||||||
-- quintiles
|
-- quintiles
|
||||||
SELECT percentile_cont(array[.2,.4,.6,.8])
|
SELECT percentile_cont(array[.2,.4,.6,.8])
|
||||||
WITHIN GROUP (ORDER BY p0010001) AS "quintiles"
|
WITHIN GROUP (ORDER BY pop_est_2019) AS quintiles
|
||||||
FROM us_counties_2010;
|
FROM us_counties_pop_est_2019;
|
||||||
|
|
||||||
-- deciles
|
-- deciles
|
||||||
SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
|
SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
|
||||||
WITHIN GROUP (ORDER BY p0010001) AS "deciles"
|
WITHIN GROUP (ORDER BY pop_est_2019) AS deciles
|
||||||
FROM us_counties_2010;
|
FROM us_counties_pop_est_2019;
|
||||||
|
|
||||||
-- Listing 6-13: Using unnest() to turn an array into rows
|
-- Listing 6-13: Using unnest() to turn an array into rows
|
||||||
|
|
||||||
SELECT unnest(
|
SELECT unnest(
|
||||||
percentile_cont(array[.25,.5,.75])
|
percentile_cont(array[.25,.5,.75])
|
||||||
WITHIN GROUP (ORDER BY p0010001)
|
WITHIN GROUP (ORDER BY pop_est_2019)
|
||||||
) AS "quartiles"
|
) AS quartiles
|
||||||
FROM us_counties_2010;
|
FROM us_counties_pop_est_2019;
|
||||||
|
|
||||||
-- Listing 6-14: Creating a median() aggregate function in PostgreSQL
|
-- Listing 6-14: Creating a median() aggregate function in PostgreSQL
|
||||||
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
|
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
|
||||||
@ -194,14 +196,14 @@ CREATE AGGREGATE median(anyelement) (
|
|||||||
|
|
||||||
-- Listing 6-15: Using a median() aggregate function
|
-- Listing 6-15: Using a median() aggregate function
|
||||||
|
|
||||||
SELECT sum(p0010001) AS "County Sum",
|
SELECT sum(pop_est_2019) AS county_sum,
|
||||||
round(avg(p0010001), 0) AS "County Average",
|
round(avg(pop_est_2019), 0) AS county_average,
|
||||||
median(p0010001) AS "County Median",
|
median(pop_est_2019) AS county_median_func,
|
||||||
percentile_cont(.5)
|
percentile_cont(.5)
|
||||||
WITHIN GROUP (ORDER BY P0010001) AS "50th Percentile"
|
WITHIN GROUP (ORDER BY pop_est_2019) AS county_median_perc
|
||||||
FROM us_counties_2010;
|
FROM us_counties_pop_est_2019;
|
||||||
|
|
||||||
-- Listing 6-16: Finding the most-frequent value with mode()
|
-- Listing 6-16: Finding the most-frequent value with mode()
|
||||||
|
|
||||||
SELECT mode() WITHIN GROUP (ORDER BY p0010001)
|
SELECT mode() WITHIN GROUP (ORDER BY pop_est_2019)
|
||||||
FROM us_counties_2010;
|
FROM us_counties_pop_est_2019;
|
||||||
|
|||||||
@ -5,9 +5,11 @@
|
|||||||
-- Try It Yourself Questions and Answers
|
-- Try It Yourself Questions and Answers
|
||||||
----------------------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
--------------------------------------------------------------
|
|
||||||
|
|
||||||
|
----------------------------------------------------------------------------
|
||||||
-- Chapter 2: Creating Your First Database and Table
|
-- Chapter 2: Creating Your First Database and Table
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. Imagine you're building a database to catalog all the animals at your
|
-- 1. Imagine you're building a database to catalog all the animals at your
|
||||||
-- local zoo. You want one table for tracking all the kinds of animals and
|
-- local zoo. You want one table for tracking all the kinds of animals and
|
||||||
@ -20,27 +22,30 @@
|
|||||||
-- The first table will hold the animal types and their conservation status:
|
-- The first table will hold the animal types and their conservation status:
|
||||||
|
|
||||||
CREATE TABLE animal_types (
|
CREATE TABLE animal_types (
|
||||||
animal_type_id bigserial CONSTRAINT animal_types_key PRIMARY KEY,
|
animal_type_id bigint PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
|
||||||
common_name varchar(100) NOT NULL,
|
common_name text NOT NULL,
|
||||||
scientific_name varchar(100) NOT NULL,
|
scientific_name text NOT NULL,
|
||||||
conservation_status varchar(50) NOT NULL
|
conservation_status text NOT NULL,
|
||||||
|
CONSTRAINT common_name_unique UNIQUE (common_name)
|
||||||
);
|
);
|
||||||
|
|
||||||
-- Note that I have added keywords on some columns that define constraints
|
-- It's OK if your answer doesn't have all the keywords in the example above. Those
|
||||||
-- such as a PRIMARY KEY. You will learn about these in Chapters 6 and 7.
|
-- keywords reference concepts you'll learn in later chapters, including table
|
||||||
|
-- constraints, primary keys and and IDENTITY columns. What's important is that you
|
||||||
|
-- considered the individual data items you would want to track.
|
||||||
|
|
||||||
-- The second table will hold data on individual animals. Note that the
|
-- The second table will hold data on individual animals. Note that the
|
||||||
-- column animal_type_id references the column of the same name in the
|
-- column animal_type_id references the column of the same name in the
|
||||||
-- table animal types. This is a foreign key, which you will learn about in
|
-- table animal types. This is a foreign key, which you will learn about in
|
||||||
-- Chapter 7.
|
-- Chapter 8.
|
||||||
|
|
||||||
CREATE TABLE menagerie (
|
CREATE TABLE menagerie (
|
||||||
menagerie_id bigserial CONSTRAINT menagerie_key PRIMARY KEY,
|
menagerie_id bigint PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
|
||||||
animal_type_id bigint REFERENCES animal_types (animal_type_id),
|
common_name text REFERENCES animal_types (common_name),
|
||||||
date_acquired date NOT NULL,
|
date_acquired date NOT NULL,
|
||||||
gender varchar(1),
|
gender text,
|
||||||
acquired_from varchar(100),
|
acquired_from text,
|
||||||
name varchar(100),
|
name text,
|
||||||
notes text
|
notes text
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -54,15 +59,16 @@ VALUES ('Bengal Tiger', 'Panthera tigris tigris', 'Endangered'),
|
|||||||
('Arctic Wolf', 'Canis lupus arctos', 'Least Concern');
|
('Arctic Wolf', 'Canis lupus arctos', 'Least Concern');
|
||||||
-- data source: https://www.worldwildlife.org/species/directory?direction=desc&sort=extinction_status
|
-- data source: https://www.worldwildlife.org/species/directory?direction=desc&sort=extinction_status
|
||||||
|
|
||||||
INSERT INTO menagerie (animal_type_id, date_acquired, gender, acquired_from, name, notes)
|
INSERT INTO menagerie (common_name, date_acquired, gender, acquired_from, name, notes)
|
||||||
VALUES
|
VALUES
|
||||||
(1, '3/12/1996', 'F', 'Dhaka Zoo', 'Ariel', 'Healthy coat at last exam.'),
|
('Bengal Tiger', '3/12/1996', 'F', 'Dhaka Zoo', 'Ariel', 'Healthy coat at last exam.'),
|
||||||
(2, '9/30/2000', 'F', 'National Zoo', 'Freddy', 'Strong appetite.');
|
('Arctic Wolf', '9/30/2000', 'F', 'National Zoo', 'Freddy', 'Strong appetite.');
|
||||||
|
|
||||||
-- To view data via pgAdmin, in the object browser, right-click Tables and
|
-- To view data via pgAdmin, in the object browser, right-click Tables and
|
||||||
-- select Refresh. Then right-click the table name and select
|
-- select Refresh. Then right-click the table name and select
|
||||||
-- View/Edit Data > All Rows
|
-- View/Edit Data > All Rows
|
||||||
|
|
||||||
|
|
||||||
-- 2b. Create an additional INSERT statement for one of your tables. On purpose,
|
-- 2b. Create an additional INSERT statement for one of your tables. On purpose,
|
||||||
-- leave out one of the required commas separating the entries in the VALUES
|
-- leave out one of the required commas separating the entries in the VALUES
|
||||||
-- clause of the query. What is the error message? Does it help you find the
|
-- clause of the query. What is the error message? Does it help you find the
|
||||||
@ -73,9 +79,10 @@ VALUES
|
|||||||
INSERT INTO animal_types (common_name, scientific_name, conservation_status)
|
INSERT INTO animal_types (common_name, scientific_name, conservation_status)
|
||||||
VALUES ('Javan Rhino', 'Rhinoceros sondaicus' 'Critically Endangered');
|
VALUES ('Javan Rhino', 'Rhinoceros sondaicus' 'Critically Endangered');
|
||||||
|
|
||||||
--------------------------------------------------------------
|
|
||||||
|
----------------------------------------------------------------------------
|
||||||
-- Chapter 3: Beginning Data Exploration with SELECT
|
-- Chapter 3: Beginning Data Exploration with SELECT
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. The school district superintendent asks for a list of teachers in each
|
-- 1. The school district superintendent asks for a list of teachers in each
|
||||||
-- school. Write a query that lists the schools in alphabetical order along
|
-- school. Write a query that lists the schools in alphabetical order along
|
||||||
@ -107,9 +114,9 @@ WHERE hire_date >= '2010-01-01'
|
|||||||
ORDER BY salary DESC;
|
ORDER BY salary DESC;
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
-- Chapter 4: Understanding Data Types
|
-- Chapter 4: Understanding Data Types
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. Your company delivers fruit and vegetables to local grocery stores, and
|
-- 1. Your company delivers fruit and vegetables to local grocery stores, and
|
||||||
-- you need to track the mileage driven by each driver each day to a tenth
|
-- you need to track the mileage driven by each driver each day to a tenth
|
||||||
@ -136,11 +143,16 @@ numeric(4,1)
|
|||||||
-- Answer:
|
-- Answer:
|
||||||
|
|
||||||
varchar(50)
|
varchar(50)
|
||||||
|
-- or
|
||||||
|
text
|
||||||
|
|
||||||
-- 50 characters is a reasonable length for names, and varchar() ensures you
|
-- 50 characters is a reasonable length for names, and using either varchar(50)
|
||||||
-- will not waste space when names are shorter. Separating first and last names
|
-- or text ensures you will not waste space when names are shorter. Using text will
|
||||||
|
-- ensure that if you run into the exceptionally rare circumstance of a name longer
|
||||||
|
-- than 50 characters, you'll be covered. Also, separating first and last names
|
||||||
-- into their own columns will let you later sort on each independently.
|
-- into their own columns will let you later sort on each independently.
|
||||||
|
|
||||||
|
|
||||||
-- 3. Assume you have a text column that includes strings formatted as dates.
|
-- 3. Assume you have a text column that includes strings formatted as dates.
|
||||||
-- One of the strings is written as '4//2017'. What will happen when you try
|
-- One of the strings is written as '4//2017'. What will happen when you try
|
||||||
-- to convert that string to the timestamp data type?
|
-- to convert that string to the timestamp data type?
|
||||||
@ -152,15 +164,15 @@ varchar(50)
|
|||||||
SELECT CAST('4//2021' AS timestamp with time zone);
|
SELECT CAST('4//2021' AS timestamp with time zone);
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
-- Chapter 5: Importing and Exporting Data
|
-- Chapter 5: Importing and Exporting Data
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. Write a WITH statement to include with COPY to handle the import of an
|
-- 1. Write a WITH statement to include with COPY to handle the import of an
|
||||||
-- imaginary text file that has a first couple of rows that look like this:
|
-- imaginary text file that has a first couple of rows that look like this:
|
||||||
|
|
||||||
-- id:movie:actor
|
id:movie:actor
|
||||||
-- 50:#Mission: Impossible#:Tom Cruise
|
50:#Mission: Impossible#:Tom Cruise
|
||||||
|
|
||||||
-- Answer: The WITH statement will need the options seen here:
|
-- Answer: The WITH statement will need the options seen here:
|
||||||
|
|
||||||
@ -179,13 +191,14 @@ CREATE TABLE actors (
|
|||||||
);
|
);
|
||||||
|
|
||||||
-- Note: You may never encounter a file that uses a colon as a delimiter and
|
-- Note: You may never encounter a file that uses a colon as a delimiter and
|
||||||
-- and pound sign for quoting, but anything is possible.
|
-- pound sign for quoting, but anything is possible!
|
||||||
|
|
||||||
-- 2. Using the table us_counties_pop_est_2019 you created and filled in this chapter,
|
|
||||||
-- export to a CSV file the 20 counties in the United States that have the most
|
-- 2. Using the table us_counties_pop_est_2019 you created and filled in this
|
||||||
-- housing units. Make sure you export only each county's name, state, and
|
-- chapter, export to a CSV file the 20 counties in the United States that had
|
||||||
-- number of housing units. (Hint: Housing units are totaled for each county in
|
-- the most births. Make sure you export only each county’s name, state, and
|
||||||
-- the column housing_unit_count_100_percent.
|
-- number of births. (Hint: births are totaled for each county in the column
|
||||||
|
-- births_2019.)
|
||||||
|
|
||||||
-- Answer:
|
-- Answer:
|
||||||
|
|
||||||
@ -193,12 +206,13 @@ COPY (
|
|||||||
SELECT county_name, state_name, births_2019
|
SELECT county_name, state_name, births_2019
|
||||||
FROM us_counties_pop_est_2019 ORDER BY births_2019 DESC LIMIT 20
|
FROM us_counties_pop_est_2019 ORDER BY births_2019 DESC LIMIT 20
|
||||||
)
|
)
|
||||||
TO 'C:\YourDirectory\us_counties_housing_export.txt'
|
TO 'C:\YourDirectory\us_counties_births_export.txt'
|
||||||
WITH (FORMAT CSV, HEADER);
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
-- Note: This COPY statement uses a SELECT statement to limit the output to
|
-- Note: This COPY statement uses a SELECT statement to limit the output to
|
||||||
-- only the desired columns and rows.
|
-- only the desired columns and rows.
|
||||||
|
|
||||||
|
|
||||||
-- 3. Imagine you're importing a file that contains a column with these values:
|
-- 3. Imagine you're importing a file that contains a column with these values:
|
||||||
-- 17519.668
|
-- 17519.668
|
||||||
-- 20084.461
|
-- 20084.461
|
||||||
@ -212,9 +226,9 @@ WITH (FORMAT CSV, HEADER);
|
|||||||
-- type for the example data is numeric(8,3).
|
-- type for the example data is numeric(8,3).
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
-- Chapter 5: Basic Math and Stats with SQL
|
-- Chapter 6: Basic Math and Stats with SQL
|
||||||
--------------------------------------------------------------
|
----------------------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. Write a SQL statement for calculating the area of a circle whose radius is
|
-- 1. Write a SQL statement for calculating the area of a circle whose radius is
|
||||||
-- 5 inches. Do you need parentheses in your calculation? Why or why not?
|
-- 5 inches. Do you need parentheses in your calculation? Why or why not?
|
||||||
@ -231,64 +245,68 @@ SELECT 3.14 * 5 ^ 2;
|
|||||||
|
|
||||||
SELECT 3.14 * (5 ^ 2);
|
SELECT 3.14 * (5 ^ 2);
|
||||||
|
|
||||||
-- 2. Using the 2010 Census county data, find out which New York state county
|
|
||||||
-- has the highest percentage of the population that identified as "American
|
-- 2. Using the 2019 Census county estimates data, calculate a ratio of births to
|
||||||
-- Indian/Alaska Native Alone." What can you learn about that county from online
|
-- deaths for each county in New York state. Which region of the state generally
|
||||||
-- research that explains the relatively large proportion of American Indian
|
-- saw a higher ratio of births to deaths in 2019?
|
||||||
-- population compared with other New York counties?
|
|
||||||
|
|
||||||
-- Answer:
|
-- Answer:
|
||||||
-- Franklin County, N.Y., with 7.4%. The county contains the St. Regis Mohawk
|
|
||||||
-- Reservation. https://en.wikipedia.org/wiki/St._Regis_Mohawk_Reservation
|
|
||||||
|
|
||||||
SELECT geo_name,
|
SELECT county_name,
|
||||||
state_us_abbreviation,
|
state_name,
|
||||||
p0010001 AS total_population,
|
births_2019 AS births,
|
||||||
p0010005 AS american_indian_alaska_native_alone,
|
deaths_2019 AS DEATHS,
|
||||||
(CAST (p0010005 AS numeric(8,1)) / p0010001) * 100
|
births_2019::numeric / deaths_2019 AS birth_death_ratio
|
||||||
AS percent_american_indian_alaska_native_alone
|
FROM us_counties_pop_est_2019
|
||||||
FROM us_counties_2010
|
WHERE state_name = 'New York'
|
||||||
WHERE state_us_abbreviation = 'NY'
|
ORDER BY birth_death_ratio DESC;
|
||||||
ORDER BY percent_american_indian_alaska_native_alone DESC;
|
|
||||||
|
|
||||||
-- 3. Was the 2010 median county population higher in California or New York?
|
-- Generally, counties in and around New York City had the highest ratio of births
|
||||||
|
-- to deaths in the 2019 estimates. One exception to the trend is Jefferson County,
|
||||||
|
-- which is upstate on the U.S./Canadian border.
|
||||||
|
|
||||||
|
|
||||||
|
-- 3. Was the 2019 median county population estimate higher in California or New York?
|
||||||
|
|
||||||
-- Answer:
|
-- Answer:
|
||||||
-- California had a median county population of 179,140.5 in 2010, almost double
|
-- California had a median county population estimate of 187,029 in 2019, almost double
|
||||||
-- that of New York, at 91,301. Here are two solutions:
|
-- that of New York, at 86,687. Here are two solutions:
|
||||||
|
|
||||||
-- First, you can find the median for each state one at a time:
|
-- First, you can find the median for each state one at a time:
|
||||||
|
|
||||||
SELECT percentile_cont(.5)
|
SELECT percentile_cont(.5)
|
||||||
WITHIN GROUP (ORDER BY p0010001)
|
WITHIN GROUP (ORDER BY pop_est_2019)
|
||||||
FROM us_counties_2010
|
FROM us_counties_pop_est_2019
|
||||||
WHERE state_us_abbreviation = 'NY';
|
WHERE state_name = 'New York';
|
||||||
|
|
||||||
SELECT percentile_cont(.5)
|
SELECT percentile_cont(.5)
|
||||||
WITHIN GROUP (ORDER BY p0010001)
|
WITHIN GROUP (ORDER BY pop_est_2019)
|
||||||
FROM us_counties_2010
|
FROM us_counties_pop_est_2019
|
||||||
WHERE state_us_abbreviation = 'CA';
|
WHERE state_name = 'California';
|
||||||
|
|
||||||
-- Or both in one query (credit: https://github.com/Kennith-eng)
|
-- Or both in one query (credit: https://github.com/Kennith-eng)
|
||||||
|
|
||||||
SELECT state_us_abbreviation,
|
SELECT state_name,
|
||||||
percentile_cont(0.5)
|
percentile_cont(0.5)
|
||||||
WITHIN GROUP (ORDER BY p0010001) AS median
|
WITHIN GROUP (ORDER BY pop_est_2019) AS median
|
||||||
FROM us_counties_2010
|
FROM us_counties_pop_est_2019
|
||||||
WHERE state_us_abbreviation IN ('NY', 'CA')
|
WHERE state_name IN ('New York', 'California')
|
||||||
GROUP BY state_us_abbreviation;
|
GROUP BY state_name;
|
||||||
|
|
||||||
-- Finally, this query shows the median for each state:
|
-- Finally, this query shows the median for each state:
|
||||||
|
|
||||||
SELECT state_us_abbreviation,
|
SELECT state_name,
|
||||||
percentile_cont(0.5)
|
percentile_cont(0.5)
|
||||||
WITHIN GROUP (ORDER BY p0010001) AS median
|
WITHIN GROUP (ORDER BY pop_est_2019) AS median
|
||||||
FROM us_counties_2010
|
FROM us_counties_pop_est_2019
|
||||||
GROUP BY state_us_abbreviation;
|
GROUP BY state_name;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 6: Joining Tables in a Relational Database
|
-- Chapter 7: Joining Tables in a Relational Database
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. The table us_counties_2010 contains 3,143 rows, and us_counties_2000 has
|
-- 1. The table us_counties_2010 contains 3,143 rows, and us_counties_2000 has
|
||||||
@ -368,7 +386,7 @@ ON c2010.state_fips = c2000.state_fips
|
|||||||
ORDER BY pct_change ASC;
|
ORDER BY pct_change ASC;
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 7: Table Design that Works for You
|
-- Chapter 8: Table Design that Works for You
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- Consider the following two tables from a database you’re making to keep
|
-- Consider the following two tables from a database you’re making to keep
|
||||||
@ -461,7 +479,7 @@ CREATE TABLE songs (
|
|||||||
|
|
||||||
|
|
||||||
----------------------------------------------------------------
|
----------------------------------------------------------------
|
||||||
-- Chapter 8: Extracting Information by Grouping and Summarizing
|
-- Chapter 9: Extracting Information by Grouping and Summarizing
|
||||||
----------------------------------------------------------------
|
----------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. We saw that library visits have declined in most places. But what is the
|
-- 1. We saw that library visits have declined in most places. But what is the
|
||||||
@ -582,7 +600,7 @@ WHERE pls14.fscskey IS NULL OR pls09.fscskey IS NULL;
|
|||||||
-- that do not appear in both tables.
|
-- that do not appear in both tables.
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 9: Inspecting and Modifying Data
|
-- Chapter 10: Inspecting and Modifying Data
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- In this exercise, you’ll turn the meat_poultry_egg_inspect table into useful
|
-- In this exercise, you’ll turn the meat_poultry_egg_inspect table into useful
|
||||||
@ -636,7 +654,7 @@ WHERE meat_processing = TRUE AND
|
|||||||
poultry_processing = TRUE;
|
poultry_processing = TRUE;
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 10: Statistical Functions in SQL
|
-- Chapter 11: Statistical Functions in SQL
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. In Listing 10-2, the correlation coefficient, or r value, of the
|
-- 1. In Listing 10-2, the correlation coefficient, or r value, of the
|
||||||
@ -720,7 +738,7 @@ WHERE popu_lsa >= 250000;
|
|||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 11: Working with Dates and Times
|
-- Chapter 12: Working with Dates and Times
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. Using the New York City taxi data, calculate the length of each ride using
|
-- 1. Using the New York City taxi data, calculate the length of each ride using
|
||||||
@ -787,7 +805,7 @@ WHERE tpep_dropoff_datetime - tpep_pickup_datetime <= '3 hours'::interval;
|
|||||||
-- expect this given that cost of a taxi ride is based on both time and distance.
|
-- expect this given that cost of a taxi ride is based on both time and distance.
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 12: Advanced Query Techniques
|
-- Chapter 13: Advanced Query Techniques
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. Revise the code in Listing 12-15 to dig deeper into the nuances of
|
-- 1. Revise the code in Listing 12-15 to dig deeper into the nuances of
|
||||||
@ -858,7 +876,7 @@ AS (flavor varchar(20),
|
|||||||
|
|
||||||
|
|
||||||
-------------------------------------------------------------
|
-------------------------------------------------------------
|
||||||
-- Chapter 13: Mining Text to Find Meaningful Data
|
-- Chapter 14: Mining Text to Find Meaningful Data
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. The style guide of a publishing company you're writing for wants you to
|
-- 1. The style guide of a publishing company you're writing for wants you to
|
||||||
@ -934,7 +952,7 @@ LIMIT 5;
|
|||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 14: Analyzing Spatial Data with PostGIS
|
-- Chapter 15: Analyzing Spatial Data with PostGIS
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. Earlier, you found which US county has the largest area. Now,
|
-- 1. Earlier, you found which US county has the largest area. Now,
|
||||||
@ -1004,7 +1022,14 @@ ORDER BY census.statefp10, census.name10;
|
|||||||
-- Can you spot it?
|
-- Can you spot it?
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 15: Saving Time with Views, Functions, and Triggers
|
-- Chapter 16: Working with JSON Data
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- To come ...
|
||||||
|
|
||||||
|
|
||||||
|
--------------------------------------------------------------
|
||||||
|
-- Chapter 17: Saving Time with Views, Functions, and Triggers
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- 1. Create a view that displays the number of New York City taxi trips per
|
-- 1. Create a view that displays the number of New York City taxi trips per
|
||||||
@ -1085,14 +1110,14 @@ SELECT * FROM meat_poultry_egg_inspect
|
|||||||
WHERE company = 'testcompany';
|
WHERE company = 'testcompany';
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 16: Using PostgreSQL From the Command Line
|
-- Chapter 18: Using PostgreSQL From the Command Line
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- For this chapter, use psql to review any of the exercises in the book.
|
-- For this chapter, use psql to review any of the exercises in the book.
|
||||||
|
|
||||||
|
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
-- Chapter 17: Maintaining Your Database
|
-- Chapter 19: Maintaining Your Database
|
||||||
--------------------------------------------------------------
|
--------------------------------------------------------------
|
||||||
|
|
||||||
-- To back up the gis_analysis database, use the pg_dump utility at the command line:
|
-- To back up the gis_analysis database, use the pg_dump utility at the command line:
|
||||||
@ -1100,7 +1125,7 @@ WHERE company = 'testcompany';
|
|||||||
|
|
||||||
|
|
||||||
-----------------------------------------------------------------
|
-----------------------------------------------------------------
|
||||||
-- Chapter 18: Identifying and Telling the Story Behind Your Data
|
-- Chapter 20: Identifying and Telling the Story Behind Your Data
|
||||||
-----------------------------------------------------------------
|
-----------------------------------------------------------------
|
||||||
|
|
||||||
-- This is a non-coding chapter.
|
-- This is a non-coding chapter.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user