Updates to Chapter 6 and Try It Yourself
parent b0f23d78a4
commit 360bade5a2
@@ -1,9 +1,9 @@
--------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data
----------------------------------------------------------------------------
-- Practical SQL: A Beginner's Guide to Storytelling with Data, 2nd Edition
-- by Anthony DeBarros

-- Chapter 6 Code Examples
--------------------------------------------------------------
----------------------------------------------------------------------------

-- Listing 6-1: Basic addition, subtraction and multiplication with SQL

@@ -34,79 +34,81 @@ SELECT (7 + 8) * 9; -- answer: 135
SELECT 3 ^ 3 - 1; -- answer: 26
SELECT 3 ^ (3 - 1); -- answer: 9

-- Listing 6-4: Selecting Census population columns by race with aliases
-- Listing 6-4: Selecting Census population estimate columns with aliases

SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010001 AS "Total Population",
       p0010003 AS "White Alone",
       p0010004 AS "Black or African American Alone",
       p0010005 AS "Am Indian/Alaska Native Alone",
       p0010006 AS "Asian Alone",
       p0010007 AS "Native Hawaiian and Other Pacific Islander Alone",
       p0010008 AS "Some Other Race Alone",
       p0010009 AS "Two or More Races"
FROM us_counties_2010;
SELECT county_name AS county,
       state_name AS state,
       pop_est_2019 AS pop,
       births_2019 AS births,
       deaths_2019 AS deaths,
       international_migr_2019 AS int_migr,
       domestic_migr_2019 AS dom_migr,
       residual_2019 AS residual
FROM us_counties_pop_est_2019;

-- Listing 6-5: Adding two columns in us_counties_2010
SELECT * FROM us_counties_pop_est_2019;

SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010003 AS "White Alone",
       p0010004 AS "Black Alone",
       p0010003 + p0010004 AS "Total White and Black"
FROM us_counties_2010;
-- Listing 6-5: Subtracting two columns in us_counties_pop_est_2019

SELECT county_name AS county,
       state_name AS state,
       births_2019 AS births,
       deaths_2019 AS deaths,
       births_2019 - deaths_2019 AS natural_increase
FROM us_counties_pop_est_2019;

-- Listing 6-6: Checking Census data totals

SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010001 AS "Total",
       p0010003 + p0010004 + p0010005 + p0010006 + p0010007
           + p0010008 + p0010009 AS "All Races",
       (p0010003 + p0010004 + p0010005 + p0010006 + p0010007
           + p0010008 + p0010009) - p0010001 AS "Difference"
FROM us_counties_2010
ORDER BY "Difference" DESC;
SELECT county_name AS county,
       state_name AS state,
       pop_est_2019 AS pop,
       pop_est_2018 + births_2019 - deaths_2019 +
           international_migr_2019 + domestic_migr_2019 +
           residual_2019 AS components_total,
       pop_est_2019 - (pop_est_2018 + births_2019 - deaths_2019 +
           international_migr_2019 + domestic_migr_2019 +
           residual_2019) AS difference
FROM us_counties_pop_est_2019
ORDER BY difference DESC;

-- Listing 6-7: Calculating the percent of the population that is
-- Asian by county (percent of the whole)
-- Listing 6-7: Calculating the percent of a county's area that is water

SELECT county_name AS county,
       state_name AS state,
       area_water::numeric / (area_land + area_water) * 100 AS pct_water
FROM us_counties_pop_est_2019
ORDER BY pct_water DESC;

SELECT geo_name,
       state_us_abbreviation AS "st",
       (CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian"
FROM us_counties_2010
ORDER BY "pct_asian" DESC;

-- Listing 6-8: Calculating percent change

CREATE TABLE percent_change (
    department varchar(20),
    spend_2014 numeric(10,2),
    spend_2017 numeric(10,2)
    department text,
    spend_2019 numeric(10,2),
    spend_2022 numeric(10,2)
);

INSERT INTO percent_change
VALUES
    ('Building', 250000, 289000),
    ('Assessor', 178556, 179500),
    ('Library', 87777, 90001),
    ('Building', 250000, 289000),
    ('Clerk', 451980, 650000),
    ('Police', 250000, 223000),
    ('Recreation', 199000, 195000);
    ('Library', 87777, 90001),
    ('Parks', 250000, 223000),
    ('Water', 199000, 195000);

SELECT department,
       spend_2014,
       spend_2017,
       round( (spend_2017 - spend_2014) /
                   spend_2014 * 100, 1 ) AS "pct_change"
       spend_2019,
       spend_2022,
       round( (spend_2022 - spend_2019) /
                   spend_2019 * 100, 1 ) AS pct_change
FROM percent_change;

-- Listing 6-9: Using sum() and avg() aggregate functions

SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average"
FROM us_counties_2010;
SELECT sum(pop_est_2019) AS county_sum,
       round(avg(pop_est_2019), 0) AS county_average
FROM us_counties_pop_est_2019;

-- Listing 6-10: Testing SQL percentile functions

@@ -126,37 +128,37 @@ FROM percentile_test;

-- Listing 6-11: Using sum(), avg(), and percentile_cont() aggregate functions

SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average",
SELECT sum(pop_est_2019) AS county_sum,
       round(avg(pop_est_2019), 0) AS county_average,
       percentile_cont(.5)
       WITHIN GROUP (ORDER BY p0010001) AS "County Median"
FROM us_counties_2010;

       WITHIN GROUP (ORDER BY pop_est_2019) AS county_median
FROM us_counties_pop_est_2019;
select * from us_counties_pop_est_2019 order by pop_est_2019 asc;
-- Listing 6-12: Passing an array of values to percentile_cont()

-- quartiles
SELECT percentile_cont(array[.25,.5,.75])
       WITHIN GROUP (ORDER BY p0010001) AS "quartiles"
FROM us_counties_2010;
       WITHIN GROUP (ORDER BY pop_est_2019) AS quartiles
FROM us_counties_pop_est_2019;

-- Extra:
-- quintiles
SELECT percentile_cont(array[.2,.4,.6,.8])
       WITHIN GROUP (ORDER BY p0010001) AS "quintiles"
FROM us_counties_2010;
       WITHIN GROUP (ORDER BY pop_est_2019) AS quintiles
FROM us_counties_pop_est_2019;

-- deciles
SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
       WITHIN GROUP (ORDER BY p0010001) AS "deciles"
FROM us_counties_2010;
       WITHIN GROUP (ORDER BY pop_est_2019) AS deciles
FROM us_counties_pop_est_2019;

-- Listing 6-13: Using unnest() to turn an array into rows

SELECT unnest(
           percentile_cont(array[.25,.5,.75])
           WITHIN GROUP (ORDER BY p0010001)
           ) AS "quartiles"
FROM us_counties_2010;
           WITHIN GROUP (ORDER BY pop_est_2019)
           ) AS quartiles
FROM us_counties_pop_est_2019;

-- Listing 6-14: Creating a median() aggregate function in PostgreSQL
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
@@ -194,14 +196,14 @@ CREATE AGGREGATE median(anyelement) (

-- Listing 6-15: Using a median() aggregate function

SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average",
       median(p0010001) AS "County Median",
SELECT sum(pop_est_2019) AS county_sum,
       round(avg(pop_est_2019), 0) AS county_average,
       median(pop_est_2019) AS county_median_func,
       percentile_cont(.5)
       WITHIN GROUP (ORDER BY P0010001) AS "50th Percentile"
FROM us_counties_2010;
       WITHIN GROUP (ORDER BY pop_est_2019) AS county_median_perc
FROM us_counties_pop_est_2019;

-- Listing 6-16: Finding the most-frequent value with mode()

SELECT mode() WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010;
SELECT mode() WITHIN GROUP (ORDER BY pop_est_2019)
FROM us_counties_pop_est_2019;

@@ -5,9 +5,11 @@
-- Try It Yourself Questions and Answers
----------------------------------------------------------------------------

--------------------------------------------------------------


----------------------------------------------------------------------------
-- Chapter 2: Creating Your First Database and Table
--------------------------------------------------------------
----------------------------------------------------------------------------

-- 1. Imagine you're building a database to catalog all the animals at your
-- local zoo. You want one table for tracking all the kinds of animals and
@@ -20,27 +22,30 @@
-- The first table will hold the animal types and their conservation status:

CREATE TABLE animal_types (
    animal_type_id bigserial CONSTRAINT animal_types_key PRIMARY KEY,
    common_name varchar(100) NOT NULL,
    scientific_name varchar(100) NOT NULL,
    conservation_status varchar(50) NOT NULL
    animal_type_id bigint PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
    common_name text NOT NULL,
    scientific_name text NOT NULL,
    conservation_status text NOT NULL,
    CONSTRAINT common_name_unique UNIQUE (common_name)
);

-- Note that I have added keywords on some columns that define constraints
-- such as a PRIMARY KEY. You will learn about these in Chapters 6 and 7.
-- It's OK if your answer doesn't have all the keywords in the example above. Those
-- keywords reference concepts you'll learn in later chapters, including table
-- constraints, primary keys, and IDENTITY columns. What's important is that you
-- considered the individual data items you would want to track.

-- The second table will hold data on individual animals. Note that the
-- column animal_type_id references the column of the same name in the
-- table animal_types. This is a foreign key, which you will learn about in
-- Chapter 7.
-- Chapter 8.

CREATE TABLE menagerie (
    menagerie_id bigserial CONSTRAINT menagerie_key PRIMARY KEY,
    animal_type_id bigint REFERENCES animal_types (animal_type_id),
    menagerie_id bigint PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
    common_name text REFERENCES animal_types (common_name),
    date_acquired date NOT NULL,
    gender varchar(1),
    acquired_from varchar(100),
    name varchar(100),
    gender text,
    acquired_from text,
    name text,
    notes text
);

@@ -54,15 +59,16 @@ VALUES ('Bengal Tiger', 'Panthera tigris tigris', 'Endangered'),
       ('Arctic Wolf', 'Canis lupus arctos', 'Least Concern');
-- data source: https://www.worldwildlife.org/species/directory?direction=desc&sort=extinction_status

INSERT INTO menagerie (animal_type_id, date_acquired, gender, acquired_from, name, notes)
INSERT INTO menagerie (common_name, date_acquired, gender, acquired_from, name, notes)
VALUES
    (1, '3/12/1996', 'F', 'Dhaka Zoo', 'Ariel', 'Healthy coat at last exam.'),
    (2, '9/30/2000', 'F', 'National Zoo', 'Freddy', 'Strong appetite.');
    ('Bengal Tiger', '3/12/1996', 'F', 'Dhaka Zoo', 'Ariel', 'Healthy coat at last exam.'),
    ('Arctic Wolf', '9/30/2000', 'F', 'National Zoo', 'Freddy', 'Strong appetite.');

-- To view data via pgAdmin, in the object browser, right-click Tables and
-- select Refresh. Then right-click the table name and select
-- View/Edit Data > All Rows


-- 2b. Create an additional INSERT statement for one of your tables. On purpose,
-- leave out one of the required commas separating the entries in the VALUES
-- clause of the query. What is the error message? Does it help you find the
@@ -73,9 +79,10 @@ VALUES
INSERT INTO animal_types (common_name, scientific_name, conservation_status)
VALUES ('Javan Rhino', 'Rhinoceros sondaicus' 'Critically Endangered');
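
-- Running the statement above should stop with a syntax error at or near the
-- second string literal ('Critically Endangered'), since the two strings sit
-- side by side with no comma; that location hint points you at the mistake.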

--------------------------------------------------------------

----------------------------------------------------------------------------
-- Chapter 3: Beginning Data Exploration with SELECT
--------------------------------------------------------------
----------------------------------------------------------------------------

-- 1. The school district superintendent asks for a list of teachers in each
-- school. Write a query that lists the schools in alphabetical order along
@@ -107,9 +114,9 @@ WHERE hire_date >= '2010-01-01'
ORDER BY salary DESC;


--------------------------------------------------------------
----------------------------------------------------------------------------
-- Chapter 4: Understanding Data Types
--------------------------------------------------------------
----------------------------------------------------------------------------

-- 1. Your company delivers fruit and vegetables to local grocery stores, and
-- you need to track the mileage driven by each driver each day to a tenth
@@ -136,11 +143,16 @@ numeric(4,1)
-- Answer:

varchar(50)
-- or
text

-- 50 characters is a reasonable length for names, and varchar() ensures you
-- will not waste space when names are shorter. Separating first and last names
-- 50 characters is a reasonable length for names, and using either varchar(50)
-- or text ensures you will not waste space when names are shorter. Using text will
-- ensure that if you run into the exceptionally rare circumstance of a name longer
-- than 50 characters, you'll be covered. Also, separating first and last names
-- into their own columns will let you later sort on each independently.
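
-- A minimal sketch of those two name columns (the table and column names here
-- are illustrative, not part of the book's answer):
CREATE TABLE drivers (
    driver_id bigint PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
    first_name text,
    last_name text
);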


-- 3. Assume you have a text column that includes strings formatted as dates.
-- One of the strings is written as '4//2017'. What will happen when you try
-- to convert that string to the timestamp data type?
@@ -152,15 +164,15 @@ varchar(50)
SELECT CAST('4//2021' AS timestamp with time zone);
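
-- This cast should fail with an "invalid input syntax" error, because
-- PostgreSQL cannot interpret '4//2021' as a valid date/time value.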


--------------------------------------------------------------
----------------------------------------------------------------------------
-- Chapter 5: Importing and Exporting Data
--------------------------------------------------------------
----------------------------------------------------------------------------

-- 1. Write a WITH statement to include with COPY to handle the import of an
-- imaginary text file that has a first couple of rows that look like this:

-- id:movie:actor
-- 50:#Mission: Impossible#:Tom Cruise
id:movie:actor
50:#Mission: Impossible#:Tom Cruise

-- Answer: The WITH statement will need the options seen here:
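
-- A sketch of those options (the file path and file name are placeholders; the
-- key settings are the colon DELIMITER and the pound-sign QUOTE character):
COPY actors
FROM 'C:\YourDirectory\movies.txt'
WITH (FORMAT CSV, HEADER, DELIMITER ':', QUOTE '#');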

@@ -179,13 +191,14 @@ CREATE TABLE actors (
);

-- Note: You may never encounter a file that uses a colon as a delimiter and
-- and pound sign for quoting, but anything is possible.
-- pound sign for quoting, but anything is possible!

-- 2. Using the table us_counties_pop_est_2019 you created and filled in this chapter,
-- export to a CSV file the 20 counties in the United States that have the most
-- housing units. Make sure you export only each county's name, state, and
-- number of housing units. (Hint: Housing units are totaled for each county in
-- the column housing_unit_count_100_percent.

-- 2. Using the table us_counties_pop_est_2019 you created and filled in this
-- chapter, export to a CSV file the 20 counties in the United States that had
-- the most births. Make sure you export only each county’s name, state, and
-- number of births. (Hint: births are totaled for each county in the column
-- births_2019.)

-- Answer:

@@ -193,12 +206,13 @@ COPY (
    SELECT county_name, state_name, births_2019
    FROM us_counties_pop_est_2019 ORDER BY births_2019 DESC LIMIT 20
)
TO 'C:\YourDirectory\us_counties_housing_export.txt'
TO 'C:\YourDirectory\us_counties_births_export.txt'
WITH (FORMAT CSV, HEADER);

-- Note: This COPY statement uses a SELECT statement to limit the output to
-- only the desired columns and rows.


-- 3. Imagine you're importing a file that contains a column with these values:
-- 17519.668
-- 20084.461
@@ -212,9 +226,9 @@ WITH (FORMAT CSV, HEADER);
-- type for the example data is numeric(8,3).
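
-- As a sketch, a column declared for those values could look like this (the
-- table and column names here are illustrative):
CREATE TABLE number_import (
    imported_value numeric(8,3)
);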


--------------------------------------------------------------
-- Chapter 5: Basic Math and Stats with SQL
--------------------------------------------------------------
----------------------------------------------------------------------------
-- Chapter 6: Basic Math and Stats with SQL
----------------------------------------------------------------------------

-- 1. Write a SQL statement for calculating the area of a circle whose radius is
-- 5 inches. Do you need parentheses in your calculation? Why or why not?
@@ -231,64 +245,68 @@ SELECT 3.14 * 5 ^ 2;

SELECT 3.14 * (5 ^ 2);
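
-- Either form returns 78.5. The parentheses are optional here because the
-- exponentiation operator (^) is evaluated before multiplication.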

-- 2. Using the 2010 Census county data, find out which New York state county
-- has the highest percentage of the population that identified as "American
-- Indian/Alaska Native Alone." What can you learn about that county from online
-- research that explains the relatively large proportion of American Indian
-- population compared with other New York counties?

-- 2. Using the 2019 Census county estimates data, calculate a ratio of births to
-- deaths for each county in New York state. Which region of the state generally
-- saw a higher ratio of births to deaths in 2019?

-- Answer:
-- Franklin County, N.Y., with 7.4%. The county contains the St. Regis Mohawk
-- Reservation. https://en.wikipedia.org/wiki/St._Regis_Mohawk_Reservation

SELECT geo_name,
       state_us_abbreviation,
       p0010001 AS total_population,
       p0010005 AS american_indian_alaska_native_alone,
       (CAST (p0010005 AS numeric(8,1)) / p0010001) * 100
           AS percent_american_indian_alaska_native_alone
FROM us_counties_2010
WHERE state_us_abbreviation = 'NY'
ORDER BY percent_american_indian_alaska_native_alone DESC;
SELECT county_name,
       state_name,
       births_2019 AS births,
       deaths_2019 AS deaths,
       births_2019::numeric / deaths_2019 AS birth_death_ratio
FROM us_counties_pop_est_2019
WHERE state_name = 'New York'
ORDER BY birth_death_ratio DESC;

-- 3. Was the 2010 median county population higher in California or New York?
-- Generally, counties in and around New York City had the highest ratio of births
-- to deaths in the 2019 estimates. One exception to the trend is Jefferson County,
-- which is upstate on the U.S./Canadian border.


-- 3. Was the 2019 median county population estimate higher in California or New York?

-- Answer:
-- California had a median county population of 179,140.5 in 2010, almost double
-- that of New York, at 91,301. Here are two solutions:
-- California had a median county population estimate of 187,029 in 2019, almost double
-- that of New York, at 86,687. Here are two solutions:

-- First, you can find the median for each state one at a time:

SELECT percentile_cont(.5)
       WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010
WHERE state_us_abbreviation = 'NY';
       WITHIN GROUP (ORDER BY pop_est_2019)
FROM us_counties_pop_est_2019
WHERE state_name = 'New York';

SELECT percentile_cont(.5)
       WITHIN GROUP (ORDER BY p0010001)
FROM us_counties_2010
WHERE state_us_abbreviation = 'CA';
       WITHIN GROUP (ORDER BY pop_est_2019)
FROM us_counties_pop_est_2019
WHERE state_name = 'California';

-- Or both in one query (credit: https://github.com/Kennith-eng)

SELECT state_us_abbreviation,
SELECT state_name,
       percentile_cont(0.5)
       WITHIN GROUP (ORDER BY p0010001) AS median
FROM us_counties_2010
WHERE state_us_abbreviation IN ('NY', 'CA')
GROUP BY state_us_abbreviation;

       WITHIN GROUP (ORDER BY pop_est_2019) AS median
FROM us_counties_pop_est_2019
WHERE state_name IN ('New York', 'California')
GROUP BY state_name;

-- Finally, this query shows the median for each state:

SELECT state_us_abbreviation,
SELECT state_name,
       percentile_cont(0.5)
       WITHIN GROUP (ORDER BY p0010001) AS median
FROM us_counties_2010
GROUP BY state_us_abbreviation;
       WITHIN GROUP (ORDER BY pop_est_2019) AS median
FROM us_counties_pop_est_2019
GROUP BY state_name;


--------------------------------------------------------------
-- Chapter 6: Joining Tables in a Relational Database
-- Chapter 7: Joining Tables in a Relational Database
--------------------------------------------------------------

-- 1. The table us_counties_2010 contains 3,143 rows, and us_counties_2000 has
@@ -368,7 +386,7 @@ ON c2010.state_fips = c2000.state_fips
ORDER BY pct_change ASC;

--------------------------------------------------------------
-- Chapter 7: Table Design that Works for You
-- Chapter 8: Table Design that Works for You
--------------------------------------------------------------

-- Consider the following two tables from a database you’re making to keep
@@ -461,7 +479,7 @@ CREATE TABLE songs (


----------------------------------------------------------------
-- Chapter 8: Extracting Information by Grouping and Summarizing
-- Chapter 9: Extracting Information by Grouping and Summarizing
----------------------------------------------------------------

-- 1. We saw that library visits have declined in most places. But what is the
@@ -582,7 +600,7 @@ WHERE pls14.fscskey IS NULL OR pls09.fscskey IS NULL;
-- that do not appear in both tables.

--------------------------------------------------------------
-- Chapter 9: Inspecting and Modifying Data
-- Chapter 10: Inspecting and Modifying Data
--------------------------------------------------------------

-- In this exercise, you’ll turn the meat_poultry_egg_inspect table into useful
@@ -636,7 +654,7 @@ WHERE meat_processing = TRUE AND
    poultry_processing = TRUE;

--------------------------------------------------------------
-- Chapter 10: Statistical Functions in SQL
-- Chapter 11: Statistical Functions in SQL
--------------------------------------------------------------

-- 1. In Listing 10-2, the correlation coefficient, or r value, of the
@@ -720,7 +738,7 @@ WHERE popu_lsa >= 250000;


--------------------------------------------------------------
-- Chapter 11: Working with Dates and Times
-- Chapter 12: Working with Dates and Times
--------------------------------------------------------------

-- 1. Using the New York City taxi data, calculate the length of each ride using
@@ -787,7 +805,7 @@ WHERE tpep_dropoff_datetime - tpep_pickup_datetime <= '3 hours'::interval;
-- expect this given that the cost of a taxi ride is based on both time and distance.

--------------------------------------------------------------
-- Chapter 12: Advanced Query Techniques
-- Chapter 13: Advanced Query Techniques
--------------------------------------------------------------

-- 1. Revise the code in Listing 12-15 to dig deeper into the nuances of
@@ -858,7 +876,7 @@ AS (flavor varchar(20),


-------------------------------------------------------------
-- Chapter 13: Mining Text to Find Meaningful Data
-- Chapter 14: Mining Text to Find Meaningful Data
--------------------------------------------------------------

-- 1. The style guide of a publishing company you're writing for wants you to
@@ -934,7 +952,7 @@ LIMIT 5;


--------------------------------------------------------------
-- Chapter 14: Analyzing Spatial Data with PostGIS
-- Chapter 15: Analyzing Spatial Data with PostGIS
--------------------------------------------------------------

-- 1. Earlier, you found which US county has the largest area. Now,
@@ -1004,7 +1022,14 @@ ORDER BY census.statefp10, census.name10;
-- Can you spot it?

--------------------------------------------------------------
-- Chapter 15: Saving Time with Views, Functions, and Triggers
-- Chapter 16: Working with JSON Data
--------------------------------------------------------------

-- To come ...


--------------------------------------------------------------
-- Chapter 17: Saving Time with Views, Functions, and Triggers
--------------------------------------------------------------

-- 1. Create a view that displays the number of New York City taxi trips per
@@ -1085,14 +1110,14 @@ SELECT * FROM meat_poultry_egg_inspect
WHERE company = 'testcompany';

--------------------------------------------------------------
-- Chapter 16: Using PostgreSQL From the Command Line
-- Chapter 18: Using PostgreSQL From the Command Line
--------------------------------------------------------------

-- For this chapter, use psql to review any of the exercises in the book.
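
-- For example, a connection command could look like this (the database and
-- user names are placeholders; substitute your own):
-- psql -d analysis -U postgres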


--------------------------------------------------------------
-- Chapter 17: Maintaining Your Database
-- Chapter 19: Maintaining Your Database
--------------------------------------------------------------

-- To back up the gis_analysis database, use the pg_dump utility at the command line:
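
-- A sketch of such a command (the output file name and connection options are
-- placeholders; adjust them for your setup):
-- pg_dump -d gis_analysis -U postgres -f gis_analysis_backup.sql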

@@ -1100,7 +1125,7 @@ WHERE company = 'testcompany';


-----------------------------------------------------------------
-- Chapter 18: Identifying and Telling the Story Behind Your Data
-- Chapter 20: Identifying and Telling the Story Behind Your Data
-----------------------------------------------------------------

-- This is a non-coding chapter.