From 7e2bcf3cc78c85af58fb630440e960a008c64dec Mon Sep 17 00:00:00 2001
From: anthonydb
Date: Sun, 26 Jul 2020 15:40:46 -0400
Subject: [PATCH] Remove first edition code from Try it Yourself file

---
 Try_It_Yourself/Try_It_Yourself.sql | 827 +---------------------------
 1 file changed, 4 insertions(+), 823 deletions(-)

diff --git a/Try_It_Yourself/Try_It_Yourself.sql b/Try_It_Yourself/Try_It_Yourself.sql
index 0e4df83..b5bd9bd 100644
--- a/Try_It_Yourself/Try_It_Yourself.sql
+++ b/Try_It_Yourself/Try_It_Yourself.sql
@@ -5,7 +5,11 @@
 -- Try It Yourself Questions and Answers
 ----------------------------------------------------------------------------

+----------------------------------------------------------------------------
+-- Chapter 1: Setting Up Your Coding Environment
+----------------------------------------------------------------------------
+-- There are no Try It Yourself exercises in this chapter!

 ----------------------------------------------------------------------------
 -- Chapter 2: Creating Your First Database and Table
@@ -305,827 +309,4 @@ GROUP BY state_name;

--------------------------------------------------------------
-- Chapter 7: Joining Tables in a Relational Database
--------------------------------------------------------------

-- 1. The table us_counties_2010 contains 3,143 rows, and us_counties_2000 has
-- 3,141. That reflects the ongoing adjustments to county-level geographies that
-- typically result from government decision making. Using appropriate joins and
-- the NULL value, identify which counties don't exist in both tables. For fun,
-- search online to find out why they’re missing.

-- Answers:

-- Counties that exist in 2010 data but not 2000 include five county equivalents
-- in Alaska (called boroughs) plus Broomfield County, Colorado.

SELECT c2010.geo_name,
       c2010.state_us_abbreviation,
       c2000.geo_name
FROM us_counties_2010 c2010 LEFT JOIN us_counties_2000 c2000
ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips
WHERE c2000.geo_name IS NULL;

-- Counties that exist in 2000 data but not 2010 include three county
-- equivalents in Alaska (called boroughs) plus Clifton Forge city, Virginia,
-- which gave up its independent city status in 2001:

SELECT c2010.geo_name,
       c2000.geo_name,
       c2000.state_us_abbreviation
FROM us_counties_2010 c2010 RIGHT JOIN us_counties_2000 c2000
ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips
WHERE c2010.geo_name IS NULL;

-- 2. Using either the median() or percentile_cont() functions in Chapter 5,
-- determine the median of the percent change in county population.

-- Answer: 3.2%

-- Using median():

SELECT median(round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
                     / c2000.p0010001 * 100, 1 )) AS median_pct_change
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips;

-- Using percentile_cont():

SELECT percentile_cont(.5)
       WITHIN GROUP (ORDER BY round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
                                     / c2000.p0010001 * 100, 1 )) AS percentile_50th
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips;

-- Note: In both examples, you're finding the median of all the
-- county population percent change values.
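
-- A companion check (a sketch, not part of the book's answer key): comparing
-- avg() with the median shows how much outlier counties pull the average,
-- which is why the median is the safer summary statistic here.

SELECT avg(round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
                  / c2000.p0010001 * 100, 1 )) AS avg_pct_change,
       percentile_cont(.5)
       WITHIN GROUP (ORDER BY round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
                                     / c2000.p0010001 * 100, 1 )) AS median_pct_change
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips;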

-- 3. Which county had the greatest percentage loss of population between 2000
-- and 2010? Do you have any idea why? Hint: a weather event happened in 2005.

-- Answer: St. Bernard Parish, La. It and other Louisiana parishes (the county
-- equivalent name in Louisiana) experienced substantial population loss
-- following Hurricane Katrina in 2005.

SELECT c2010.geo_name,
       c2010.state_us_abbreviation,
       c2010.p0010001 AS pop_2010,
       c2000.p0010001 AS pop_2000,
       c2010.p0010001 - c2000.p0010001 AS raw_change,
       round( (CAST(c2010.p0010001 AS DECIMAL(8,1)) - c2000.p0010001)
              / c2000.p0010001 * 100, 1 ) AS pct_change
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips
ORDER BY pct_change ASC;

--------------------------------------------------------------
-- Chapter 8: Table Design that Works for You
--------------------------------------------------------------

-- Consider the following two tables from a database you’re making to keep
-- track of your vinyl LP collection. Start by reviewing these CREATE TABLE
-- statements.

-- The albums table includes information specific to the overall collection
-- of songs on the disc. The songs table catalogs each track on the album.
-- Each song has a title and its own artist column, because each song might
-- feature its own collection of artists.

CREATE TABLE albums (
    album_id bigserial,
    album_catalog_code varchar(100),
    album_title text,
    album_artist text,
    album_time interval,
    album_release_date date,
    album_genre varchar(40),
    album_description text
);

CREATE TABLE songs (
    song_id bigserial,
    song_title text,
    song_artist text,
    album_id bigint
);

-- Use the tables to answer these questions:

-- 1. Modify these CREATE TABLE statements to include primary and foreign keys
-- plus additional constraints on both tables. Explain why you made your
-- choices.

CREATE TABLE albums (
    album_id bigserial,
    album_catalog_code varchar(100) NOT NULL,
    album_title text NOT NULL,
    album_artist text NOT NULL,
    album_release_date date,
    album_genre varchar(40),
    album_description text,
    CONSTRAINT album_id_key PRIMARY KEY (album_id),
    CONSTRAINT release_date_check CHECK (album_release_date > '1/1/1925')
);

CREATE TABLE songs (
    song_id bigserial,
    song_title text NOT NULL,
    song_artist text NOT NULL,
    album_id bigint REFERENCES albums (album_id),
    CONSTRAINT song_id_key PRIMARY KEY (song_id)
);

-- Answers:
-- a) Both tables get a primary key using surrogate key id values that are
-- auto-generated via serial data types.

-- b) The songs table references albums via a foreign key constraint.

-- c) In both tables, the title and artist columns cannot be empty, which
-- is specified via a NOT NULL constraint. We assume that every album and
-- song should at minimum have that information.

-- d) In albums, the album_release_date column has a CHECK constraint
-- because it's unlikely that we'd own an LP made before 1925.


-- 2. Instead of using album_id as a surrogate key for your primary key, are
-- there any columns in albums that could be useful as a natural key? What would
-- you have to know to decide?

-- Answer:
-- We could consider the album_catalog_code. We would have to answer yes to
-- these questions:
-- - Is it going to be unique across all albums released by all companies?
-- - Will we always have one?


-- 3. To speed up queries, which columns are good candidates for indexes?

-- Answer:
-- Primary key columns get indexes by default, but we should add an index
-- to the album_id foreign key column in the songs table because we'll use
-- it in table joins. It's likely that we'll query these tables to search
-- by titles and artists, so those columns in both tables should get indexes
-- too. The album_release_date in albums also is a candidate if we expect
-- to perform many queries that include date ranges.
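
-- Two sketches that follow up on questions 2 and 3 (not part of the book's
-- answer key). For question 2, if album_catalog_code passed both tests, a
-- natural key version of the table might look like this (the table name
-- albums_natural_key is a hypothetical stand-in):

CREATE TABLE albums_natural_key (
    album_catalog_code varchar(100) CONSTRAINT catalog_code_key PRIMARY KEY,
    album_title text NOT NULL,
    album_artist text NOT NULL,
    album_release_date date,
    album_genre varchar(40),
    album_description text
);

-- For question 3, the indexes described above could be created like this
-- (index names are illustrative):

CREATE INDEX songs_album_id_idx ON songs (album_id);
CREATE INDEX songs_title_idx ON songs (song_title);
CREATE INDEX songs_artist_idx ON songs (song_artist);
CREATE INDEX albums_title_idx ON albums (album_title);
CREATE INDEX albums_artist_idx ON albums (album_artist);
CREATE INDEX albums_release_date_idx ON albums (album_release_date);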


----------------------------------------------------------------
-- Chapter 9: Extracting Information by Grouping and Summarizing
----------------------------------------------------------------

-- 1. We saw that library visits have declined in most places. But what is the
-- pattern in the use of technology in libraries? Both the 2014 and 2009 library
-- survey tables contain the columns gpterms (the number of internet-connected
-- computers used by the public) and pitusr (uses of public internet computers
-- per year). Modify the code in Listing 8-13 to calculate the percent change in
-- the sum of each column over time. Watch out for negative values!

-- Answer:
-- Use sum() on gpterms (computer terminals) by state, find percent change, and
-- then sort.

SELECT pls14.stabr,
       sum(pls14.gpterms) AS gpterms_2014,
       sum(pls09.gpterms) AS gpterms_2009,
       round( (CAST(sum(pls14.gpterms) AS decimal(10,1)) - sum(pls09.gpterms)) /
              sum(pls09.gpterms) * 100, 2 ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.gpterms >= 0 AND pls09.gpterms >= 0
GROUP BY pls14.stabr
ORDER BY pct_change DESC;

-- The query results show a consistent increase in the number of internet
-- computers used by the public in most states.

-- Use sum() on pitusr (uses of public internet computers per year) by state,
-- add percent change, and sort.

SELECT pls14.stabr,
       sum(pls14.pitusr) AS pitusr_2014,
       sum(pls09.pitusr) AS pitusr_2009,
       round( (CAST(sum(pls14.pitusr) AS decimal(10,1)) - sum(pls09.pitusr)) /
              sum(pls09.pitusr) * 100, 2 ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.pitusr >= 0 AND pls09.pitusr >= 0
GROUP BY pls14.stabr
ORDER BY pct_change DESC;

-- The query results show most states have seen a decrease in the total uses
-- of public internet computers per year.
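
-- Note on "Watch out for negative values!": the WHERE clauses above exclude
-- negative values because the survey files use negative codes to flag missing
-- data. To see how many agency pairs the filter drops, a quick check
-- (a sketch against the same tables, not part of the book's answer key):

SELECT count(*)
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.gpterms < 0 OR pls09.gpterms < 0;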

-- 2. Both library survey tables contain a column called obereg, a two-digit
-- Bureau of Economic Analysis Code that classifies each library agency
-- according to a region of the United States, such as New England, Rocky
-- Mountains, and so on. Just as we calculated the percent change in visits
-- grouped by state, do the same to group percent changes in visits by US
-- regions using obereg. Consult the survey documentation to find the meaning
-- of each region code. For a bonus challenge, create a table with the obereg
-- code as the primary key and the region name as text, and join it to the
-- summary query to group by the region name rather than the code.

-- Answer:

-- a) sum() visits by region.

SELECT pls14.obereg,
       sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009,
       round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
              sum(pls09.visits) * 100, 2 ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.visits >= 0 AND pls09.visits >= 0
GROUP BY pls14.obereg
ORDER BY pct_change DESC;

-- b) Bonus: creating the regions lookup table and adding it to the query.

CREATE TABLE obereg_codes (
    obereg varchar(2) CONSTRAINT obereg_key PRIMARY KEY,
    region varchar(50)
);

INSERT INTO obereg_codes
VALUES ('01', 'New England (CT ME MA NH RI VT)'),
       ('02', 'Mid East (DE DC MD NJ NY PA)'),
       ('03', 'Great Lakes (IL IN MI OH WI)'),
       ('04', 'Plains (IA KS MN MO NE ND SD)'),
       ('05', 'Southeast (AL AR FL GA KY LA MS NC SC TN VA WV)'),
       ('06', 'Southwest (AZ NM OK TX)'),
       ('07', 'Rocky Mountains (CO ID MT UT WY)'),
       ('08', 'Far West (AK CA HI NV OR WA)'),
       ('09', 'Outlying Areas (AS GU MP PR VI)');

-- sum() visits by region.

SELECT obereg_codes.region,
       sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009,
       round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
              sum(pls09.visits) * 100, 2 ) AS pct_change
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
    ON pls14.fscskey = pls09.fscskey
JOIN obereg_codes
    ON pls14.obereg = obereg_codes.obereg
WHERE pls14.visits >= 0 AND pls09.visits >= 0
GROUP BY obereg_codes.region
ORDER BY pct_change DESC;


-- 3. Thinking back to the types of joins you learned in Chapter 6, which join
-- type will show you all the rows in both tables, including those without a
-- match? Write such a query and add an IS NULL filter in a WHERE clause to
-- show agencies not included in one or the other table.

-- Answer: a FULL OUTER JOIN will show all rows in both tables.

SELECT pls14.libname, pls14.city, pls14.stabr, pls14.statstru, pls14.c_admin, pls14.branlib,
       pls09.libname, pls09.city, pls09.stabr, pls09.statstru, pls09.c_admin, pls09.branlib
FROM pls_fy2014_pupld14a pls14 FULL OUTER JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey
WHERE pls14.fscskey IS NULL OR pls09.fscskey IS NULL;

-- Note: The IS NULL statements in the WHERE clause limit results to those
-- that do not appear in both tables.
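
-- As a follow-up, a CASE expression inside sum() can tally how many agencies
-- appear in only one year's table (a sketch, not part of the book's answer key):

SELECT sum(CASE WHEN pls09.fscskey IS NULL THEN 1 ELSE 0 END) AS only_in_2014,
       sum(CASE WHEN pls14.fscskey IS NULL THEN 1 ELSE 0 END) AS only_in_2009
FROM pls_fy2014_pupld14a pls14 FULL OUTER JOIN pls_fy2009_pupld09a pls09
ON pls14.fscskey = pls09.fscskey;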

--------------------------------------------------------------
-- Chapter 10: Inspecting and Modifying Data
--------------------------------------------------------------

-- In this exercise, you’ll turn the meat_poultry_egg_inspect table into useful
-- information. You need to answer two questions: How many of the companies
-- in the table process meat, and how many process poultry?

-- Create two new columns called meat_processing and poultry_processing. Each
-- can be of the type boolean.

-- Using UPDATE, set meat_processing = TRUE on any row where the activities
-- column contains the text 'Meat Processing'. Do the same update on the
-- poultry_processing column, but this time look for the text
-- 'Poultry Processing' in activities.

-- Use the data from the new, updated columns to count how many companies
-- perform each type of activity. For a bonus challenge, count how many
-- companies perform both activities.

-- Answer:
-- a) Add the columns

ALTER TABLE meat_poultry_egg_inspect ADD COLUMN meat_processing boolean;
ALTER TABLE meat_poultry_egg_inspect ADD COLUMN poultry_processing boolean;

SELECT * FROM meat_poultry_egg_inspect; -- view table with new empty columns

-- b) Update the columns

UPDATE meat_poultry_egg_inspect
SET meat_processing = TRUE
WHERE activities ILIKE '%meat processing%'; -- case-insensitive match with wildcards

UPDATE meat_poultry_egg_inspect
SET poultry_processing = TRUE
WHERE activities ILIKE '%poultry processing%'; -- case-insensitive match with wildcards

-- c) View the updated table

SELECT * FROM meat_poultry_egg_inspect;

-- d) Count meat and poultry processors

SELECT count(meat_processing), count(poultry_processing)
FROM meat_poultry_egg_inspect;

-- e) Count those that do both

SELECT count(*)
FROM meat_poultry_egg_inspect
WHERE meat_processing = TRUE AND
      poultry_processing = TRUE;

--------------------------------------------------------------
-- Chapter 11: Statistical Functions in SQL
--------------------------------------------------------------

-- 1. In Listing 10-2, the correlation coefficient, or r value, of the
-- variables pct_bachelors_higher and median_hh_income was about .68.
-- Write a query to show the correlation between pct_masters_higher and
-- median_hh_income. Is the r value higher or lower? What might explain
-- the difference?

-- Answer:
-- The r value of pct_masters_higher and median_hh_income is about .57, which
-- shows a weaker connection between income and the percentage with a master's
-- degree or higher than between income and the percentage with a bachelor's
-- degree or higher. One possible explanation is that attaining a master's
-- degree or higher may have a more incremental impact on earnings than
-- attaining a bachelor's degree.

SELECT
    round(
        corr(median_hh_income, pct_bachelors_higher)::numeric, 2
    ) AS bachelors_income_r,
    round(
        corr(median_hh_income, pct_masters_higher)::numeric, 2
    ) AS masters_income_r
FROM acs_2011_2015_stats;


-- 2. In the FBI crime data, which cities with a population of 500,000 or
-- more have the highest rates of motor vehicle thefts (column
-- motor_vehicle_theft)? Which have the highest violent crime rates
-- (column violent_crime)?

-- Answer:
-- a) In 2015, Milwaukee and Albuquerque had the two highest rates of motor
-- vehicle theft:

SELECT
    city,
    st,
    population,
    motor_vehicle_theft,
    round(
        (motor_vehicle_theft::numeric / population) * 100000, 1
    ) AS vehicle_theft_per_100000
FROM fbi_crime_data_2015
WHERE population >= 500000
ORDER BY vehicle_theft_per_100000 DESC;

-- b) In 2015, Detroit and Memphis had the two highest rates of violent crime.

SELECT
    city,
    st,
    population,
    violent_crime,
    round(
        (violent_crime::numeric / population) * 100000, 1
    ) AS violent_crime_per_100000
FROM fbi_crime_data_2015
WHERE population >= 500000
ORDER BY violent_crime_per_100000 DESC;

-- 3. As a bonus challenge, revisit the libraries data in the table
-- pls_fy2014_pupld14a in Chapter 8. Rank library agencies based on the rate
-- of visits per 1,000 population (variable popu_lsa), and limit the query to
-- agencies serving 250,000 people or more.

-- Answer:
-- Cuyahoga County Public Library tops the rankings with 12,963 visits per
-- thousand people (or roughly 13 visits per person).

SELECT
    libname,
    stabr,
    visits,
    popu_lsa,
    round(
        (visits::numeric / popu_lsa) * 1000, 1
    ) AS visits_per_1000,
    rank() OVER (ORDER BY (visits::numeric / popu_lsa) * 1000 DESC)
FROM pls_fy2014_pupld14a
WHERE popu_lsa >= 250000;


--------------------------------------------------------------
-- Chapter 12: Working with Dates and Times
--------------------------------------------------------------

-- 1. Using the New York City taxi data, calculate the length of each ride using
-- the pickup and drop-off timestamps. Sort the query results from the longest
-- ride to the shortest. Do you notice anything about the longest or shortest
-- trips that you might want to ask city officials about?

-- Answer: More than 480 of the trips last more than 10 hours, which seems
-- excessive. Moreover, two records have drop-off times before the pickup time,
-- and several have pickup and drop-off times that are the same. It's worth
-- asking whether these records have timestamp errors.

SELECT
    trip_id,
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    tpep_dropoff_datetime - tpep_pickup_datetime AS length_of_ride
FROM nyc_yellow_taxi_trips_2016_06_01
ORDER BY length_of_ride DESC;

-- 2. Using the AT TIME ZONE keywords, write a query that displays the date and
-- time for London, Johannesburg, Moscow, and Melbourne the moment January 1,
-- 2100, arrives in New York City.

-- Answer:

SELECT '2100-01-01 00:00:00-05' AT TIME ZONE 'US/Eastern' AS new_york,
       '2100-01-01 00:00:00-05' AT TIME ZONE 'Europe/London' AS london,
       '2100-01-01 00:00:00-05' AT TIME ZONE 'Africa/Johannesburg' AS johannesburg,
       '2100-01-01 00:00:00-05' AT TIME ZONE 'Europe/Moscow' AS moscow,
       '2100-01-01 00:00:00-05' AT TIME ZONE 'Australia/Melbourne' AS melbourne;

-- 3. As a bonus challenge, use the statistics functions in Chapter 10 to
-- calculate the correlation coefficient and r-squared values using trip time
-- and the total_amount column in the New York City taxi data, which represents
-- the total amount charged to passengers. Do the same with trip_distance and
-- total_amount. Limit the query to rides that last three hours or less.

-- Answer:

SELECT
    round(
        corr(total_amount, (
            date_part('epoch', tpep_dropoff_datetime) -
            date_part('epoch', tpep_pickup_datetime)
        ))::numeric, 2
    ) AS amount_time_corr,
    round(
        regr_r2(total_amount, (
            date_part('epoch', tpep_dropoff_datetime) -
            date_part('epoch', tpep_pickup_datetime)
        ))::numeric, 2
    ) AS amount_time_r2,
    round(
        corr(total_amount, trip_distance)::numeric, 2
    ) AS amount_distance_corr,
    round(
        regr_r2(total_amount, trip_distance)::numeric, 2
    ) AS amount_distance_r2
FROM nyc_yellow_taxi_trips_2016_06_01
WHERE tpep_dropoff_datetime - tpep_pickup_datetime <= '3 hours'::interval;

-- Note: Both correlations are strong, with r values of 0.80 or higher. We'd
-- expect this given that the cost of a taxi ride is based on both time and
-- distance.
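
-- Following up on the note in question 1, an integrity check can count the
-- suspect records directly (a sketch against the same table, not part of the
-- book's answer key):

SELECT count(*) FILTER (WHERE tpep_dropoff_datetime < tpep_pickup_datetime)
           AS dropoff_before_pickup,
       count(*) FILTER (WHERE tpep_dropoff_datetime = tpep_pickup_datetime)
           AS same_pickup_and_dropoff
FROM nyc_yellow_taxi_trips_2016_06_01;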

--------------------------------------------------------------
-- Chapter 13: Advanced Query Techniques
--------------------------------------------------------------

-- 1. Revise the code in Listing 12-15 to dig deeper into the nuances of
-- Waikiki’s high temperatures. Limit the temps_collapsed table to the Waikiki
-- maximum daily temperature observations. Then use the WHEN clauses in the
-- CASE statement to reclassify the temperatures into seven groups that would
-- result in the following text output:

-- '90 or more'
-- '88-89'
-- '86-87'
-- '84-85'
-- '82-83'
-- '80-81'
-- '79 or less'

-- In which of those groups does Waikiki’s daily maximum temperature fall most
-- often?

-- Answer: Between 86 and 87 degrees. Nice.

WITH temps_collapsed (station_name, max_temperature_group) AS
    (SELECT station_name,
            CASE WHEN max_temp >= 90 THEN '90 or more'
                 WHEN max_temp BETWEEN 88 AND 89 THEN '88-89'
                 WHEN max_temp BETWEEN 86 AND 87 THEN '86-87'
                 WHEN max_temp BETWEEN 84 AND 85 THEN '84-85'
                 WHEN max_temp BETWEEN 82 AND 83 THEN '82-83'
                 WHEN max_temp BETWEEN 80 AND 81 THEN '80-81'
                 WHEN max_temp <= 79 THEN '79 or less'
            END
     FROM temperature_readings
     WHERE station_name = 'WAIKIKI 717.2 HI US')

SELECT station_name, max_temperature_group, count(*)
FROM temps_collapsed
GROUP BY station_name, max_temperature_group
ORDER BY max_temperature_group;

-- 2. Revise the ice cream survey crosstab in Listing 12-11 to flip the table.
-- In other words, make flavor the rows and office the columns. Which elements
-- of the query do you need to change? Are the counts different?

-- Answer: You need to re-order the columns in the first subquery so flavor is
-- first and office is second. count(*) stays third. Then, you must change
-- the second subquery to produce a grouped list of office. Finally, you must
-- add the office names to the output list.

-- The numbers don't change, just the order presented in the crosstab.

SELECT *
FROM crosstab('SELECT flavor,
                      office,
                      count(*)
               FROM ice_cream_survey
               GROUP BY flavor, office
               ORDER BY flavor',

              'SELECT office
               FROM ice_cream_survey
               GROUP BY office
               ORDER BY office')

AS (flavor varchar(20),
    downtown bigint,
    midtown bigint,
    uptown bigint);


--------------------------------------------------------------
-- Chapter 14: Mining Text to Find Meaningful Data
--------------------------------------------------------------

-- 1. The style guide of a publishing company you're writing for wants you to
-- avoid commas before suffixes in names. But there are several names like
-- Alvarez, Jr. and Williams, Sr. in your author database. Which functions can
-- you use to remove the comma? Would a regular expression function help?
-- How would you capture just the suffixes to place them into a separate column?

-- Answer: You can use either the standard SQL replace() function or the
-- PostgreSQL regexp_replace() function:

SELECT replace('Williams, Sr.', ', ', ' ');
SELECT regexp_replace('Williams, Sr.', ', ', ' ');

-- Answer: To capture just the suffixes, search for characters after a comma
-- and space and place those inside a match group:

SELECT (regexp_match('Williams, Sr.', '.*, (.*)'))[1];
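
-- To go one step further and move the suffixes into their own column, the
-- same pattern could drive an UPDATE. A sketch, not part of the book's answer
-- key; the authors table and its columns here are hypothetical stand-ins:

ALTER TABLE authors ADD COLUMN suffix text;

UPDATE authors
SET suffix = (regexp_match(author_name, '.*, (.*)'))[1],     -- capture the suffix
    author_name = regexp_replace(author_name, ',\s.*$', '')  -- strip it from the name
WHERE author_name ~ ', ';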

-- 2. Using any one of the State of the Union addresses, count the number of
-- unique words that are five characters or more. Hint: you can use
-- regexp_split_to_table() in a subquery to create a table of words to count.
-- Bonus: remove commas and periods at the end of each word.

-- Answer:

WITH
    word_list (word)
AS
    (
        SELECT regexp_split_to_table(speech_text, '\s') AS word
        FROM president_speeches
        WHERE speech_date = '1974-01-30'
    )

SELECT lower(
           replace(replace(replace(word, ',', ''), '.', ''), ':', '')
       ) AS cleaned_word,
       count(*)
FROM word_list
WHERE length(word) >= 5
GROUP BY cleaned_word
ORDER BY count(*) DESC;

-- Note: This query uses a Common Table Expression to first separate each word
-- in the text into a separate row in a table named word_list. Then the SELECT
-- statement counts the words, which are cleaned up with two operations. First,
-- several nested replace functions remove commas, periods, and colons. Second,
-- all words are converted to lowercase so that when we count we group words
-- that may appear with various cases (e.g., "Military" and "military").


-- 3. Rewrite the query in Listing 13-25 using the ts_rank_cd() function
-- instead of ts_rank(). According to the PostgreSQL documentation, ts_rank_cd()
-- computes cover density, which takes into account how close the lexeme search
-- terms are to each other. Does using the ts_rank_cd() function significantly
-- change the results?

-- Answer:
-- The ranking does change, although the same speeches are generally
-- represented. The change might be more or less pronounced given another set
-- of texts.

SELECT president,
       speech_date,
       ts_rank_cd(search_speech_text, search_query, 2) AS rank_score
FROM president_speeches,
     to_tsquery('war & security & threat & enemy') search_query
WHERE search_speech_text @@ search_query
ORDER BY rank_score DESC
LIMIT 5;


--------------------------------------------------------------
-- Chapter 15: Analyzing Spatial Data with PostGIS
--------------------------------------------------------------

-- 1. Earlier, you found which US county has the largest area. Now,
-- aggregate the county data to find the area of each state in square
-- miles. (Use the statefp10 column in the us_counties_2010_shp table.)
-- How many states are bigger than the Yukon-Koyukuk area?

-- Answer: Just three states are bigger than Yukon-Koyukuk. One, of course,
-- is Alaska itself (FIPS 02); the other two are Texas (FIPS 48) and
-- California (FIPS 06).

SELECT statefp10 AS st,
       round(
           (sum(ST_Area(geom::geography) / 2589988.110336))::numeric, 2
       ) AS square_miles
FROM us_counties_2010_shp
GROUP BY statefp10
ORDER BY square_miles DESC;

-- 2. Using ST_Distance(), determine how many miles separate these two farmers’
-- markets: the Oakleaf Greenmarket (9700 Argyle Forest Blvd, Jacksonville,
-- Florida) and Columbia Farmers Market (1701 West Ash Street, Columbia,
-- Missouri). You’ll need to first find the coordinates for both in the
-- farmers_markets table.
-- Tip: you can also write this query using the Common Table Expression syntax
-- you learned in Chapter 12.

-- Answer: About 851 miles.

WITH
    market_start (geog_point) AS
    (
        SELECT geog_point
        FROM farmers_markets
        WHERE market_name = 'The Oakleaf Greenmarket'
    ),
    market_end (geog_point) AS
    (
        SELECT geog_point
        FROM farmers_markets
        WHERE market_name = 'Columbia Farmers Market'
    )
SELECT ST_Distance(market_start.geog_point, market_end.geog_point) / 1609.344 -- convert meters to miles
FROM market_start, market_end;
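
-- The same calculation without the CTE, using scalar subqueries, returns the
-- identical distance (a sketch showing the alternative syntax the tip
-- mentions; not part of the book's answer key):

SELECT ST_Distance(
           (SELECT geog_point FROM farmers_markets
            WHERE market_name = 'The Oakleaf Greenmarket'),
           (SELECT geog_point FROM farmers_markets
            WHERE market_name = 'Columbia Farmers Market')
       ) / 1609.344 AS miles;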

-- 3. More than 500 rows in the farmers_markets table are missing a value
-- in the county column, an example of dirty government data. Using the
-- us_counties_2010_shp table and the ST_Intersects() function, perform a
-- spatial join to find the missing county names based on the longitude and
-- latitude of each market. Because geog_point in farmers_markets is of the
-- geography type and its SRID is 4326, you’ll need to cast geom in the Census
-- table to the geography type and change its SRID using ST_SetSRID().

-- Answer:

SELECT census.name10,
       census.statefp10,
       markets.market_name,
       markets.county,
       markets.st
FROM farmers_markets markets JOIN us_counties_2010_shp census
    ON ST_Intersects(markets.geog_point, ST_SetSRID(census.geom,4326)::geography)
WHERE markets.county IS NULL
ORDER BY census.statefp10, census.name10;

-- Note that this query also highlights a farmer's market that is mis-geocoded.
-- Can you spot it?

--------------------------------------------------------------
-- Chapter 16: Working with JSON Data
--------------------------------------------------------------

-- To come ...


--------------------------------------------------------------
-- Chapter 17: Saving Time with Views, Functions, and Triggers
--------------------------------------------------------------

-- 1. Create a view that displays the number of New York City taxi trips per
-- hour. Use the taxi data in Chapter 11 and the query in Listing 11-8.

-- Answer:

CREATE VIEW nyc_taxi_trips_per_hour AS
    SELECT
        date_part('hour', tpep_pickup_datetime),
        count(date_part('hour', tpep_pickup_datetime))
    FROM nyc_yellow_taxi_trips_2016_06_01
    GROUP BY date_part('hour', tpep_pickup_datetime)
    ORDER BY date_part('hour', tpep_pickup_datetime);

SELECT * FROM nyc_taxi_trips_per_hour;

-- 2. In Chapter 10, you learned how to calculate rates per thousand. Turn that
-- formula into a rates_per_thousand() function that takes three arguments
-- to calculate the result: observed_number, base_number, and decimal_places.

-- Answer: This uses PL/pgSQL, but you could use a SQL function as well.

CREATE OR REPLACE FUNCTION
rate_per_thousand(observed_number numeric,
                  base_number numeric,
                  decimal_places integer DEFAULT 1)
RETURNS numeric(10,2) AS $$
BEGIN
    RETURN
        round(
            (observed_number / base_number) * 1000, decimal_places
        );
END;
$$ LANGUAGE plpgsql;

-- Test the function:

SELECT rate_per_thousand(50, 11000, 2);
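
-- As the answer notes, a plain SQL function works too. That version might
-- look like this (a sketch; the _sql suffix is just to avoid a name clash
-- with the function above):

CREATE OR REPLACE FUNCTION
rate_per_thousand_sql(observed_number numeric,
                      base_number numeric,
                      decimal_places integer DEFAULT 1)
RETURNS numeric AS $$
    SELECT round((observed_number / base_number) * 1000, decimal_places);
$$ LANGUAGE sql;

SELECT rate_per_thousand_sql(50, 11000, 2);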

-- 3. In Chapter 9, you worked with the meat_poultry_egg_inspect table that
-- listed food processing facilities. Write a trigger that automatically adds an
-- inspection date each time you insert a new facility into the table. Use the
-- inspection_date column added in Listing 9-19, and set the date to be six
-- months from the current date. You should be able to describe the steps needed
-- to implement a trigger and how the steps relate to each other.

-- Answer:
-- a) Add the column

ALTER TABLE meat_poultry_egg_inspect ADD COLUMN inspection_date date;

-- b) Create the function that the trigger will execute. It must run BEFORE
-- INSERT so it can fill in inspection_date on the incoming row via NEW;
-- an UPDATE on the table itself would touch every existing row.

CREATE OR REPLACE FUNCTION add_inspection_date()
    RETURNS trigger AS $$
    BEGIN
        NEW.inspection_date = now() + '6 months'::interval; -- set the new row's inspection date six months out
        RETURN NEW;
    END;
$$ LANGUAGE plpgsql;

-- c) Create the trigger

CREATE TRIGGER inspection_date_update
    BEFORE INSERT
    ON meat_poultry_egg_inspect
    FOR EACH ROW
    EXECUTE PROCEDURE add_inspection_date();

-- d) Test the insertion of a company and examine the result

INSERT INTO meat_poultry_egg_inspect(est_number, company)
VALUES ('test123', 'testcompany');

SELECT * FROM meat_poultry_egg_inspect
WHERE company = 'testcompany';

--------------------------------------------------------------
-- Chapter 18: Using PostgreSQL From the Command Line
--------------------------------------------------------------

-- For this chapter, use psql to review any of the exercises in the book.


--------------------------------------------------------------
-- Chapter 19: Maintaining Your Database
--------------------------------------------------------------

-- To back up the gis_analysis database, use the pg_dump utility at the command line:
-- pg_dump -d gis_analysis -U [your-username] -Fc > gis_analysis_backup_custom.sql


-----------------------------------------------------------------
-- Chapter 20: Identifying and Telling the Story Behind Your Data
-----------------------------------------------------------------

-- This is a non-coding chapter.
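
-- To restore the custom-format backup created with the pg_dump command in
-- Chapter 19 above, pg_restore is the companion utility. One possible
-- invocation (a sketch; adjust database, user, and file names for your setup):
-- pg_restore -d gis_analysis -U [your-username] gis_analysis_backup_custom.sql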