From 204d3cc55d29c860b3625cb933adb2b94d475f7c Mon Sep 17 00:00:00 2001 From: Anthony DeBarros Date: Wed, 26 Feb 2020 21:43:12 -0500 Subject: [PATCH] More renumbering --- Chapter_02/Chapter_01.sql | 34 ---- Chapter_02/README.md | 2 +- Chapter_03/Chapter_02.sql | 104 ----------- Chapter_03/README.md | 2 +- Chapter_04/Chapter_03.sql | 90 ---------- Chapter_04/README.md | 2 +- Chapter_05/Chapter_04.sql | 212 ---------------------- Chapter_05/README.md | 2 +- Chapter_06/Chapter_05.sql | 207 ---------------------- Chapter_06/README.md | 2 +- Chapter_07/Chapter_06.sql | 196 -------------------- Chapter_07/README.md | 2 +- Chapter_08/Chapter_07.sql | 184 ------------------- Chapter_08/README.md | 2 +- Chapter_09/Chapter_08.sql | 299 ------------------------------- Chapter_09/README.md | 2 +- Chapter_10/Chapter_09.sql | 259 --------------------------- Chapter_10/README.md | 4 +- Chapter_11/Chapter_10.sql | 167 ------------------ Chapter_11/README.md | 2 +- Chapter_12/Chapter_11.sql | 233 ------------------------ Chapter_12/README.md | 2 +- Chapter_13/Chapter_12.sql | 310 -------------------------------- Chapter_13/README.md | 2 +- Chapter_14/Chapter_13.sql | 363 -------------------------------------- Chapter_14/README.md | 2 +- Chapter_15/Chapter_14.sql | 252 -------------------------- Chapter_15/README.md | 2 +- Chapter_17/Chapter_15.sql | 321 --------------------------------- Chapter_17/README.md | 2 +- Chapter_18/README.md | 2 +- Chapter_19/Chapter_17.sql | 104 ----------- Chapter_19/README.md | 2 +- 33 files changed, 18 insertions(+), 3353 deletions(-) delete mode 100644 Chapter_02/Chapter_01.sql delete mode 100644 Chapter_03/Chapter_02.sql delete mode 100644 Chapter_04/Chapter_03.sql delete mode 100644 Chapter_05/Chapter_04.sql delete mode 100644 Chapter_06/Chapter_05.sql delete mode 100644 Chapter_07/Chapter_06.sql delete mode 100644 Chapter_08/Chapter_07.sql delete mode 100644 Chapter_09/Chapter_08.sql delete mode 100644 Chapter_10/Chapter_09.sql delete mode 100644 Chapter_11/Chapter_10.sql delete mode 100644 Chapter_12/Chapter_11.sql delete mode 100644 Chapter_13/Chapter_12.sql delete mode 100644 Chapter_14/Chapter_13.sql delete mode 100644 Chapter_15/Chapter_14.sql delete mode 100644 Chapter_17/Chapter_15.sql delete mode 100644 Chapter_19/Chapter_17.sql diff --git a/Chapter_02/Chapter_01.sql b/Chapter_02/Chapter_01.sql deleted file mode 100644 index d5494f3..0000000 --- a/Chapter_02/Chapter_01.sql +++ /dev/null @@ -1,34 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 1 Code Examples --------------------------------------------------------------- - --- Listing 1-1: Creating a database named analysis - -CREATE DATABASE analysis; - --- Listing 1-2: Creating a table named teachers with six columns - -CREATE TABLE teachers ( - id bigserial, - first_name varchar(25), - last_name varchar(50), - school varchar(50), - hire_date date, - salary numeric -); - --- This command will remove (drop) the table. --- DROP TABLE teachers; - --- Listing 1-3 Inserting data into the teachers table - -INSERT INTO teachers (first_name, last_name, school, hire_date, salary) -VALUES ('Janet', 'Smith', 'F.D. Roosevelt HS', '2011-10-30', 36200), - ('Lee', 'Reynolds', 'F.D. Roosevelt HS', '1993-05-22', 65000), - ('Samuel', 'Cole', 'Myers Middle School', '2005-08-01', 43500), - ('Samantha', 'Bush', 'Myers Middle School', '2011-10-30', 36200), - ('Betty', 'Diaz', 'Myers Middle School', '2005-08-30', 43500), - ('Kathleen', 'Roush', 'F.D. Roosevelt HS', '2010-10-22', 38500); diff --git a/Chapter_02/README.md b/Chapter_02/README.md index a561f18..0f34195 100644 --- a/Chapter_02/README.md +++ b/Chapter_02/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 1: Creating Your First Database and Table +### Chapter 2: Creating Your First Database and Table Introduces PostgreSQL, the pgAdmin user interface, and the code for loading a simple data set about teachers into a new database. diff --git a/Chapter_03/Chapter_02.sql b/Chapter_03/Chapter_02.sql deleted file mode 100644 index ea76448..0000000 --- a/Chapter_03/Chapter_02.sql +++ /dev/null @@ -1,104 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 2 Code Examples --------------------------------------------------------------- - --- Listing 2-1: Querying all rows and columns from the teachers table - -SELECT * FROM teachers; - --- Listing 2-2: Querying a subset of columns - -SELECT last_name, first_name, salary FROM teachers; - --- Listing 2-3: Querying distinct values in the school column - -SELECT DISTINCT school -FROM teachers; - --- Listing 2-4: Querying distinct pairs of values in the school and salary --- columns - -SELECT DISTINCT school, salary -FROM teachers; - --- Listing 2-5: Sorting a column with ORDER BY - -SELECT first_name, last_name, salary -FROM teachers -ORDER BY salary DESC; - --- Listing 2-6: Sorting multiple columns with ORDER BY - -SELECT last_name, school, hire_date -FROM teachers -ORDER BY school ASC, hire_date DESC; - --- Listing 2-7: Filtering rows using WHERE - -SELECT last_name, school, hire_date -FROM teachers -WHERE school = 'Myers Middle School'; - --- Examples of WHERE comparison operators - --- Teachers with first name of Janet -SELECT first_name, last_name, school -FROM teachers -WHERE first_name = 'Janet'; - --- School names not equal to F.D. Roosevelt HS -SELECT school -FROM teachers -WHERE school != 'F.D. Roosevelt HS'; - --- Teachers hired before Jan. 1, 2000 -SELECT first_name, last_name, hire_date -FROM teachers -WHERE hire_date < '2000-01-01'; - --- Teachers earning 43,500 or more -SELECT first_name, last_name, salary -FROM teachers -WHERE salary >= 43500; - --- Teachers who earn between $40,000 and $65,000 -SELECT first_name, last_name, school, salary -FROM teachers -WHERE salary BETWEEN 40000 AND 65000; - --- Listing 2-8: Filtering with LIKE AND ILIKE - -SELECT first_name -FROM teachers -WHERE first_name LIKE 'sam%'; - -SELECT first_name -FROM teachers -WHERE first_name ILIKE 'sam%'; - --- Listing 2-9: Combining operators using AND and OR - -SELECT * -FROM teachers -WHERE school = 'Myers Middle School' - AND salary < 40000; - -SELECT * -FROM teachers -WHERE last_name = 'Cole' - OR last_name = 'Bush'; - -SELECT * -FROM teachers -WHERE school = 'F.D. Roosevelt HS' - AND (salary < 38000 OR salary > 40000); - --- Listing 2-10: A SELECT statement including WHERE and ORDER BY - -SELECT first_name, last_name, school, hire_date, salary -FROM teachers -WHERE school LIKE '%Roos%' -ORDER BY hire_date DESC; diff --git a/Chapter_03/README.md b/Chapter_03/README.md index bcac356..b03fb1f 100644 --- a/Chapter_03/README.md +++ b/Chapter_03/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 2: Beginning Data Exploration with `SELECT` +### Chapter 3: Beginning Data Exploration with `SELECT` Explores basic SQL query syntax, including how to sort and filter data. diff --git a/Chapter_04/Chapter_03.sql b/Chapter_04/Chapter_03.sql deleted file mode 100644 index 716669c..0000000 --- a/Chapter_04/Chapter_03.sql +++ /dev/null @@ -1,90 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 3 Code Examples --------------------------------------------------------------- - --- Listing 3-1: Character data types in action - -CREATE TABLE char_data_types ( - varchar_column varchar(10), - char_column char(10), - text_column text -); - -INSERT INTO char_data_types -VALUES - ('abc', 'abc', 'abc'), - ('defghi', 'defghi', 'defghi'); - -COPY char_data_types TO 'C:\YourDirectory\typetest.txt' -WITH (FORMAT CSV, HEADER, DELIMITER '|'); - - --- Listing 3-2: Number data types in action - -CREATE TABLE number_data_types ( - numeric_column numeric(20,5), - real_column real, - double_column double precision -); - -INSERT INTO number_data_types -VALUES - (.7, .7, .7), - (2.13579, 2.13579, 2.13579), - (2.1357987654, 2.1357987654, 2.1357987654); - -SELECT * FROM number_data_types; - --- Listing 3-3: Rounding issues with float columns --- Assumes table created and loaded with Listing 3-2 - -SELECT - numeric_column * 10000000 AS "Fixed", - real_column * 10000000 AS "Float" -FROM number_data_types -WHERE numeric_column = .7; - --- Listing 3-4: Timestamp and interval types in action - -CREATE TABLE date_time_types ( - timestamp_column timestamp with time zone, - interval_column interval -); - -INSERT INTO date_time_types -VALUES - ('2018-12-31 01:00 EST','2 days'), - ('2018-12-31 01:00 PST','1 month'), - ('2018-12-31 01:00 Australia/Melbourne','1 century'), - (now(),'1 week'); - -SELECT * FROM date_time_types; - --- Listing 3-5: Using the interval data type --- Assumes script 3-4 has been run - -SELECT - timestamp_column, - interval_column, - timestamp_column - interval_column AS new_date -FROM date_time_types; - --- Listing 3-6: Three CAST() examples - -SELECT timestamp_column, CAST(timestamp_column AS varchar(10)) -FROM date_time_types; - -SELECT numeric_column, - CAST(numeric_column AS integer), - CAST(numeric_column AS varchar(6)) -FROM number_data_types; - --- Does not work: -SELECT CAST(char_column AS integer) FROM char_data_types; - --- Alternate notation for CAST is the double-colon: -SELECT timestamp_column::varchar(10) -FROM date_time_types; diff --git a/Chapter_04/README.md b/Chapter_04/README.md index 6d9338b..fb3d9b5 100644 --- a/Chapter_04/README.md +++ b/Chapter_04/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 3: Understanding Data Types +### Chapter 4: Understanding Data Types Explains the definitions for setting columns in a table to hold specific types of data, from text to dates to various forms of numbers. diff --git a/Chapter_05/Chapter_04.sql b/Chapter_05/Chapter_04.sql deleted file mode 100644 index b34888c..0000000 --- a/Chapter_05/Chapter_04.sql +++ /dev/null @@ -1,212 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 4 Code Examples --------------------------------------------------------------- - --- Listing 4-1: Using COPY for data import --- This is example syntax only; running it will produce an error - -COPY table_name -FROM 'C:\YourDirectory\your_file.csv' -WITH (FORMAT CSV, HEADER); - - --- Listing 4-2: A CREATE TABLE statement for Census county data --- Full data dictionary available at: http://www.census.gov/prod/cen2010/doc/pl94-171.pdf --- Note: Some columns have been given more descriptive names - -CREATE TABLE us_counties_2010 ( - geo_name varchar(90), -- Name of the geography - state_us_abbreviation varchar(2), -- State/U.S. abbreviation - summary_level varchar(3), -- Summary Level - region smallint, -- Region - division smallint, -- Division - state_fips varchar(2), -- State FIPS code - county_fips varchar(3), -- County code - area_land bigint, -- Area (Land) in square meters - area_water bigint, -- Area (Water) in square meters - population_count_100_percent integer, -- Population count (100%) - housing_unit_count_100_percent integer, -- Housing Unit count (100%) - internal_point_lat numeric(10,7), -- Internal point (latitude) - internal_point_lon numeric(10,7), -- Internal point (longitude) - - -- This section is referred to as P1. Race: - p0010001 integer, -- Total population - p0010002 integer, -- Population of one race: - p0010003 integer, -- White Alone - p0010004 integer, -- Black or African American alone - p0010005 integer, -- American Indian and Alaska Native alone - p0010006 integer, -- Asian alone - p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone - p0010008 integer, -- Some Other Race alone - p0010009 integer, -- Population of two or more races - p0010010 integer, -- Population of two races: - p0010011 integer, -- White; Black or African American - p0010012 integer, -- White; American Indian and Alaska Native - p0010013 integer, -- White; Asian - p0010014 integer, -- White; Native Hawaiian and Other Pacific Islander - p0010015 integer, -- White; Some Other Race - p0010016 integer, -- Black or African American; American Indian and Alaska Native - p0010017 integer, -- Black or African American; Asian - p0010018 integer, -- Black or African American; Native Hawaiian and Other Pacific Islander - p0010019 integer, -- Black or African American; Some Other Race - p0010020 integer, -- American Indian and Alaska Native; Asian - p0010021 integer, -- American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander - p0010022 integer, -- American Indian and Alaska Native; Some Other Race - p0010023 integer, -- Asian; Native Hawaiian and Other Pacific Islander - p0010024 integer, -- Asian; Some Other Race - p0010025 integer, -- Native Hawaiian and Other Pacific Islander; Some Other Race - p0010026 integer, -- Population of three races - p0010047 integer, -- Population of four races - p0010063 integer, -- Population of five races - p0010070 integer, -- Population of six races - - -- This section is referred to as P2. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE - p0020001 integer, -- Total - p0020002 integer, -- Hispanic or Latino - p0020003 integer, -- Not Hispanic or Latino: - p0020004 integer, -- Population of one race: - p0020005 integer, -- White Alone - p0020006 integer, -- Black or African American alone - p0020007 integer, -- American Indian and Alaska Native alone - p0020008 integer, -- Asian alone - p0020009 integer, -- Native Hawaiian and Other Pacific Islander alone - p0020010 integer, -- Some Other Race alone - p0020011 integer, -- Two or More Races - p0020012 integer, -- Population of two races - p0020028 integer, -- Population of three races - p0020049 integer, -- Population of four races - p0020065 integer, -- Population of five races - p0020072 integer, -- Population of six races - - -- This section is referred to as P3. RACE FOR THE POPULATION 18 YEARS AND OVER - p0030001 integer, -- Total - p0030002 integer, -- Population of one race: - p0030003 integer, -- White alone - p0030004 integer, -- Black or African American alone - p0030005 integer, -- American Indian and Alaska Native alone - p0030006 integer, -- Asian alone - p0030007 integer, -- Native Hawaiian and Other Pacific Islander alone - p0030008 integer, -- Some Other Race alone - p0030009 integer, -- Two or More Races - p0030010 integer, -- Population of two races - p0030026 integer, -- Population of three races - p0030047 integer, -- Population of four races - p0030063 integer, -- Population of five races - p0030070 integer, -- Population of six races - - -- This section is referred to as P4. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE - -- FOR THE POPULATION 18 YEARS AND OVER - p0040001 integer, -- Total - p0040002 integer, -- Hispanic or Latino - p0040003 integer, -- Not Hispanic or Latino: - p0040004 integer, -- Population of one race: - p0040005 integer, -- White alone - p0040006 integer, -- Black or African American alone - p0040007 integer, -- American Indian and Alaska Native alone - p0040008 integer, -- Asian alone - p0040009 integer, -- Native Hawaiian and Other Pacific Islander alone - p0040010 integer, -- Some Other Race alone - p0040011 integer, -- Two or More Races - p0040012 integer, -- Population of two races - p0040028 integer, -- Population of three races - p0040049 integer, -- Population of four races - p0040065 integer, -- Population of five races - p0040072 integer, -- Population of six races - - -- This section is referred to as H1. OCCUPANCY STATUS - h0010001 integer, -- Total housing units - h0010002 integer, -- Occupied - h0010003 integer -- Vacant -); - -SELECT * FROM us_counties_2010; - --- Listing 4-3: Importing Census data using COPY --- Note! If you run into an import error here, be sure you downloaded the code and --- data for the book according to the steps listed on page xxvii in the Introduction. --- Windows users: Please check the Note on page xxvii as well. - -COPY us_counties_2010 -FROM 'C:\YourDirectory\us_counties_2010.csv' -WITH (FORMAT CSV, HEADER); - --- Checking the data - -SELECT * FROM us_counties_2010; - -SELECT geo_name, state_us_abbreviation, area_land -FROM us_counties_2010 -ORDER BY area_land DESC -LIMIT 3; - -SELECT geo_name, state_us_abbreviation, internal_point_lon -FROM us_counties_2010 -ORDER BY internal_point_lon DESC -LIMIT 5; - - --- Listing 4-4: Creating a table to track supervisor salaries - -CREATE TABLE supervisor_salaries ( - town varchar(30), - county varchar(30), - supervisor varchar(30), - start_date date, - salary money, - benefits money -); - --- Listing 4-5: Importing salaries data from CSV to three table columns - -COPY supervisor_salaries (town, supervisor, salary) -FROM 'C:\YourDirectory\supervisor_salaries.csv' -WITH (FORMAT CSV, HEADER); - --- Check the data -SELECT * FROM supervisor_salaries LIMIT 2; - --- Listing 4-6 Use a temporary table to add a default value to a column during --- import - -DELETE FROM supervisor_salaries; - -CREATE TEMPORARY TABLE supervisor_salaries_temp (LIKE supervisor_salaries); - -COPY supervisor_salaries_temp (town, supervisor, salary) -FROM 'C:\YourDirectory\supervisor_salaries.csv' -WITH (FORMAT CSV, HEADER); - -INSERT INTO supervisor_salaries (town, county, supervisor, salary) -SELECT town, 'Some County', supervisor, salary -FROM supervisor_salaries_temp; - -DROP TABLE supervisor_salaries_temp; - --- Check the data -SELECT * FROM supervisor_salaries LIMIT 2; - --- Listing 4-7: Export an entire table with COPY - -COPY us_counties_2010 -TO 'C:\YourDirectory\us_counties_export.txt' -WITH (FORMAT CSV, HEADER, DELIMITER '|'); - - --- Listing 4-8: Exporting selected columns from a table with COPY - -COPY us_counties_2010 (geo_name, internal_point_lat, internal_point_lon) -TO 'C:\YourDirectory\us_counties_latlon_export.txt' -WITH (FORMAT CSV, HEADER, DELIMITER '|'); - --- Listing 4-9: Exporting query results with COPY - -COPY ( - SELECT geo_name, state_us_abbreviation - FROM us_counties_2010 - WHERE geo_name ILIKE '%mill%' - ) -TO 'C:\YourDirectory\us_counties_mill_export.txt' -WITH (FORMAT CSV, HEADER, DELIMITER '|'); diff --git a/Chapter_05/README.md b/Chapter_05/README.md index 7b7ad7c..99df829 100644 --- a/Chapter_05/README.md +++ b/Chapter_05/README.md @@ -2,6 +2,6 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 4: Importing and Exporting Data +### Chapter 5: Importing and Exporting Data Explains how to use SQL commands to load data from external files and then export it. You’ll load a table of U.S. Census population data that you’ll use throughout the book. diff --git a/Chapter_06/Chapter_05.sql b/Chapter_06/Chapter_05.sql deleted file mode 100644 index dacc76f..0000000 --- a/Chapter_06/Chapter_05.sql +++ /dev/null @@ -1,207 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 5 Code Examples --------------------------------------------------------------- - --- Listing 5-1: Basic addition, subtraction and multiplication with SQL - -SELECT 2 + 2; -- addition -SELECT 9 - 1; -- subtraction -SELECT 3 * 4; -- multiplication - --- Listing 5-2: Integer and decimal division with SQL - -SELECT 11 / 6; -- integer division -SELECT 11 % 6; -- modulo division -SELECT 11.0 / 6; -- decimal division -SELECT CAST(11 AS numeric(3,1)) / 6; - --- Listing 5-3: Exponents, roots and factorials with SQL - -SELECT 3 ^ 4; -- exponentiation -SELECT |/ 10; -- square root (operator) -SELECT sqrt(10); -- square root (function) -SELECT ||/ 10; -- cube root -SELECT 4 !; -- factorial - --- Order of operations - -SELECT 7 + 8 * 9; -- answer: 79 -SELECT (7 + 8) * 9; -- answer: 135 - -SELECT 3 ^ 3 - 1; -- answer: 26 -SELECT 3 ^ (3 - 1); -- answer: 9 - --- Listing 5-4: Selecting Census population columns by race with aliases - -SELECT geo_name, - state_us_abbreviation AS "st", - p0010001 AS "Total Population", - p0010003 AS "White Alone", - p0010004 AS "Black or African American Alone", - p0010005 AS "Am Indian/Alaska Native Alone", - p0010006 AS "Asian Alone", - p0010007 AS "Native Hawaiian and Other Pacific Islander Alone", - p0010008 AS "Some Other Race Alone", - p0010009 AS "Two or More Races" -FROM us_counties_2010; - --- Listing 5-5: Adding two columns in us_counties_2010 - -SELECT geo_name, - state_us_abbreviation AS "st", - p0010003 AS "White Alone", - p0010004 AS "Black Alone", - p0010003 + p0010004 AS "Total White and Black" -FROM us_counties_2010; - --- Listing 5-6: Checking Census data totals - -SELECT geo_name, - state_us_abbreviation AS "st", - p0010001 AS "Total", - p0010003 + p0010004 + p0010005 + p0010006 + p0010007 - + p0010008 + p0010009 AS "All Races", - (p0010003 + p0010004 + p0010005 + p0010006 + p0010007 - + p0010008 + p0010009) - p0010001 AS "Difference" -FROM us_counties_2010 -ORDER BY "Difference" DESC; - --- Listing 5-7: Calculating the percent of the population that is --- Asian by county (percent of the whole) - -SELECT geo_name, - state_us_abbreviation AS "st", - (CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian" -FROM us_counties_2010 -ORDER BY "pct_asian" DESC; - --- Listing 5-8: Calculating percent change - -CREATE TABLE percent_change ( - department varchar(20), - spend_2014 numeric(10,2), - spend_2017 numeric(10,2) -); - -INSERT INTO percent_change -VALUES - ('Building', 250000, 289000), - ('Assessor', 178556, 179500), - ('Library', 87777, 90001), - ('Clerk', 451980, 650000), - ('Police', 250000, 223000), - ('Recreation', 199000, 195000); - -SELECT department, - spend_2014, - spend_2017, - round( (spend_2017 - spend_2014) / - spend_2014 * 100, 1 ) AS "pct_change" -FROM percent_change; - --- Listing 5-9: Using sum() and avg() aggregate functions - -SELECT sum(p0010001) AS "County Sum", - round(avg(p0010001), 0) AS "County Average" -FROM us_counties_2010; - --- Listing 5-10: Testing SQL percentile functions - -CREATE TABLE percentile_test ( - numbers integer -); - -INSERT INTO percentile_test (numbers) VALUES - (1), (2), (3), (4), (5), (6); - -SELECT - percentile_cont(.5) - WITHIN GROUP (ORDER BY numbers), - percentile_disc(.5) - WITHIN GROUP (ORDER BY numbers) -FROM percentile_test; - --- Listing 5-11: Using sum(), avg(), and percentile_cont() aggregate functions - -SELECT sum(p0010001) AS "County Sum", - round(avg(p0010001), 0) AS "County Average", - percentile_cont(.5) - WITHIN GROUP (ORDER BY p0010001) AS "County Median" -FROM us_counties_2010; - --- Listing 5-12: Passing an array of values to percentile_cont() - --- quartiles -SELECT percentile_cont(array[.25,.5,.75]) - WITHIN GROUP (ORDER BY p0010001) AS "quartiles" -FROM us_counties_2010; - --- Extra: --- quintiles -SELECT percentile_cont(array[.2,.4,.6,.8]) - WITHIN GROUP (ORDER BY p0010001) AS "quintiles" -FROM us_counties_2010; - --- deciles -SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9]) - WITHIN GROUP (ORDER BY p0010001) AS "deciles" -FROM us_counties_2010; - --- Listing 5-13: Using unnest() to turn an array into rows - -SELECT unnest( - percentile_cont(array[.25,.5,.75]) - WITHIN GROUP (ORDER BY p0010001) - ) AS "quartiles" -FROM us_counties_2010; - --- Listing 5-14: Creating a median() aggregate function in PostgreSQL --- Source: https://wiki.postgresql.org/wiki/Aggregate_Median - -CREATE OR REPLACE FUNCTION _final_median(anyarray) - RETURNS float8 AS -$$ - WITH q AS - ( - SELECT val - FROM unnest($1) val - WHERE VAL IS NOT NULL - ORDER BY 1 - ), - cnt AS - ( - SELECT COUNT(*) AS c FROM q - ) - SELECT AVG(val)::float8 - FROM - ( - SELECT val FROM q - LIMIT 2 - MOD((SELECT c FROM cnt), 2) - OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0) - ) q2; -$$ -LANGUAGE sql IMMUTABLE; - -CREATE AGGREGATE median(anyelement) ( - SFUNC=array_append, - STYPE=anyarray, - FINALFUNC=_final_median, - INITCOND='{}' -); - --- Listing 5-15: Using a median() aggregate function - -SELECT sum(p0010001) AS "County Sum", - round(avg(p0010001), 0) AS "County Average", - median(p0010001) AS "County Median", - percentile_cont(.5) - WITHIN GROUP (ORDER BY P0010001) AS "50th Percentile" -FROM us_counties_2010; - --- Listing 5-16: Finding the most-frequent value with mode() - -SELECT mode() WITHIN GROUP (ORDER BY p0010001) -FROM us_counties_2010; diff --git a/Chapter_06/README.md b/Chapter_06/README.md index 106b336..955bb4a 100644 --- a/Chapter_06/README.md +++ b/Chapter_06/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 5: Basic Math and Stats with SQL +### Chapter 6: Basic Math and Stats with SQL Covers arithmetic operations and introduces aggregate functions for finding sums, averages, and medians. diff --git a/Chapter_07/Chapter_06.sql b/Chapter_07/Chapter_06.sql deleted file mode 100644 index b8130da..0000000 --- a/Chapter_07/Chapter_06.sql +++ /dev/null @@ -1,196 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 6 Code Examples --------------------------------------------------------------- - --- Listing 6-1: Creating the departments and employees tables - -CREATE TABLE departments ( - dept_id bigserial, - dept varchar(100), - city varchar(100), - CONSTRAINT dept_key PRIMARY KEY (dept_id), - CONSTRAINT dept_city_unique UNIQUE (dept, city) -); - -CREATE TABLE employees ( - emp_id bigserial, - first_name varchar(100), - last_name varchar(100), - salary integer, - dept_id integer REFERENCES departments (dept_id), - CONSTRAINT emp_key PRIMARY KEY (emp_id), - CONSTRAINT emp_dept_unique UNIQUE (emp_id, dept_id) -); - -INSERT INTO departments (dept, city) -VALUES - ('Tax', 'Atlanta'), - ('IT', 'Boston'); - -INSERT INTO employees (first_name, last_name, salary, dept_id) -VALUES - ('Nancy', 'Jones', 62500, 1), - ('Lee', 'Smith', 59300, 1), - ('Soo', 'Nguyen', 83000, 2), - ('Janet', 'King', 95000, 2); - --- Listing 6-2: Joining the employees and departments tables - -SELECT * -FROM employees JOIN departments -ON employees.dept_id = departments.dept_id; - --- Listing 6-3: Creating two tables to explore JOIN types - -CREATE TABLE schools_left ( - id integer CONSTRAINT left_id_key PRIMARY KEY, - left_school varchar(30) -); - -CREATE TABLE schools_right ( - id integer CONSTRAINT right_id_key PRIMARY KEY, - right_school varchar(30) -); - -INSERT INTO schools_left (id, left_school) VALUES - (1, 'Oak Street School'), - (2, 'Roosevelt High School'), - (5, 'Washington Middle School'), - (6, 'Jefferson High School'); - -INSERT INTO schools_right (id, right_school) VALUES - (1, 'Oak Street School'), - (2, 'Roosevelt High School'), - (3, 'Morrison Elementary'), - (4, 'Chase Magnet Academy'), - (6, 'Jefferson High School'); - --- Listing 6-4: Using JOIN - -SELECT * -FROM schools_left JOIN schools_right -ON schools_left.id = schools_right.id; - --- Bonus: Also can be specified as INNER JOIN - -SELECT * -FROM schools_left INNER JOIN schools_right -ON schools_left.id = schools_right.id; - --- Listing 6-5: Using LEFT JOIN - -SELECT * -FROM schools_left LEFT JOIN schools_right -ON schools_left.id = schools_right.id; - --- Listing 6-6: Using RIGHT JOIN - -SELECT * -FROM schools_left RIGHT JOIN schools_right -ON schools_left.id = schools_right.id; - --- Listing 6-7: Using FULL OUTER JOIN - -SELECT * -FROM schools_left FULL OUTER JOIN schools_right -ON schools_left.id = schools_right.id; - --- Listing 6-8: Using CROSS JOIN - -SELECT * -FROM schools_left CROSS JOIN schools_right; - --- Listing 6-9: Filtering to show missing values with IS NULL - -SELECT * -FROM schools_left LEFT JOIN schools_right -ON schools_left.id = schools_right.id -WHERE schools_right.id IS NULL; - --- Listing 6-10: Querying specific columns in a join -SELECT schools_left.id, - schools_left.left_school, - schools_right.right_school -FROM schools_left LEFT JOIN schools_right -ON schools_left.id = schools_right.id; - --- Listing 6-11: Simplifying code with table aliases -SELECT lt.id, - lt.left_school, - rt.right_school -FROM schools_left AS lt LEFT JOIN schools_right AS rt -ON lt.id = rt.id; - --- Listing 6-12: Joining multiple tables -CREATE TABLE schools_enrollment ( - id integer, - enrollment integer -); - -CREATE TABLE schools_grades ( - id integer, - grades varchar(10) -); - -INSERT INTO schools_enrollment (id, enrollment) -VALUES - (1, 360), - (2, 1001), - (5, 450), - (6, 927); - -INSERT INTO schools_grades (id, grades) -VALUES - (1, 'K-3'), - (2, '9-12'), - (5, '6-8'), - (6, '9-12'); - -SELECT lt.id, lt.left_school, en.enrollment, gr.grades -FROM schools_left AS lt LEFT JOIN schools_enrollment AS en - ON lt.id = en.id -LEFT JOIN schools_grades AS gr - ON lt.id = gr.id; - --- Listing 6-13: Performing math on joined Census tables --- Decennial Census 2000. Full data dictionary at https://www.census.gov/prod/cen2000/doc/pl94-171.pdf --- Note: Some non-number columns have been given more descriptive names - -CREATE TABLE us_counties_2000 ( - geo_name varchar(90), -- County/state name, - state_us_abbreviation varchar(2), -- State/U.S. abbreviation - state_fips varchar(2), -- State FIPS code - county_fips varchar(3), -- County code - p0010001 integer, -- Total population - p0010002 integer, -- Population of one race: - p0010003 integer, -- White Alone - p0010004 integer, -- Black or African American alone - p0010005 integer, -- American Indian and Alaska Native alone - p0010006 integer, -- Asian alone - p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone - p0010008 integer, -- Some Other Race alone - p0010009 integer, -- Population of two or more races - p0010010 integer, -- Population of two races - p0020002 integer, -- Hispanic or Latino - p0020003 integer -- Not Hispanic or Latino: -); - -COPY us_counties_2000 -FROM 'C:\YourDirectory\us_counties_2000.csv' -WITH (FORMAT CSV, HEADER); - -SELECT c2010.geo_name, - c2010.state_us_abbreviation AS state, - c2010.p0010001 AS pop_2010, - c2000.p0010001 AS pop_2000, - c2010.p0010001 - c2000.p0010001 AS raw_change, - round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001) - / c2000.p0010001 * 100, 1 ) AS pct_change -FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000 -ON c2010.state_fips = c2000.state_fips - AND c2010.county_fips = c2000.county_fips - AND c2010.p0010001 <> c2000.p0010001 -ORDER BY pct_change DESC; diff --git a/Chapter_07/README.md b/Chapter_07/README.md index 7aa8f33..2f2cbc4 100644 --- a/Chapter_07/README.md +++ b/Chapter_07/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 6: Joining Tables in a Relational Database +### Chapter 7: Joining Tables in a Relational Database Explains how to query multiple, related tables by joining them on key columns. You’ll learn how and when to use different types of joins. diff --git a/Chapter_08/Chapter_07.sql b/Chapter_08/Chapter_07.sql deleted file mode 100644 index 3cc6cac..0000000 --- a/Chapter_08/Chapter_07.sql +++ /dev/null @@ -1,184 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 7 Code Examples --------------------------------------------------------------- - --- Listing 7-1: Declaring a single-column natural key as primary key - --- As a column constraint -CREATE TABLE natural_key_example ( - license_id varchar(10) CONSTRAINT license_key PRIMARY KEY, - first_name varchar(50), - last_name varchar(50) -); - --- Drop the table before trying again -DROP TABLE natural_key_example; - --- As a table constraint -CREATE TABLE natural_key_example ( - license_id varchar(10), - first_name varchar(50), - last_name varchar(50), - CONSTRAINT license_key PRIMARY KEY (license_id) -); - --- Listing 7-2: Example of a primary key violation -INSERT INTO natural_key_example (license_id, first_name, last_name) -VALUES ('T229901', 'Lynn', 'Malero'); - -INSERT INTO natural_key_example (license_id, first_name, last_name) -VALUES ('T229901', 'Sam', 'Tracy'); - --- Listing 7-3: Declaring a composite primary key as a natural key -CREATE TABLE natural_key_composite_example ( - student_id varchar(10), - school_day date, - present boolean, - CONSTRAINT student_key PRIMARY KEY (student_id, school_day) -); - --- Listing 7-4: Example of a composite primary key violation - -INSERT INTO natural_key_composite_example (student_id, school_day, present) -VALUES(775, '1/22/2017', 'Y'); - -INSERT INTO natural_key_composite_example (student_id, school_day, present) -VALUES(775, '1/23/2017', 'Y'); - -INSERT INTO natural_key_composite_example (student_id, school_day, present) -VALUES(775, '1/23/2017', 'N'); - --- Listing 7-5: Declaring a bigserial column as a surrogate key - -CREATE TABLE surrogate_key_example ( - order_number bigserial, - product_name varchar(50), - order_date date, - CONSTRAINT order_key PRIMARY KEY (order_number) -); - -INSERT INTO surrogate_key_example (product_name, order_date) -VALUES ('Beachball Polish', '2015-03-17'), - ('Wrinkle De-Atomizer', '2017-05-22'), - ('Flux Capacitor', '1985-10-26'); - -SELECT * FROM surrogate_key_example; - --- Listing 7-6: A foreign key example - -CREATE TABLE licenses ( - license_id varchar(10), - first_name varchar(50), - last_name varchar(50), - CONSTRAINT licenses_key PRIMARY KEY (license_id) -); - -CREATE TABLE registrations ( - registration_id varchar(10), - registration_date date, - license_id varchar(10) REFERENCES licenses (license_id), - CONSTRAINT registration_key PRIMARY KEY (registration_id, license_id) -); - -INSERT INTO licenses (license_id, first_name, last_name) -VALUES ('T229901', 'Lynn', 'Malero'); - -INSERT INTO registrations (registration_id, registration_date, license_id) -VALUES ('A203391', '3/17/2017', 'T229901'); - -INSERT INTO registrations (registration_id, registration_date, license_id) -VALUES ('A75772', '3/17/2017', 'T000001'); - --- Listing 7-7: CHECK constraint examples - -CREATE TABLE check_constraint_example ( - user_id bigserial, - user_role varchar(50), - salary integer, - CONSTRAINT user_id_key PRIMARY KEY (user_id), - CONSTRAINT check_role_in_list CHECK (user_role IN('Admin', 'Staff')), - CONSTRAINT check_salary_not_zero CHECK (salary > 0) -); - --- Both of these will fail: -INSERT INTO check_constraint_example (user_role) -VALUES ('admin'); - -INSERT INTO check_constraint_example (salary) -VALUES (0); - --- Listing 7-8: UNIQUE constraint example - -CREATE TABLE unique_constraint_example ( - contact_id bigserial CONSTRAINT contact_id_key PRIMARY KEY, - first_name varchar(50), - last_name varchar(50), - email varchar(200), - CONSTRAINT email_unique UNIQUE (email) -); - -INSERT INTO unique_constraint_example (first_name, last_name, email) -VALUES ('Samantha', 'Lee', 'slee@example.org'); - -INSERT INTO unique_constraint_example (first_name, last_name, email) -VALUES ('Betty', 'Diaz', 'bdiaz@example.org'); - -INSERT INTO unique_constraint_example (first_name, last_name, email) -VALUES ('Sasha', 'Lee', 'slee@example.org'); - --- Listing 7-9: NOT NULL constraint example - -CREATE TABLE not_null_example ( - student_id bigserial, - first_name varchar(50) NOT NULL, - last_name varchar(50) NOT NULL, - CONSTRAINT student_id_key PRIMARY KEY (student_id) -); - --- Listing 7-10: Dropping and adding a primary key and a NOT NULL constraint - --- Drop -ALTER TABLE not_null_example DROP CONSTRAINT student_id_key; - --- Add -ALTER TABLE not_null_example ADD CONSTRAINT student_id_key PRIMARY KEY (student_id); - --- Drop -ALTER TABLE not_null_example ALTER COLUMN first_name DROP NOT NULL; - --- Add -ALTER TABLE not_null_example ALTER COLUMN first_name SET NOT NULL; - --- Listing 7-11: Importing New York City address data - -CREATE TABLE new_york_addresses ( - longitude numeric(9,6), - latitude numeric(9,6), - street_number varchar(10), - street varchar(32), - unit varchar(7), - postcode varchar(5), - id integer CONSTRAINT new_york_key PRIMARY KEY -); - -COPY new_york_addresses -FROM 'C:\YourDirectory\city_of_new_york.csv' -WITH (FORMAT CSV, HEADER); - --- Listing 7-12: Benchmark queries for index performance - -EXPLAIN ANALYZE SELECT * FROM new_york_addresses -WHERE street = 'BROADWAY'; - -EXPLAIN ANALYZE SELECT * FROM new_york_addresses -WHERE street = '52 STREET'; - -EXPLAIN ANALYZE SELECT * FROM new_york_addresses -WHERE street = 'ZWICKY AVENUE'; - --- Listing 7-13: Creating a B-Tree index on the new_york_addresses table - -CREATE INDEX street_idx ON new_york_addresses (street); diff --git a/Chapter_08/README.md b/Chapter_08/README.md index e632a1f..8ebef92 100644 --- a/Chapter_08/README.md +++ b/Chapter_08/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 7: Table Design that Works for You +### Chapter 8: Table Design that Works for You Covers how to set up tables to improve the organization and integrity of your data as well as how to speed up queries using indexes. diff --git a/Chapter_09/Chapter_08.sql b/Chapter_09/Chapter_08.sql deleted file mode 100644 index e44dc35..0000000 --- a/Chapter_09/Chapter_08.sql +++ /dev/null @@ -1,299 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 8 Code Examples --------------------------------------------------------------- - --- Listing 8-1: Creating and filling the 2014 Public Libraries Survey table - -CREATE TABLE pls_fy2014_pupld14a ( - stabr varchar(2) NOT NULL, - fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY, - libid varchar(20) NOT NULL, - libname varchar(100) NOT NULL, - obereg varchar(2) NOT NULL, - rstatus integer NOT NULL, - statstru varchar(2) NOT NULL, - statname varchar(2) NOT NULL, - stataddr varchar(2) NOT NULL, - longitud numeric(10,7) NOT NULL, - latitude numeric(10,7) NOT NULL, - fipsst varchar(2) NOT NULL, - fipsco varchar(3) NOT NULL, - address varchar(35) NOT NULL, - city varchar(20) NOT NULL, - zip varchar(5) NOT NULL, - zip4 varchar(4) NOT NULL, - cnty varchar(20) NOT NULL, - phone varchar(10) NOT NULL, - c_relatn varchar(2) NOT NULL, - c_legbas varchar(2) NOT NULL, - c_admin varchar(2) NOT NULL, - geocode varchar(3) NOT NULL, - lsabound varchar(1) NOT NULL, - startdat varchar(10), - enddate varchar(10), - popu_lsa integer NOT NULL, - centlib integer NOT NULL, - branlib integer NOT NULL, - bkmob integer NOT NULL, - master numeric(8,2) NOT NULL, - libraria numeric(8,2) NOT NULL, - totstaff numeric(8,2) NOT NULL, - locgvt integer NOT NULL, - stgvt integer NOT NULL, - fedgvt integer NOT NULL, - totincm integer NOT NULL, - salaries integer, - benefit integer, - staffexp integer, - prmatexp integer NOT NULL, - elmatexp integer NOT NULL, - totexpco integer NOT NULL, - totopexp integer NOT NULL, - lcap_rev integer NOT NULL, - scap_rev integer NOT NULL, - fcap_rev integer NOT NULL, - cap_rev integer NOT NULL, - capital integer NOT NULL, - bkvol integer NOT NULL, - ebook integer NOT NULL, - audio_ph integer NOT NULL, - audio_dl float NOT NULL, - video_ph integer NOT NULL, - video_dl float NOT NULL, - databases integer NOT NULL, - subscrip integer NOT NULL, - hrs_open integer NOT NULL, - visits integer NOT NULL, - referenc integer NOT NULL, - regbor integer NOT NULL, - totcir integer NOT NULL, - kidcircl integer NOT NULL, - elmatcir integer NOT NULL, - loanto integer NOT NULL, - loanfm integer NOT NULL, - totpro integer NOT NULL, - totatten integer NOT NULL, - gpterms integer NOT NULL, - pitusr integer NOT NULL, - wifisess integer NOT NULL, - yr_sub integer NOT NULL -); - -CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname); -CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr); -CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city); -CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits); - -COPY pls_fy2014_pupld14a -FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv' -WITH (FORMAT CSV, HEADER); - --- Listing 8-2: Creating and filling the 2009 Public Libraries Survey table - -CREATE TABLE pls_fy2009_pupld09a ( - stabr varchar(2) NOT NULL, - fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY, - libid varchar(20) NOT NULL, - libname varchar(100) NOT NULL, - address varchar(35) NOT NULL, - city varchar(20) NOT NULL, - zip varchar(5) NOT NULL, - zip4 varchar(4) NOT NULL, - cnty varchar(20) NOT NULL, - phone varchar(10) NOT NULL, - c_relatn varchar(2) NOT NULL, - c_legbas varchar(2) NOT NULL, - c_admin varchar(2) NOT NULL, - geocode varchar(3) NOT NULL, - lsabound varchar(1) NOT NULL, - startdat varchar(10), - enddate varchar(10), - popu_lsa integer NOT NULL, - centlib integer NOT NULL, - branlib integer NOT NULL, - bkmob integer NOT NULL, - master numeric(8,2) NOT NULL, - libraria numeric(8,2) NOT NULL, - totstaff numeric(8,2) NOT NULL, - locgvt integer NOT NULL, - stgvt integer NOT NULL, - fedgvt integer NOT NULL, - totincm integer NOT NULL, - salaries integer, - benefit integer, - staffexp integer, - prmatexp integer NOT NULL, - elmatexp integer NOT NULL, - totexpco integer NOT NULL, - totopexp integer NOT NULL, - lcap_rev integer NOT NULL, - scap_rev integer NOT NULL, - fcap_rev integer NOT NULL, - cap_rev integer NOT NULL, - capital integer NOT NULL, - bkvol integer NOT NULL, - ebook integer NOT NULL, - audio integer NOT NULL, - video integer NOT NULL, - databases integer NOT NULL, - subscrip integer NOT NULL, - hrs_open integer NOT NULL, - visits integer NOT NULL, - referenc integer NOT NULL, - regbor integer NOT NULL, - totcir integer NOT NULL, - kidcircl integer NOT NULL, - loanto integer NOT NULL, - loanfm integer NOT NULL, - totpro integer NOT NULL, - totatten integer NOT NULL, - gpterms integer NOT NULL, - pitusr integer NOT NULL, - yr_sub integer NOT NULL, - obereg varchar(2) NOT NULL, - rstatus integer NOT NULL, - statstru varchar(2) NOT NULL, - statname varchar(2) NOT NULL, - stataddr varchar(2) NOT NULL, - longitud numeric(10,7) NOT NULL, - latitude numeric(10,7) NOT NULL, - fipsst varchar(2) NOT NULL, - fipsco varchar(3) NOT NULL -); - -CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname); -CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr); -CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city); -CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits); - -COPY pls_fy2009_pupld09a -FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv' -WITH (FORMAT CSV, HEADER); - --- Listing 8-3: Using count() for table row counts - -SELECT count(*) -FROM pls_fy2014_pupld14a; - -SELECT count(*) -FROM pls_fy2009_pupld09a; - --- Listing 8-4: Using count() for the number of values in a column - -SELECT count(salaries) -FROM pls_fy2014_pupld14a; - --- Listing 8-5: Using count() for the number of distinct values in a column - -SELECT count(libname) -FROM pls_fy2014_pupld14a; - -SELECT count(DISTINCT libname) -FROM pls_fy2014_pupld14a; - --- Bonus: find duplicate libnames -SELECT libname, count(libname) -FROM pls_fy2014_pupld14a -GROUP BY libname -ORDER BY count(libname) DESC; - --- Bonus: see location of every Oxford Public Library -SELECT libname, city, stabr -FROM pls_fy2014_pupld14a -WHERE libname = 'OXFORD PUBLIC LIBRARY'; - --- Listing 8-6: Finding the most and fewest visits using max() and min() -SELECT max(visits), min(visits) -FROM pls_fy2014_pupld14a; - --- Listing 8-7: Using GROUP BY on the stabr column - --- There are 56 in 2014. -SELECT stabr -FROM pls_fy2014_pupld14a -GROUP BY stabr -ORDER BY stabr; - --- Bonus: there are 55 in 2009. -SELECT stabr -FROM pls_fy2009_pupld09a -GROUP BY stabr -ORDER BY stabr; - --- Listing 8-8: Using GROUP BY on the city and stabr columns - -SELECT city, stabr -FROM pls_fy2014_pupld14a -GROUP BY city, stabr -ORDER BY city, stabr; - --- Bonus: We can count some of the combos -SELECT city, stabr, count(*) -FROM pls_fy2014_pupld14a -GROUP BY city, stabr -ORDER BY count(*) DESC; - --- Listing 8-9: GROUP BY with count() on the stabr column - -SELECT stabr, count(*) -FROM pls_fy2014_pupld14a -GROUP BY stabr -ORDER BY count(*) DESC; - --- Listing 8-10: GROUP BY with count() on the stabr and stataddr columns - -SELECT stabr, stataddr, count(*) -FROM pls_fy2014_pupld14a -GROUP BY stabr, stataddr -ORDER BY stabr ASC, count(*) DESC; - --- Listing 8-11: Using the sum() aggregate function to total visits to --- libraries in 2014 and 2009 - --- 2014 -SELECT sum(visits) AS visits_2014 -FROM pls_fy2014_pupld14a -WHERE visits >= 0; - --- 2009 -SELECT sum(visits) AS visits_2009 -FROM pls_fy2009_pupld09a -WHERE visits >= 0; - --- Listing 8-12: Using sum() to total visits on joined 2014 and 2009 library tables - -SELECT sum(pls14.visits) AS visits_2014, - sum(pls09.visits) AS visits_2009 -FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09 -ON pls14.fscskey = pls09.fscskey -WHERE pls14.visits >= 0 AND pls09.visits >= 0; - --- Listing 8-13: Using GROUP BY to track percent change in library visits by state - -SELECT pls14.stabr, - sum(pls14.visits) AS visits_2014, - sum(pls09.visits) AS visits_2009, - round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) / - sum(pls09.visits) * 100, 2 ) AS pct_change -FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09 -ON pls14.fscskey = pls09.fscskey -WHERE pls14.visits >= 0 AND pls09.visits >= 0 -GROUP BY pls14.stabr -ORDER BY pct_change DESC; - --- Listing 8-14: Using HAVING to filter the results of an aggregate query - -SELECT pls14.stabr, - sum(pls14.visits) AS visits_2014, - sum(pls09.visits) AS visits_2009, - round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) / - sum(pls09.visits) * 100, 2 ) AS pct_change -FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09 -ON pls14.fscskey = pls09.fscskey -WHERE pls14.visits >= 0 AND pls09.visits >= 0 -GROUP BY pls14.stabr -HAVING sum(pls14.visits) > 50000000 -ORDER BY pct_change DESC; diff --git a/Chapter_09/README.md b/Chapter_09/README.md index fced9d7..7f89250 100644 --- a/Chapter_09/README.md +++ b/Chapter_09/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 8: Extracting Information by Grouping and Summarizing +### Chapter 9: Extracting Information by Grouping and Summarizing Explains how to use aggregate functions to find trends in U.S. library use based on annual surveys. diff --git a/Chapter_10/Chapter_09.sql b/Chapter_10/Chapter_09.sql deleted file mode 100644 index d4bfe6b..0000000 --- a/Chapter_10/Chapter_09.sql +++ /dev/null @@ -1,259 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 9 Code Examples --------------------------------------------------------------- - --- Listing 9-1: Importing the FSIS Meat, Poultry, and Egg Inspection Directory --- https://catalog.data.gov/dataset/meat-poultry-and-egg-inspection-directory-by-establishment-name - -CREATE TABLE meat_poultry_egg_inspect ( - est_number varchar(50) CONSTRAINT est_number_key PRIMARY KEY, - company varchar(100), - street varchar(100), - city varchar(30), - st varchar(2), - zip varchar(5), - phone varchar(14), - grant_date date, - activities text, - dbas text -); - -COPY meat_poultry_egg_inspect -FROM 'C:\YourDirectory\MPI_Directory_by_Establishment_Name.csv' -WITH (FORMAT CSV, HEADER, DELIMITER ','); - -CREATE INDEX company_idx ON meat_poultry_egg_inspect (company); - --- Count the rows imported: -SELECT count(*) FROM meat_poultry_egg_inspect; - --- Listing 9-2: Finding multiple companies at the same address -SELECT company, - street, - city, - st, - count(*) AS address_count -FROM meat_poultry_egg_inspect -GROUP BY company, street, city, st -HAVING count(*) > 1 -ORDER BY company, street, city, st; - --- Listing 9-3: Grouping and counting states -SELECT st, - count(*) AS st_count -FROM meat_poultry_egg_inspect -GROUP BY st -ORDER BY st; - --- Listing 9-4: Using IS NULL to find missing values in the st column -SELECT est_number, - company, - city, - st, - zip -FROM meat_poultry_egg_inspect -WHERE st IS NULL; - --- Listing 9-5: Using GROUP BY and count() to find inconsistent company names - -SELECT company, - count(*) AS company_count -FROM meat_poultry_egg_inspect -GROUP BY company -ORDER BY company ASC; - --- Listing 9-6: Using length() and count() to test the zip column - -SELECT length(zip), - count(*) AS length_count -FROM meat_poultry_egg_inspect -GROUP BY length(zip) -ORDER BY length(zip) ASC; - --- Listing 9-7: Filtering with length() to find short zip values - -SELECT st, - count(*) AS st_count -FROM meat_poultry_egg_inspect -WHERE length(zip) < 5 -GROUP BY st -ORDER BY st ASC; - --- Listing 9-8: Backing up a table - -CREATE TABLE meat_poultry_egg_inspect_backup AS -SELECT * FROM meat_poultry_egg_inspect; - --- Check number of records: -SELECT - (SELECT count(*) FROM meat_poultry_egg_inspect) AS original, - (SELECT count(*) FROM meat_poultry_egg_inspect_backup) AS backup; - --- Listing 9-9: Creating and filling the st_copy column with ALTER TABLE and UPDATE - -ALTER TABLE meat_poultry_egg_inspect ADD COLUMN st_copy varchar(2); - -UPDATE meat_poultry_egg_inspect -SET st_copy = st; - --- Listing 9-10: Checking values in the st and st_copy columns - -SELECT st, - st_copy -FROM meat_poultry_egg_inspect -ORDER BY st; - --- Listing 9-11: Updating the st column for three establishments - -UPDATE meat_poultry_egg_inspect -SET st = 'MN' -WHERE est_number = 'V18677A'; - -UPDATE meat_poultry_egg_inspect -SET st = 'AL' -WHERE est_number = 'M45319+P45319'; - -UPDATE meat_poultry_egg_inspect -SET st = 'WI' -WHERE est_number = 'M263A+P263A+V263A'; - --- Listing 9-12: Restoring original st column values - --- Restoring from the column backup -UPDATE meat_poultry_egg_inspect -SET st = st_copy; - --- Restoring from the table backup -UPDATE meat_poultry_egg_inspect original -SET st = backup.st -FROM meat_poultry_egg_inspect_backup backup -WHERE original.est_number = backup.est_number; - --- Listing 9-13: Creating and filling the company_standard column - -ALTER TABLE meat_poultry_egg_inspect ADD COLUMN company_standard varchar(100); - -UPDATE meat_poultry_egg_inspect -SET company_standard = company; - --- Listing 9-14: Use UPDATE to modify field values that match a string - -UPDATE meat_poultry_egg_inspect -SET company_standard = 'Armour-Eckrich Meats' -WHERE company LIKE 'Armour%'; - -SELECT company, company_standard -FROM meat_poultry_egg_inspect -WHERE company LIKE 'Armour%'; - --- Listing 9-15: Creating and filling the zip_copy column - -ALTER TABLE meat_poultry_egg_inspect ADD COLUMN zip_copy varchar(5); - -UPDATE meat_poultry_egg_inspect -SET zip_copy = zip; - --- Listing 9-16: Modify codes in the zip column missing two leading zeros - -UPDATE meat_poultry_egg_inspect -SET zip = '00' || zip -WHERE st IN('PR','VI') AND length(zip) = 3; - --- Listing 9-17: Modify codes in the zip column missing one leading zero - -UPDATE meat_poultry_egg_inspect -SET zip = '0' || zip -WHERE st IN('CT','MA','ME','NH','NJ','RI','VT') AND length(zip) = 4; - --- Listing 9-18: Creating and filling a state_regions table - -CREATE TABLE state_regions ( - st varchar(2) CONSTRAINT st_key PRIMARY KEY, - region varchar(20) NOT NULL -); - -COPY state_regions -FROM 'C:\YourDirectory\state_regions.csv' -WITH (FORMAT CSV, HEADER, DELIMITER ','); - --- Listing 9-19: Adding and updating an inspection_date column - -ALTER TABLE meat_poultry_egg_inspect ADD COLUMN inspection_date date; - -UPDATE meat_poultry_egg_inspect inspect -SET inspection_date = '2019-12-01' -WHERE EXISTS (SELECT state_regions.region - FROM state_regions - WHERE inspect.st = state_regions.st - AND state_regions.region = 'New England'); - --- Listing 9-20: Viewing updated inspection_date values - -SELECT st, inspection_date -FROM meat_poultry_egg_inspect -GROUP BY st, inspection_date -ORDER BY st; - --- Listing 9-21: Delete rows matching an expression - -DELETE FROM meat_poultry_egg_inspect -WHERE st IN('PR','VI'); - --- Listing 9-22: Remove a column from a table using DROP - -ALTER TABLE meat_poultry_egg_inspect DROP COLUMN zip_copy; - --- Listing 9-23: Remove a table from a database using DROP - -DROP TABLE meat_poultry_egg_inspect_backup; - --- Listing 9-24: Demonstrating a transaction block - --- Start transaction and perform update -START TRANSACTION; - -UPDATE meat_poultry_egg_inspect -SET company = 'AGRO Merchantss Oakland LLC' -WHERE company = 'AGRO Merchants Oakland, LLC'; - --- view changes -SELECT company -FROM meat_poultry_egg_inspect -WHERE company LIKE 'AGRO%' -ORDER BY company; - --- Revert changes -ROLLBACK; - --- See restored state -SELECT company -FROM meat_poultry_egg_inspect -WHERE company LIKE 'AGRO%' -ORDER BY company; - --- Alternately, commit changes at the end: -START TRANSACTION; - -UPDATE meat_poultry_egg_inspect -SET company = 'AGRO Merchants Oakland LLC' -WHERE company = 'AGRO Merchants Oakland, LLC'; - -COMMIT; - --- Listing 9-25: Backing up a table while adding and filling a new column - -CREATE TABLE meat_poultry_egg_inspect_backup AS -SELECT *, - '2018-02-07'::date AS reviewed_date -FROM meat_poultry_egg_inspect; - --- Listing 9-26: Swapping table names using ALTER TABLE - -ALTER TABLE meat_poultry_egg_inspect RENAME TO meat_poultry_egg_inspect_temp; -ALTER TABLE meat_poultry_egg_inspect_backup RENAME TO meat_poultry_egg_inspect; -ALTER TABLE meat_poultry_egg_inspect_temp RENAME TO meat_poultry_egg_inspect_backup; - - diff --git a/Chapter_10/README.md b/Chapter_10/README.md index c6d4d67..2ee02cb 100644 --- a/Chapter_10/README.md +++ b/Chapter_10/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 9: Inspecting and Modifying Data +### Chapter 10: Inspecting and Modifying Data -Explores how to nd and x incomplete or inaccurate data using a collection of records about meat, egg, and poultry producers as an example. +Explores how to find and remedy incomplete or inaccurate data using a collection of records about meat, egg, and poultry producers as an example. diff --git a/Chapter_11/Chapter_10.sql b/Chapter_11/Chapter_10.sql deleted file mode 100644 index ec41dbf..0000000 --- a/Chapter_11/Chapter_10.sql +++ /dev/null @@ -1,167 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 10 Code Examples --------------------------------------------------------------- - --- Listing 10-1: Create Census 2011-2015 ACS 5-Year stats table and import data - -CREATE TABLE acs_2011_2015_stats ( - geoid varchar(14) CONSTRAINT geoid_key PRIMARY KEY, - county varchar(50) NOT NULL, - st varchar(20) NOT NULL, - pct_travel_60_min numeric(5,3) NOT NULL, - pct_bachelors_higher numeric(5,3) NOT NULL, - pct_masters_higher numeric(5,3) NOT NULL, - median_hh_income integer, - CHECK (pct_masters_higher <= pct_bachelors_higher) -); - -COPY acs_2011_2015_stats -FROM 'C:\YourDirectory\acs_2011_2015_stats.csv' -WITH (FORMAT CSV, HEADER, DELIMITER ','); - -SELECT * FROM acs_2011_2015_stats; - --- Listing 10-2: Using corr(Y, X) to measure the relationship between --- education and income - -SELECT corr(median_hh_income, pct_bachelors_higher) - AS bachelors_income_r -FROM acs_2011_2015_stats; - --- Listing 10-3: Using corr(Y, X) on additional variables - -SELECT - round( - corr(median_hh_income, pct_bachelors_higher)::numeric, 2 - ) AS bachelors_income_r, - round( - corr(pct_travel_60_min, median_hh_income)::numeric, 2 - ) AS income_travel_r, - round( - corr(pct_travel_60_min, pct_bachelors_higher)::numeric, 2 - ) AS bachelors_travel_r -FROM acs_2011_2015_stats; - --- Listing 10-4: Regression slope and intercept functions - -SELECT - round( - regr_slope(median_hh_income, pct_bachelors_higher)::numeric, 2 - ) AS slope, - round( - regr_intercept(median_hh_income, pct_bachelors_higher)::numeric, 2 - ) AS y_intercept -FROM acs_2011_2015_stats; - --- Listing 10-5: Calculating the coefficient of determination, or r-squared - -SELECT round( - regr_r2(median_hh_income, pct_bachelors_higher)::numeric, 3 - ) AS r_squared -FROM acs_2011_2015_stats; - --- Bonus: Additional stats functions. --- Variance -SELECT var_pop(median_hh_income) -FROM acs_2011_2015_stats; - --- Standard deviation of the entire population -SELECT stddev_pop(median_hh_income) -FROM acs_2011_2015_stats; - --- Covariance -SELECT covar_pop(median_hh_income, pct_bachelors_higher) -FROM acs_2011_2015_stats; - --- Listing 10-6: The rank() and dense_rank() window functions - -CREATE TABLE widget_companies ( - id bigserial, - company varchar(30) NOT NULL, - widget_output integer NOT NULL -); - -INSERT INTO widget_companies (company, widget_output) -VALUES - ('Morse Widgets', 125000), - ('Springfield Widget Masters', 143000), - ('Best Widgets', 196000), - ('Acme Inc.', 133000), - ('District Widget Inc.', 201000), - ('Clarke Amalgamated', 620000), - ('Stavesacre Industries', 244000), - ('Bowers Widget Emporium', 201000); - -SELECT - company, - widget_output, - rank() OVER (ORDER BY widget_output DESC), - dense_rank() OVER (ORDER BY widget_output DESC) -FROM widget_companies; - --- Listing 10-7: Applying rank() within groups using PARTITION BY - -CREATE TABLE store_sales ( - store varchar(30), - category varchar(30) NOT NULL, - unit_sales bigint NOT NULL, - CONSTRAINT store_category_key PRIMARY KEY (store, category) -); - -INSERT INTO store_sales (store, category, unit_sales) -VALUES - ('Broders', 'Cereal', 1104), - ('Wallace', 'Ice Cream', 1863), - ('Broders', 'Ice Cream', 2517), - ('Cramers', 'Ice Cream', 2112), - ('Broders', 'Beer', 641), - ('Cramers', 'Cereal', 1003), - ('Cramers', 'Beer', 640), - ('Wallace', 'Cereal', 980), - ('Wallace', 'Beer', 988); - -SELECT - category, - store, - unit_sales, - rank() OVER (PARTITION BY category ORDER BY unit_sales DESC) -FROM store_sales; - --- Listing 10-8: Create and fill a 2015 FBI crime data table - -CREATE TABLE fbi_crime_data_2015 ( - st varchar(20), - city varchar(50), - population integer, - violent_crime integer, - property_crime integer, - burglary integer, - larceny_theft integer, - motor_vehicle_theft integer, - CONSTRAINT st_city_key PRIMARY KEY (st, city) -); - -COPY fbi_crime_data_2015 -FROM 'C:\YourDirectory\fbi_crime_data_2015.csv' -WITH (FORMAT CSV, HEADER, DELIMITER ','); - -SELECT * FROM fbi_crime_data_2015 -ORDER BY population DESC; - --- Listing 10-9: Find property crime rates per thousand in cities with 500,000 --- or more people - -SELECT - city, - st, - population, - property_crime, - round( - (property_crime::numeric / population) * 1000, 1 - ) AS pc_per_1000 -FROM fbi_crime_data_2015 -WHERE population >= 500000 -ORDER BY (property_crime::numeric / population) DESC; diff --git a/Chapter_11/README.md b/Chapter_11/README.md index 8fdb16f..fcc0c3a 100644 --- a/Chapter_11/README.md +++ b/Chapter_11/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 10: Statistical Functions in SQL +### Chapter 11: Statistical Functions in SQL Introduces correlation, regression, and ranking functions in SQL to help you derive more meaning from data sets. diff --git a/Chapter_12/Chapter_11.sql b/Chapter_12/Chapter_11.sql deleted file mode 100644 index e2de8d1..0000000 --- a/Chapter_12/Chapter_11.sql +++ /dev/null @@ -1,233 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 11 Code Examples --------------------------------------------------------------- - --- Listing 11-1: Extracting components of a timestamp value using date_part() - -SELECT - date_part('year', '2019-12-01 18:37:12 EST'::timestamptz) AS "year", - date_part('month', '2019-12-01 18:37:12 EST'::timestamptz) AS "month", - date_part('day', '2019-12-01 18:37:12 EST'::timestamptz) AS "day", - date_part('hour', '2019-12-01 18:37:12 EST'::timestamptz) AS "hour", - date_part('minute', '2019-12-01 18:37:12 EST'::timestamptz) AS "minute", - date_part('seconds', '2019-12-01 18:37:12 EST'::timestamptz) AS "seconds", - date_part('timezone_hour', '2019-12-01 18:37:12 EST'::timestamptz) AS "tz", - date_part('week', '2019-12-01 18:37:12 EST'::timestamptz) AS "week", - date_part('quarter', '2019-12-01 18:37:12 EST'::timestamptz) AS "quarter", - date_part('epoch', '2019-12-01 18:37:12 EST'::timestamptz) AS "epoch"; - --- Bonus: Using the SQL-standard extract() for similar datetime parsing: - -SELECT extract('year' from '2019-12-01 18:37:12 EST'::timestamptz) AS "year"; - --- Listing 11-2: Three functions for making datetimes from components - --- make a date -SELECT make_date(2018, 2, 22); --- make a time -SELECT make_time(18, 4, 30.3); --- make a timestamp with time zone -SELECT make_timestamptz(2018, 2, 22, 18, 4, 30.3, 'Europe/Lisbon'); - --- Bonus: Retrieving the current date and time - -SELECT - current_date, - current_time, - current_timestamp, - localtime, - localtimestamp, - now(); - --- Listing 11-3: Comparing current_timestamp and clock_timestamp() during row insert - -CREATE TABLE current_time_example ( - time_id bigserial, - current_timestamp_col timestamp with time zone, - clock_timestamp_col timestamp with time zone -); - -INSERT INTO current_time_example (current_timestamp_col, clock_timestamp_col) - (SELECT current_timestamp, - clock_timestamp() - FROM generate_series(1,1000)); - -SELECT * FROM current_time_example; - --- Time Zones - --- Listing 11-4: Showing your PostgreSQL server's default time zone - -SHOW timezone; -- Note: You can see all run-time defaults with SHOW ALL; - --- Listing 11-5: Showing time zone abbreviations and names - -SELECT * FROM pg_timezone_abbrevs; -SELECT * FROM pg_timezone_names; - --- Filter to find one -SELECT * FROM pg_timezone_names -WHERE name LIKE 'Europe%'; - --- Listing 11-6: Setting the time zone for a client session - -SET timezone TO 'US/Pacific'; - -CREATE TABLE time_zone_test ( - test_date timestamp with time zone -); -INSERT INTO time_zone_test VALUES ('2020-01-01 4:00'); - -SELECT test_date -FROM time_zone_test; - -SET timezone TO 'US/Eastern'; - -SELECT test_date -FROM time_zone_test; - -SELECT test_date AT TIME ZONE 'Asia/Seoul' -FROM time_zone_test; - - --- Math with dates! - -SELECT '9/30/1929'::date - '9/27/1929'::date; -SELECT '9/30/1929'::date + '5 years'::interval; - - --- Taxi Rides - --- Listing 11-7: Creating a table and importing NYC yellow taxi data - -CREATE TABLE nyc_yellow_taxi_trips_2016_06_01 ( - trip_id bigserial PRIMARY KEY, - vendor_id varchar(1) NOT NULL, - tpep_pickup_datetime timestamp with time zone NOT NULL, - tpep_dropoff_datetime timestamp with time zone NOT NULL, - passenger_count integer NOT NULL, - trip_distance numeric(8,2) NOT NULL, - pickup_longitude numeric(18,15) NOT NULL, - pickup_latitude numeric(18,15) NOT NULL, - rate_code_id varchar(2) NOT NULL, - store_and_fwd_flag varchar(1) NOT NULL, - dropoff_longitude numeric(18,15) NOT NULL, - dropoff_latitude numeric(18,15) NOT NULL, - payment_type varchar(1) NOT NULL, - fare_amount numeric(9,2) NOT NULL, - extra numeric(9,2) NOT NULL, - mta_tax numeric(5,2) NOT NULL, - tip_amount numeric(9,2) NOT NULL, - tolls_amount numeric(9,2) NOT NULL, - improvement_surcharge numeric(9,2) NOT NULL, - total_amount numeric(9,2) NOT NULL -); - -COPY nyc_yellow_taxi_trips_2016_06_01 ( - vendor_id, - tpep_pickup_datetime, - tpep_dropoff_datetime, - passenger_count, - trip_distance, - pickup_longitude, - pickup_latitude, - rate_code_id, - store_and_fwd_flag, - dropoff_longitude, - dropoff_latitude, - payment_type, - fare_amount, - extra, - mta_tax, - tip_amount, - tolls_amount, - improvement_surcharge, - total_amount - ) -FROM 'C:\YourDirectory\yellow_tripdata_2016_06_01.csv' -WITH (FORMAT CSV, HEADER, DELIMITER ','); - -CREATE INDEX tpep_pickup_idx -ON nyc_yellow_taxi_trips_2016_06_01 (tpep_pickup_datetime); - -SELECT count(*) FROM nyc_yellow_taxi_trips_2016_06_01; - --- Listing 11-8: Counting taxi trips by hour - -SELECT - date_part('hour', tpep_pickup_datetime) AS trip_hour, - count(*) -FROM nyc_yellow_taxi_trips_2016_06_01 -GROUP BY trip_hour -ORDER BY trip_hour; - --- Listing 11-9: Exporting taxi pickups per hour to a CSV file - -COPY - (SELECT - date_part('hour', tpep_pickup_datetime) AS trip_hour, - count(*) - FROM nyc_yellow_taxi_trips_2016_06_01 - GROUP BY trip_hour - ORDER BY trip_hour - ) -TO 'C:\YourDirectory\hourly_pickups_2016_06_01.csv' -WITH (FORMAT CSV, HEADER, DELIMITER ','); - --- Listing 11-10: Calculating median trip time by hour - -SELECT - date_part('hour', tpep_pickup_datetime) AS trip_hour, - percentile_cont(.5) - WITHIN GROUP (ORDER BY - tpep_dropoff_datetime - tpep_pickup_datetime) AS median_trip -FROM nyc_yellow_taxi_trips_2016_06_01 -GROUP BY trip_hour -ORDER BY trip_hour; - --- Listing 11-11: Creating a table to hold train trip data - -SET timezone TO 'US/Central'; - -CREATE TABLE train_rides ( - trip_id bigserial PRIMARY KEY, - segment varchar(50) NOT NULL, - departure timestamp with time zone NOT NULL, - arrival timestamp with time zone NOT NULL -); - -INSERT INTO train_rides (segment, departure, arrival) -VALUES - ('Chicago to New York', '2017-11-13 21:30 CST', '2017-11-14 18:23 EST'), - ('New York to New Orleans', '2017-11-15 14:15 EST', '2017-11-16 19:32 CST'), - ('New Orleans to Los Angeles', '2017-11-17 13:45 CST', '2017-11-18 9:00 PST'), - ('Los Angeles to San Francisco', '2017-11-19 10:10 PST', '2017-11-19 21:24 PST'), - ('San Francisco to Denver', '2017-11-20 9:10 PST', '2017-11-21 18:38 MST'), - ('Denver to Chicago', '2017-11-22 19:10 MST', '2017-11-23 14:50 CST'); - -SELECT * FROM train_rides; - --- Listing 11-12: Calculating the length of each trip segment - -SELECT segment, - to_char(departure, 'YYYY-MM-DD HH12:MI a.m. TZ') AS departure, - arrival - departure AS segment_time -FROM train_rides; - --- Listing 11-13: Calculating cumulative intervals using OVER - -SELECT segment, - arrival - departure AS segment_time, - sum(arrival - departure) OVER (ORDER BY trip_id) AS cume_time -FROM train_rides; - --- Listing 11-14: Better formatting for cumulative trip time - -SELECT segment, - arrival - departure AS segment_time, - sum(date_part('epoch', (arrival - departure))) - OVER (ORDER BY trip_id) * interval '1 second' AS cume_time -FROM train_rides; diff --git a/Chapter_12/README.md b/Chapter_12/README.md index a5ccb7f..5876666 100644 --- a/Chapter_12/README.md +++ b/Chapter_12/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 11: Working with Dates and Times +### Chapter 12: Working with Dates and Times Explains how to create, manipulate, and query dates and times in your database, including working with time zones, using data on New York City taxi trips and Amtrak train schedules. diff --git a/Chapter_13/Chapter_12.sql b/Chapter_13/Chapter_12.sql deleted file mode 100644 index 3738b39..0000000 --- a/Chapter_13/Chapter_12.sql +++ /dev/null @@ -1,310 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 12 Code Examples --------------------------------------------------------------- - --- Listing 12-1: Using a subquery in a WHERE clause - -SELECT geo_name, - state_us_abbreviation, - p0010001 -FROM us_counties_2010 -WHERE p0010001 >= ( - SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001) - FROM us_counties_2010 - ) -ORDER BY p0010001 DESC; - --- Listing 12-2: Using a subquery in a WHERE clause for DELETE - -CREATE TABLE us_counties_2010_top10 AS -SELECT * FROM us_counties_2010; - -DELETE FROM us_counties_2010_top10 -WHERE p0010001 < ( - SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001) - FROM us_counties_2010_top10 - ); - -SELECT count(*) FROM us_counties_2010_top10; - --- Listing 12-3: Subquery as a derived table in a FROM clause - -SELECT round(calcs.average, 0) as average, - calcs.median, - round(calcs.average - calcs.median, 0) AS median_average_diff -FROM ( - SELECT avg(p0010001) AS average, - percentile_cont(.5) - WITHIN GROUP (ORDER BY p0010001)::numeric(10,1) AS median - FROM us_counties_2010 - ) -AS calcs; - --- Listing 12-4: Joining two derived tables - -SELECT census.state_us_abbreviation AS st, - census.st_population, - plants.plant_count, - round((plants.plant_count/census.st_population::numeric(10,1)) * 1000000, 1) - AS plants_per_million -FROM - ( - SELECT st, - count(*) AS plant_count - FROM meat_poultry_egg_inspect - GROUP BY st - ) - AS plants -JOIN - ( - SELECT state_us_abbreviation, - sum(p0010001) AS st_population - FROM us_counties_2010 - GROUP BY state_us_abbreviation - ) - AS census -ON plants.st = census.state_us_abbreviation -ORDER BY plants_per_million DESC; - --- Listing 12-5: Adding a subquery to a column list - -SELECT geo_name, - state_us_abbreviation AS st, - p0010001 AS total_pop, - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001) - FROM us_counties_2010) AS us_median -FROM us_counties_2010; - --- Listing 12-6: Using a subquery expression in a calculation - -SELECT geo_name, - state_us_abbreviation AS st, - p0010001 AS total_pop, - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001) - FROM us_counties_2010) AS us_median, - p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001) - FROM us_counties_2010) AS diff_from_median -FROM us_counties_2010 -WHERE (p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001) - FROM us_counties_2010)) - BETWEEN -1000 AND 1000; - - --- BONUS: Subquery expressions --- If you'd like to try the IN, EXISTS, and NOT EXISTS expressions on pages 199-200, --- here's the code to create a retirees table. The queries below are similar --- to the hypothetical examples on pages 199 and 200. You will need the --- employees table you created in Chapter 6. - --- Create table and insert data -CREATE TABLE retirees ( - id int, - first_name varchar(50), - last_name varchar(50) -); - -INSERT INTO retirees -VALUES (2, 'Lee', 'Smith'), - (4, 'Janet', 'King'); - --- Generating values for the IN operator -SELECT first_name, last_name -FROM employees -WHERE emp_id IN ( - SELECT id - FROM retirees); - --- Checking whether values exist (returns all rows from employees --- if the expression evaluates as true) -SELECT first_name, last_name -FROM employees -WHERE EXISTS ( - SELECT id - FROM retirees); - --- Using a correlated subquery to find matching values from employees --- in retirees. -SELECT first_name, last_name -FROM employees -WHERE EXISTS ( - SELECT id - FROM retirees - WHERE id = employees.emp_id); - - - --- Listing 12-7: Using a simple CTE to find large counties - -WITH - large_counties (geo_name, st, p0010001) -AS - ( - SELECT geo_name, state_us_abbreviation, p0010001 - FROM us_counties_2010 - WHERE p0010001 >= 100000 - ) -SELECT st, count(*) -FROM large_counties -GROUP BY st -ORDER BY count(*) DESC; - --- Bonus: You can also write this query as: -SELECT state_us_abbreviation, count(*) -FROM us_counties_2010 -WHERE p0010001 >= 100000 -GROUP BY state_us_abbreviation -ORDER BY count(*) DESC; - --- Listing 12-8: Using CTEs in a table join - -WITH - counties (st, population) AS - (SELECT state_us_abbreviation, sum(population_count_100_percent) - FROM us_counties_2010 - GROUP BY state_us_abbreviation), - - plants (st, plants) AS - (SELECT st, count(*) AS plants - FROM meat_poultry_egg_inspect - GROUP BY st) - -SELECT counties.st, - population, - plants, - round((plants/population::numeric(10,1))*1000000, 1) AS per_million -FROM counties JOIN plants -ON counties.st = plants.st -ORDER BY per_million DESC; - --- Listing 12-9: Using CTEs to minimize redundant code - -WITH us_median AS - (SELECT percentile_cont(.5) - WITHIN GROUP (ORDER BY p0010001) AS us_median_pop - FROM us_counties_2010) - -SELECT geo_name, - state_us_abbreviation AS st, - p0010001 AS total_pop, - us_median_pop, - p0010001 - us_median_pop AS diff_from_median -FROM us_counties_2010 CROSS JOIN us_median -WHERE (p0010001 - us_median_pop) - BETWEEN -1000 AND 1000; - - --- Cross tabulations --- Install the crosstab() function via the tablefunc module - -CREATE EXTENSION tablefunc; - --- Listing 12-10: Creating and filling the ice_cream_survey table - -CREATE TABLE ice_cream_survey ( - response_id integer PRIMARY KEY, - office varchar(20), - flavor varchar(20) -); - -COPY ice_cream_survey -FROM 'C:\YourDirectory\ice_cream_survey.csv' -WITH (FORMAT CSV, HEADER); - --- Listing 12-11: Generating the ice cream survey crosstab - -SELECT * -FROM crosstab('SELECT office, - flavor, - count(*) - FROM ice_cream_survey - GROUP BY office, flavor - ORDER BY office', - - 'SELECT flavor - FROM ice_cream_survey - GROUP BY flavor - ORDER BY flavor') - -AS (office varchar(20), - chocolate bigint, - strawberry bigint, - vanilla bigint); - --- Listing 12-12: Creating and filling a temperature_readings table - -CREATE TABLE temperature_readings ( - reading_id bigserial PRIMARY KEY, - station_name varchar(50), - observation_date date, - max_temp integer, - min_temp integer -); - -COPY temperature_readings - (station_name, observation_date, max_temp, min_temp) -FROM 'C:\YourDirectory\temperature_readings.csv' -WITH (FORMAT CSV, HEADER); - --- Listing 12-13: Generating the temperature readings crosstab - -SELECT * -FROM crosstab('SELECT - station_name, - date_part(''month'', observation_date), - percentile_cont(.5) - WITHIN GROUP (ORDER BY max_temp) - FROM temperature_readings - GROUP BY station_name, - date_part(''month'', observation_date) - ORDER BY station_name', - - 'SELECT month - FROM generate_series(1,12) month') - -AS (station varchar(50), - jan numeric(3,0), - feb numeric(3,0), - mar numeric(3,0), - apr numeric(3,0), - may numeric(3,0), - jun numeric(3,0), - jul numeric(3,0), - aug numeric(3,0), - sep numeric(3,0), - oct numeric(3,0), - nov numeric(3,0), - dec numeric(3,0) -); - --- Listing 12-14: Re-classifying temperature data with CASE - -SELECT max_temp, - CASE WHEN max_temp >= 90 THEN 'Hot' - WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm' - WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant' - WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold' - WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing' - ELSE 'Inhumane' - END AS temperature_group -FROM temperature_readings; - --- Listing 12-15: Using CASE in a Common Table Expression - -WITH temps_collapsed (station_name, max_temperature_group) AS - (SELECT station_name, - CASE WHEN max_temp >= 90 THEN 'Hot' - WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm' - WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant' - WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold' - WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing' - ELSE 'Inhumane' - END - FROM temperature_readings) - -SELECT station_name, max_temperature_group, count(*) -FROM temps_collapsed -GROUP BY station_name, max_temperature_group -ORDER BY station_name, count(*) DESC; diff --git a/Chapter_13/README.md b/Chapter_13/README.md index 667332d..5054063 100644 --- a/Chapter_13/README.md +++ b/Chapter_13/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 12: Advanced Query Techniques +### Chapter 13: Advanced Query Techniques Explains how to use more complex SQL operations, such as subqueries and cross tabulations, and the CASE statement to reclassify values in a data set on temperature readings. diff --git a/Chapter_14/Chapter_13.sql b/Chapter_14/Chapter_13.sql deleted file mode 100644 index 83dc578..0000000 --- a/Chapter_14/Chapter_13.sql +++ /dev/null @@ -1,363 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 13 Code Examples --------------------------------------------------------------- - --- Commonly used string functions --- Full list at https://www.postgresql.org/docs/current/static/functions-string.html - --- Case formatting -SELECT upper('Neal7'); -SELECT lower('Randy'); -SELECT initcap('at the end of the day'); --- Note initcap's imperfect for acronyms -SELECT initcap('Practical SQL'); - --- Character Information -SELECT char_length(' Pat '); -SELECT length(' Pat '); -SELECT position(', ' in 'Tan, Bella'); - --- Removing characters -SELECT trim('s' from 'socks'); -SELECT trim(trailing 's' from 'socks'); -SELECT trim(' Pat '); -SELECT char_length(trim(' Pat ')); -- note the length change -SELECT ltrim('socks', 's'); -SELECT rtrim('socks', 's'); - --- Extracting and replacing characters -SELECT left('703-555-1212', 3); -SELECT right('703-555-1212', 8); -SELECT replace('bat', 'b', 'c'); - - --- Table 13-2: Regular Expression Matching Examples - --- Any character one or more times -SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '.+'); --- One or two digits followed by a space and p.m. -SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\d{1,2} (?:a.m.|p.m.)'); --- One or more word characters at the start -SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '^\w+'); --- One or more word characters followed by any character at the end. -SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\w+.$'); --- The words May or June -SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from 'May|June'); --- Four digits -SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\d{4}'); --- May followed by a space, digit, comma, space, and four digits. -SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from 'May \d, \d{4}'); - - --- Turning Text to Data with Regular Expression Functions - --- Listing 13-2: Creating and loading the crime_reports table --- Data from https://sheriff.loudoun.gov/dailycrime - -CREATE TABLE crime_reports ( - crime_id bigserial PRIMARY KEY, - date_1 timestamp with time zone, - date_2 timestamp with time zone, - street varchar(250), - city varchar(100), - crime_type varchar(100), - description text, - case_number varchar(50), - original_text text NOT NULL -); - -COPY crime_reports (original_text) -FROM 'C:\YourDirectory\crime_reports.csv' -WITH (FORMAT CSV, HEADER OFF, QUOTE '"'); - -SELECT original_text FROM crime_reports; - --- Listing 13-3: Using regexp_match() to find the first date -SELECT crime_id, - regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') -FROM crime_reports; - --- Listing 13-4: Using the regexp_matches() function with the 'g' flag -SELECT crime_id, - regexp_matches(original_text, '\d{1,2}\/\d{1,2}\/\d{2}', 'g') -FROM crime_reports; - --- Listing 13-5: Using regexp_match() to find the second date --- Note that the result includes an unwanted hyphen -SELECT crime_id, - regexp_match(original_text, '-\d{1,2}\/\d{1,2}\/\d{1,2}') -FROM crime_reports; - --- Listing 13-6: Using a capture group to return only the date --- Eliminates the hyphen -SELECT crime_id, - regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') -FROM crime_reports; - --- Listing 13-7: Matching case number, date, crime type, and city - -SELECT - regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number, - regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1, - regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type, - regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n') - AS city -FROM crime_reports; - --- Bonus: Get all parsed elements at once - -SELECT crime_id, - regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1, - CASE WHEN EXISTS (SELECT regexp_matches(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})')) - THEN regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') - ELSE NULL - END AS date_2, - regexp_match(original_text, '\/\d{2}\n(\d{4})') AS hour_1, - CASE WHEN EXISTS (SELECT regexp_matches(original_text, '\/\d{2}\n\d{4}-(\d{4})')) - THEN regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') - ELSE NULL - END AS hour_2, - regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))') AS street, - regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n') AS city, - regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type, - regexp_match(original_text, ':\s(.+)(?:C0|SO)') AS description, - regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number -FROM crime_reports; - --- Listing 13-8: Retrieving a value from within an array - -SELECT - crime_id, - (regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1] - AS case_number -FROM crime_reports; - --- Listing 13-9: Updating the crime_reports date_1 column - -UPDATE crime_reports -SET date_1 = -( - (regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1] - || ' ' || - (regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1] - ||' US/Eastern' -)::timestamptz; - -SELECT crime_id, - date_1, - original_text -FROM crime_reports; - --- Listing 13-10: Updating all crime_reports columns - -UPDATE crime_reports -SET date_1 = - ( - (regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1] - || ' ' || - (regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1] - ||' US/Eastern' - )::timestamptz, - - date_2 = - CASE - -- if there is no second date but there is a second hour - WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NULL) - AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL) - THEN - ((regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1] - || ' ' || - (regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1] - ||' US/Eastern' - )::timestamptz - - -- if there is both a second date and second hour - WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NOT NULL) - AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL) - THEN - ((regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})'))[1] - || ' ' || - (regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1] - ||' US/Eastern' - )::timestamptz - -- if neither of those conditions exist, provide a NULL - ELSE NULL - END, - street = (regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))'))[1], - city = (regexp_match(original_text, - '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n'))[1], - crime_type = (regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):'))[1], - description = (regexp_match(original_text, ':\s(.+)(?:C0|SO)'))[1], - case_number = (regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1]; - --- Listing 13-11: Viewing selected crime data - -SELECT date_1, - street, - city, - crime_type -FROM crime_reports; - --- Listing 13-12: Using regular expressions in a WHERE clause - -SELECT geo_name -FROM us_counties_2010 -WHERE geo_name ~* '(.+lade.+|.+lare.+)' -ORDER BY geo_name; - -SELECT geo_name -FROM us_counties_2010 -WHERE geo_name ~* '.+ash.+' AND geo_name !~ 'Wash.+' -ORDER BY geo_name; - - --- Listing 13-13: Regular expression functions to replace and split - -SELECT regexp_replace('05/12/2018', '\d{4}', '2017'); - -SELECT regexp_split_to_table('Four,score,and,seven,years,ago', ','); - -SELECT regexp_split_to_array('Phil Mike Tony Steve', ' '); - --- Listing 13-14: Finding an array length - -SELECT array_length(regexp_split_to_array('Phil Mike Tony Steve', ' '), 1); - - --- FULL TEXT SEARCH - --- Full-text search operators: --- & (AND) --- | (OR) --- ! (NOT) - --- Listing 13-15: Converting text to tsvector data - -SELECT to_tsvector('I am walking across the sitting room to sit with you.'); - --- Listing 13-16: Converting search terms to tsquery data - -SELECT to_tsquery('walking & sitting'); - --- Listing 13-17: Querying a tsvector type with a tsquery - -SELECT to_tsvector('I am walking across the sitting room') @@ to_tsquery('walking & sitting'); - -SELECT to_tsvector('I am walking across the sitting room') @@ to_tsquery('walking & running'); - --- Listing 13-18: Creating and filling the president_speeches table - --- Sources: --- https://archive.org/details/State-of-the-Union-Addresses-1945-2006 --- http://www.presidency.ucsb.edu/ws/index.php --- https://www.eisenhower.archives.gov/all_about_ike/speeches.html - -CREATE TABLE president_speeches ( - sotu_id serial PRIMARY KEY, - president varchar(100) NOT NULL, - title varchar(250) NOT NULL, - speech_date date NOT NULL, - speech_text text NOT NULL, - search_speech_text tsvector -); - -COPY president_speeches (president, title, speech_date, speech_text) -FROM 'C:\YourDirectory\sotu-1946-1977.csv' -WITH (FORMAT CSV, DELIMITER '|', HEADER OFF, QUOTE '@'); - -SELECT * FROM president_speeches; - --- Listing 13-19: Converting speeches to tsvector in the search_speech_text column - -UPDATE president_speeches -SET search_speech_text = to_tsvector('english', speech_text); - --- Listing 13-20: Creating a GIN index for text search - -CREATE INDEX search_idx ON president_speeches USING gin(search_speech_text); - --- Listing 13-21: Finding speeches containing the word "Vietnam" - -SELECT president, speech_date -FROM president_speeches -WHERE search_speech_text @@ to_tsquery('Vietnam') -ORDER BY speech_date; - --- Listing 13-22: Displaying search results with ts_headline() - -SELECT president, - speech_date, - ts_headline(speech_text, to_tsquery('Vietnam'), - 'StartSel = <, - StopSel = >, - MinWords=5, - MaxWords=7, - MaxFragments=1') -FROM president_speeches -WHERE search_speech_text @@ to_tsquery('Vietnam'); - --- Listing 13-23: Finding speeches with the word "transportation" but not "roads" - -SELECT president, - speech_date, - ts_headline(speech_text, to_tsquery('transportation & !roads'), - 'StartSel = <, - StopSel = >, - MinWords=5, - MaxWords=7, - MaxFragments=1') -FROM president_speeches -WHERE search_speech_text @@ to_tsquery('transportation & !roads'); - --- Listing 13-24: Find speeches where "defense" follows "military" - -SELECT president, - speech_date, - ts_headline(speech_text, to_tsquery('military <-> defense'), - 'StartSel = <, - StopSel = >, - MinWords=5, - MaxWords=7, - MaxFragments=1') -FROM president_speeches -WHERE search_speech_text @@ to_tsquery('military <-> defense'); - --- Bonus: Example with a distance of 2: -SELECT president, - speech_date, - ts_headline(speech_text, to_tsquery('military <2> defense'), - 'StartSel = <, - StopSel = >, - MinWords=5, - MaxWords=7, - MaxFragments=2') -FROM president_speeches -WHERE search_speech_text @@ to_tsquery('military <2> defense'); - --- Listing 13-25: Scoring relevance with ts_rank() - -SELECT president, - speech_date, - ts_rank(search_speech_text, - to_tsquery('war & security & threat & enemy')) AS score -FROM president_speeches -WHERE search_speech_text @@ to_tsquery('war & security & threat & enemy') -ORDER BY score DESC -LIMIT 5; - --- Listing 13-26: Normalizing ts_rank() by speech length - -SELECT president, - speech_date, - ts_rank(search_speech_text, - to_tsquery('war & security & threat & enemy'), 2)::numeric - AS score -FROM president_speeches -WHERE search_speech_text @@ to_tsquery('war & security & threat & enemy') -ORDER BY score DESC -LIMIT 5; - - diff --git a/Chapter_14/README.md b/Chapter_14/README.md index 9381bc9..97a4214 100644 --- a/Chapter_14/README.md +++ b/Chapter_14/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 13: Mining Text to Find Meaningful Data +### Chapter 14: Mining Text to Find Meaningful Data Covers how to use PostgreSQL’s full text search engine and regular expressions to extract data from unstructured text, using a collection of speeches by U.S. presidents as an example. diff --git a/Chapter_15/Chapter_14.sql b/Chapter_15/Chapter_14.sql deleted file mode 100644 index 8e07dda..0000000 --- a/Chapter_15/Chapter_14.sql +++ /dev/null @@ -1,252 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 14 Code Examples --------------------------------------------------------------- - --- Listing 14-1: Creating a gis_analysis database - -CREATE DATABASE gis_analysis; --- Note: Switch to this new database before continuing the examples - --- Listing 14-2: Loading the PostGIS extension - -CREATE EXTENSION postgis; - -SELECT postgis_full_version(); -- shows PostGIS version - --- Listing 14-3: Retrieving the well-known text for SRID 4326 - -SELECT srtext -FROM spatial_ref_sys -WHERE srid = 4326; - --- Listing 14-4: Using ST_GeomFromText() to create spatial objects - -SELECT ST_GeomFromText('POINT(-74.9233606 42.699992)', 4326); - -SELECT ST_GeomFromText('LINESTRING(-74.9 42.7, -75.1 42.7)', 4326); - -SELECT ST_GeomFromText('POLYGON((-74.9 42.7, -75.1 42.7, - -75.1 42.6, -74.9 42.7))', 4326); - -SELECT ST_GeomFromText('MULTIPOINT (-74.9 42.7, -75.1 42.7)', 4326); - -SELECT ST_GeomFromText('MULTILINESTRING((-76.27 43.1, -76.06 43.08), - (-76.2 43.3, -76.2 43.4, - -76.4 43.1))', 4326); - -SELECT ST_GeomFromText('MULTIPOLYGON(( - (-74.92 42.7, -75.06 42.71, - -75.07 42.64, -74.92 42.7), - (-75.0 42.66, -75.0 42.64, - -74.98 42.64, -74.98 42.66, - -75.0 42.66)))', 4326); - --- Listing 14-5: Using ST_GeogFromText() to create spatial objects - -SELECT -ST_GeogFromText('SRID=4326;MULTIPOINT(-74.9 42.7, -75.1 42.7, -74.924 42.6)'); - --- Listing 14-6: Functions specific to making points - -SELECT ST_PointFromText('POINT(-74.9233606 42.699992)', 4326); - -SELECT ST_MakePoint(-74.9233606, 42.699992); -SELECT ST_SetSRID(ST_MakePoint(-74.9233606, 42.699992), 4326); - --- Listing 14-7: Functions specific to making LineStrings - -SELECT ST_LineFromText('LINESTRING(-105.90 35.67,-105.91 35.67)', 4326); -SELECT ST_MakeLine(ST_MakePoint(-74.92, 42.69), ST_MakePoint(-74.12, 42.45)); - --- Listing 14-8: Functions specific to making Polygons - -SELECT ST_PolygonFromText('POLYGON((-74.9 42.7, -75.1 42.7, - -75.1 42.6, -74.9 42.7))', 4326); - -SELECT ST_MakePolygon( - ST_GeomFromText('LINESTRING(-74.92 42.7, -75.06 42.71, - -75.07 42.64, -74.92 42.7)', 4326)); - -SELECT ST_MPolyFromText('MULTIPOLYGON(( - (-74.92 42.7, -75.06 42.71, - -75.07 42.64, -74.92 42.7), - (-75.0 42.66, -75.0 42.64, - -74.98 42.64, -74.98 42.66, - -75.0 42.66) - ))', 4326); - - --- ANALYZING FARMERS MARKETS DATA --- https://catalog.data.gov/dataset/farmers-markets-geographic-data --- https://www.ams.usda.gov/local-food-directories/farmersmarkets - - --- Listing 14-9: Create and load the farmers_markets table - -CREATE TABLE farmers_markets ( - fmid bigint PRIMARY KEY, - market_name varchar(100) NOT NULL, - street varchar(180), - city varchar(60), - county varchar(25), - st varchar(20) NOT NULL, - zip varchar(10), - longitude numeric(10,7), - latitude numeric(10,7), - organic varchar(1) NOT NULL -); - -COPY farmers_markets -FROM 'C:\YourDirectory\farmers_markets.csv' -WITH (FORMAT CSV, HEADER); - -SELECT count(*) FROM farmers_markets; -- should return 8,681 rows - --- Listing 14-10: Creating and indexing a geography column --- There's also a function: https://postgis.net/docs/AddGeometryColumn.html - --- Add column -ALTER TABLE farmers_markets ADD COLUMN geog_point geography(POINT,4326); - --- Now fill that column with the lat/long -UPDATE farmers_markets -SET geog_point = ST_SetSRID( - ST_MakePoint(longitude,latitude),4326 - )::geography; - --- Add a GiST index -CREATE INDEX market_pts_idx ON farmers_markets USING GIST (geog_point); - --- View the geography column -SELECT longitude, - latitude, - geog_point, - ST_AsText(geog_point) -FROM farmers_markets -WHERE longitude IS NOT NULL -LIMIT 5; - --- Listing 14-11: Using ST_DWithin() to locate farmers' markets within 10 kilometers of a point - -SELECT market_name, - city, - st -FROM farmers_markets -WHERE ST_DWithin(geog_point, - ST_GeogFromText('POINT(-93.6204386 41.5853202)'), - 10000) -ORDER BY market_name; - --- Listing 14-12: Using ST_Distance() to calculate the miles between Yankee Stadium --- and Citi Field (Mets) --- 1609.344 meters/mile - -SELECT ST_Distance( - ST_GeogFromText('POINT(-73.9283685 40.8296466)'), - ST_GeogFromText('POINT(-73.8480153 40.7570917)') - ) / 1609.344 AS mets_to_yanks; - --- Listing 14-13: Using ST_Distance() for each row in farmers_markets - -SELECT market_name, - city, - round( - (ST_Distance(geog_point, - ST_GeogFromText('POINT(-93.6204386 41.5853202)') - ) / 1609.344)::numeric(8,5), 2 - ) AS miles_from_dt -FROM farmers_markets -WHERE ST_DWithin(geog_point, - ST_GeogFromText('POINT(-93.6204386 41.5853202)'), - 10000) -ORDER BY miles_from_dt ASC; - - --- WORKING WITH SHAPEFILES - --- Resources: - -- TIGER/Line® Shapefiles and TIGER/Line® Files - -- https://www.census.gov/geo/maps-data/data/tiger-line.html - -- Cartographic Boundary Shapefiles - Counties - -- https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html - --- Listing 14-14: Checking the geom column's well-known text representation - -SELECT ST_AsText(geom) -FROM us_counties_2010_shp -LIMIT 1; - --- Listing 14-15: Find the largest counties by area using ST_Area() - -SELECT name10, - statefp10 AS st, - round( - ( ST_Area(geom::geography) / 2589988.110336 )::numeric, 2 - ) AS square_miles -FROM us_counties_2010_shp -ORDER BY square_miles DESC -LIMIT 5; - --- Listing 14-16: Using ST_Within() to find the county belonging to a pair of coordinates - -SELECT name10, - statefp10 -FROM us_counties_2010_shp -WHERE ST_Within('SRID=4269;POINT(-118.3419063 34.0977076)'::geometry, geom); - --- SPATIAL JOINS --- SANTA FE WATER AND ROAD DATA --- http://www.santafenm.gov/santa_fe_river - --- Census 2016 Tiger/Line roads, water --- https://www.census.gov/geo/maps-data/data/tiger-line.html --- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Roads --- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Water - --- RTTYP - Route Type Code Description --- https://www.census.gov/geo/reference/rttyp.html --- C County --- I Interstate --- M Common Name --- O Other --- S State recognized --- U U.S. - --- MTFCC MAF/TIGER feature class code --- https://www.census.gov/geo/reference/mtfcc.html --- Here, H3010: A natural flowing waterway - --- Listing 14-17: Using ST_GeometryType() to determine geometry - -SELECT ST_GeometryType(geom) -FROM santafe_linearwater_2016 -LIMIT 1; - -SELECT ST_GeometryType(geom) -FROM santafe_roads_2016 -LIMIT 1; - --- Listing 14-18: Spatial join with ST_Intersects() to find roads crossing the Santa Fe river - -SELECT water.fullname AS waterway, - roads.rttyp, - roads.fullname AS road -FROM santafe_linearwater_2016 water JOIN santafe_roads_2016 roads - ON ST_Intersects(water.geom, roads.geom) -WHERE water.fullname = 'Santa Fe Riv' -ORDER BY roads.fullname; - --- Listing 14-19: Using ST_Intersection() to show where roads cross the river - -SELECT water.fullname AS waterway, - roads.rttyp, - roads.fullname AS road, - ST_AsText(ST_Intersection(water.geom, roads.geom)) -FROM santafe_linearwater_2016 water JOIN santafe_roads_2016 roads - ON ST_Intersects(water.geom, roads.geom) -WHERE water.fullname = 'Santa Fe Riv' -ORDER BY roads.fullname -LIMIT 5; diff --git a/Chapter_15/README.md b/Chapter_15/README.md index 4f1176e..fb6d697 100644 --- a/Chapter_15/README.md +++ b/Chapter_15/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 14: Analyzing Spatial Data with PostGIS +### Chapter 15: Analyzing Spatial Data with PostGIS Introduces data types and queries related to spatial objects, which will let you analyze geographical features like states, roads, and rivers. diff --git a/Chapter_17/Chapter_15.sql b/Chapter_17/Chapter_15.sql deleted file mode 100644 index 44a408a..0000000 --- a/Chapter_17/Chapter_15.sql +++ /dev/null @@ -1,321 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 15 Code Examples --------------------------------------------------------------- - --- VIEWS - --- Listing 15-1: Creating a view that displays Nevada 2010 counties - -CREATE OR REPLACE VIEW nevada_counties_pop_2010 AS - SELECT geo_name, - state_fips, - county_fips, - p0010001 AS pop_2010 - FROM us_counties_2010 - WHERE state_us_abbreviation = 'NV' - ORDER BY county_fips; - --- Listing 15-2: Querying the nevada_counties_pop_2010 view - -SELECT * -FROM nevada_counties_pop_2010 -LIMIT 5; - --- Listing 15-3: Creating a view showing population change for US counties - -CREATE OR REPLACE VIEW county_pop_change_2010_2000 AS - SELECT c2010.geo_name, - c2010.state_us_abbreviation AS st, - c2010.state_fips, - c2010.county_fips, - c2010.p0010001 AS pop_2010, - c2000.p0010001 AS pop_2000, - round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001) - / c2000.p0010001 * 100, 1 ) AS pct_change_2010_2000 - FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000 - ON c2010.state_fips = c2000.state_fips - AND c2010.county_fips = c2000.county_fips - ORDER BY c2010.state_fips, c2010.county_fips; - --- Listing 15-4: Selecting columns from the county_pop_change_2010_2000 view - -SELECT geo_name, - st, - pop_2010, - pct_change_2010_2000 -FROM county_pop_change_2010_2000 -WHERE st = 'NV' -LIMIT 5; - --- Listing 15-5: Creating a view on the employees table - -CREATE OR REPLACE VIEW employees_tax_dept AS - SELECT emp_id, - first_name, - last_name, - dept_id - FROM employees - WHERE dept_id = 1 - ORDER BY emp_id - WITH LOCAL CHECK OPTION; - -SELECT * FROM employees_tax_dept; - --- Listing 15-6: Successful and rejected inserts via the employees_tax_dept view - -INSERT INTO employees_tax_dept (first_name, last_name, dept_id) -VALUES ('Suzanne', 'Legere', 1); - -INSERT INTO employees_tax_dept (first_name, last_name, dept_id) -VALUES ('Jamil', 'White', 2); - --- optional: -SELECT * FROM employees_tax_dept; - -SELECT * FROM employees; - --- Listing 15-7: Updating a row via the employees_tax_dept view - -UPDATE employees_tax_dept -SET last_name = 'Le Gere' -WHERE emp_id = 5; - -SELECT * FROM employees_tax_dept; - --- Bonus: This will fail because the salary column is not in the view -UPDATE employees_tax_dept -SET salary = 100000 -WHERE emp_id = 5; - --- Listing 15-8: Deleting a row via the employees_tax_dept view - -DELETE FROM employees_tax_dept -WHERE emp_id = 5; - - --- FUNCTIONS --- https://www.postgresql.org/docs/current/static/plpgsql.html - --- Listing 15-9: Creating a percent_change function --- To delete this function: DROP FUNCTION percent_change(numeric,numeric,integer); - -CREATE OR REPLACE FUNCTION -percent_change(new_value numeric, - old_value numeric, - decimal_places integer DEFAULT 1) -RETURNS numeric AS -'SELECT round( - ((new_value - old_value) / old_value) * 100, decimal_places -);' -LANGUAGE SQL -IMMUTABLE -RETURNS NULL ON NULL INPUT; - --- Listing 15-10: Testing the percent_change() function - -SELECT percent_change(110, 108, 2); - --- Listing 15-11: Testing percent_change() on Census data - -SELECT c2010.geo_name, - c2010.state_us_abbreviation AS st, - c2010.p0010001 AS pop_2010, - percent_change(c2010.p0010001, c2000.p0010001) AS pct_chg_func, - round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001) - / c2000.p0010001 * 100, 1 ) AS pct_chg_formula -FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000 -ON c2010.state_fips = c2000.state_fips - AND c2010.county_fips = c2000.county_fips -ORDER BY pct_chg_func DESC -LIMIT 5; - --- Listing 15-12: Adding a column to the teachers table and seeing the data - -ALTER TABLE teachers ADD COLUMN personal_days integer; - -SELECT first_name, - last_name, - hire_date, - personal_days -FROM teachers; - --- Listing 15-13: Creating an update_personal_days() function - -CREATE OR REPLACE FUNCTION update_personal_days() -RETURNS void AS $$ -BEGIN - UPDATE teachers - SET personal_days = - CASE WHEN (now() - hire_date) BETWEEN '5 years'::interval - AND '10 years'::interval THEN 4 - WHEN (now() - hire_date) > '10 years'::interval THEN 5 - ELSE 3 - END; - RAISE NOTICE 'personal_days updated!'; -END; -$$ LANGUAGE plpgsql; - --- To run the function: -SELECT update_personal_days(); - --- Listing 15-14: Enabling the PL/Python procedural language - -CREATE EXTENSION plpythonu; - --- Listing 15-15: Using PL/Python to create the trim_county() function - -CREATE OR REPLACE FUNCTION trim_county(input_string text) -RETURNS text AS $$ - import re - cleaned = re.sub(r' County', '', input_string) - return cleaned -$$ LANGUAGE plpythonu; - --- Listing 15-16: Testing the trim_county() function - -SELECT geo_name, - trim_county(geo_name) -FROM us_counties_2010 -ORDER BY state_fips, county_fips -LIMIT 5; - - --- TRIGGERS - --- Listing 15-17: Creating the grades and grades_history tables - -CREATE TABLE grades ( - student_id bigint, - course_id bigint, - course varchar(30) NOT NULL, - grade varchar(5) NOT NULL, -PRIMARY KEY (student_id, course_id) -); - -INSERT INTO grades -VALUES - (1, 1, 'Biology 2', 'F'), - (1, 2, 'English 11B', 'D'), - (1, 3, 'World History 11B', 'C'), - (1, 4, 'Trig 2', 'B'); - -CREATE TABLE grades_history ( - student_id bigint NOT NULL, - course_id bigint NOT NULL, - change_time timestamp with time zone NOT NULL, - course varchar(30) NOT NULL, - old_grade varchar(5) NOT NULL, - new_grade varchar(5) NOT NULL, -PRIMARY KEY (student_id, course_id, change_time) -); - --- Listing 15-18: Creating the record_if_grade_changed() function - -CREATE OR REPLACE FUNCTION record_if_grade_changed() - RETURNS trigger AS -$$ -BEGIN - IF NEW.grade <> OLD.grade THEN - INSERT INTO grades_history ( - student_id, - course_id, - change_time, - course, - old_grade, - new_grade) - VALUES - (OLD.student_id, - OLD.course_id, - now(), - OLD.course, - OLD.grade, - NEW.grade); - END IF; - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - --- Listing 15-19: Creating the grades_update trigger - -CREATE TRIGGER grades_update - AFTER UPDATE - ON grades - FOR EACH ROW - EXECUTE PROCEDURE record_if_grade_changed(); - --- Listing 15-20: Testing the grades_update trigger - --- Initially, there should be 0 records in the history -SELECT * FROM grades_history; - --- Check the grades -SELECT * FROM grades; - --- Update a grade -UPDATE grades -SET grade = 'C' -WHERE student_id = 1 AND course_id = 1; - --- Now check the history -SELECT student_id, - change_time, - course, - old_grade, - new_grade -FROM grades_history; - --- Listing 15-21: Creating a temperature_test table - -CREATE TABLE temperature_test ( - station_name varchar(50), - observation_date date, - max_temp integer, - min_temp integer, - max_temp_group varchar(40), -PRIMARY KEY (station_name, observation_date) -); - --- Listing 15-22: Creating the classify_max_temp() function - -CREATE OR REPLACE FUNCTION classify_max_temp() - RETURNS trigger AS -$$ -BEGIN - CASE - WHEN NEW.max_temp >= 90 THEN - NEW.max_temp_group := 'Hot'; - WHEN NEW.max_temp BETWEEN 70 AND 89 THEN - NEW.max_temp_group := 'Warm'; - WHEN NEW.max_temp BETWEEN 50 AND 69 THEN - NEW.max_temp_group := 'Pleasant'; - WHEN NEW.max_temp BETWEEN 33 AND 49 THEN - NEW.max_temp_group := 'Cold'; - WHEN NEW.max_temp BETWEEN 20 AND 32 THEN - NEW.max_temp_group := 'Freezing'; - ELSE NEW.max_temp_group := 'Inhumane'; - END CASE; - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - --- Listing 15-23: Creating the temperature_insert trigger - -CREATE TRIGGER temperature_insert - BEFORE INSERT - ON temperature_test - FOR EACH ROW - EXECUTE PROCEDURE classify_max_temp(); - --- Listing 15-24: Inserting rows to test the temperature_update trigger - -INSERT INTO temperature_test (station_name, observation_date, max_temp, min_temp) -VALUES - ('North Station', '1/19/2019', 10, -3), - ('North Station', '3/20/2019', 28, 19), - ('North Station', '5/2/2019', 65, 42), - ('North Station', '8/9/2019', 93, 74); - -SELECT * FROM temperature_test; diff --git a/Chapter_17/README.md b/Chapter_17/README.md index 11063f3..3a217d6 100644 --- a/Chapter_17/README.md +++ b/Chapter_17/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 15: Saving Time with Views, Functions, and Triggers: +### Chapter 17: Saving Time with Views, Functions, and Triggers: Explains how to automate database tasks so you can avoid repeating routine work. diff --git a/Chapter_18/README.md b/Chapter_18/README.md index c302f49..b051359 100644 --- a/Chapter_18/README.md +++ b/Chapter_18/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 16: Using PostgreSQL from the Command Line +### Chapter 18: Using PostgreSQL from the Command Line Covers how to use text commands at your computer’s command prompt to connect to your database and run queries. diff --git a/Chapter_19/Chapter_17.sql b/Chapter_19/Chapter_17.sql deleted file mode 100644 index db7d99e..0000000 --- a/Chapter_19/Chapter_17.sql +++ /dev/null @@ -1,104 +0,0 @@ --------------------------------------------------------------- --- Practical SQL: A Beginner's Guide to Storytelling with Data --- by Anthony DeBarros - --- Chapter 17 Code Examples --------------------------------------------------------------- - --- VACUUM - --- Listing 17-1: Creating a table to test vacuuming - -CREATE TABLE vacuum_test ( - integer_column integer -); - --- Listing 17-2: Determining the size of vacuum_test - -SELECT pg_size_pretty( - pg_total_relation_size('vacuum_test') - ); - --- optional: Determine database size -SELECT pg_size_pretty( - pg_database_size('analysis') - ); - --- Listing 17-3: Inserting 500,000 rows into vacuum_test - -INSERT INTO vacuum_test -SELECT * FROM generate_series(1,500000); - --- Test its size again -SELECT pg_size_pretty( - pg_table_size('vacuum_test') - ); - --- Listing 17-4: Updating all rows in vacuum_test - -UPDATE vacuum_test -SET integer_column = integer_column + 1; - --- Test its size again (35 MB) -SELECT pg_size_pretty( - pg_table_size('vacuum_test') - ); - --- Listing 17-5: Viewing autovacuum statistics for vacuum_test - -SELECT relname, - last_vacuum, - last_autovacuum, - vacuum_count, - autovacuum_count -FROM pg_stat_all_tables -WHERE relname = 'vacuum_test'; - --- To see all columns available -SELECT * -FROM pg_stat_all_tables -WHERE relname = 'vacuum_test'; - --- Listing 17-6: Running VACUUM manually - -VACUUM vacuum_test; - -VACUUM; -- vacuums the whole database - -VACUUM VERBOSE; -- provides messages - --- Listing 17-7: Using VACUUM FULL to reclaim disk space - -VACUUM FULL vacuum_test; - --- Test its size again -SELECT pg_size_pretty( - pg_table_size('vacuum_test') - ); - --- SETTINGS - --- Listing 17-8: Showing the location of postgresql.conf - -SHOW config_file; - --- Listing 17-10: Show the location of the data directory - -SHOW data_directory; - --- reload settings --- Mac and Linux: pg_ctl reload -D '/path/to/data/directory/' --- Windows: pg_ctl reload -D "C:\path\to\data\directory\" - - --- BACKUP AND RESTORE - --- Listing 17-11: Backing up the analysis database with pg_dump -pg_dump -d analysis -U [user_name] -Fc > analysis_backup.sql - --- Back up just a table -pg_dump -t 'train_rides' -d analysis -U [user_name] -Fc > train_backup.sql - --- Listing 17-12: Restoring the analysis database with pg_restore - -pg_restore -C -d postgres -U postgres analysis_backup_custom.sql diff --git a/Chapter_19/README.md b/Chapter_19/README.md index 85518d0..c245ce1 100644 --- a/Chapter_19/README.md +++ b/Chapter_19/README.md @@ -2,7 +2,7 @@ [PracticalSQL](https://www.nostarch.com/practicalSQL) by Anthony DeBarros is [available from No Starch Press](https://www.nostarch.com/practicalSQL). -### Chapter 17: Maintaining Your Database: +### Chapter 19: Maintaining Your Database: Provides tips and procedures for tracking the size of your database, customizing settings, and backing up data.