More renumbering

2020-02-26 21:43:21 -05:00 · 2020-02-26 21:43:21 -05:00 · 0d2ffa4aad
commit 0d2ffa4aad
parent 204d3cc55d
16 changed files with 3335 additions and 0 deletions
--- a/Chapter_02/Chapter_02.sql
+++ b/Chapter_02/Chapter_02.sql
@ -0,0 +1,34 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 1 Code Examples
 --------------------------------------------------------------
 -- Listing 1-1: Creating a database named analysis
 CREATE DATABASE analysis;
 -- Listing 1-2: Creating a table named teachers with six columns
 CREATE TABLE teachers (
    id bigserial,
    first_name varchar(25),
    last_name varchar(50),
    school varchar(50),
    hire_date date,
    salary numeric
 );
 -- This command will remove (drop) the table.
 -- DROP TABLE teachers;
 -- Listing 1-3 Inserting data into the teachers table
 INSERT INTO teachers (first_name, last_name, school, hire_date, salary)
 VALUES ('Janet', 'Smith', 'F.D. Roosevelt HS', '2011-10-30', 36200),
       ('Lee', 'Reynolds', 'F.D. Roosevelt HS', '1993-05-22', 65000),
       ('Samuel', 'Cole', 'Myers Middle School', '2005-08-01', 43500),
       ('Samantha', 'Bush', 'Myers Middle School', '2011-10-30', 36200),
       ('Betty', 'Diaz', 'Myers Middle School', '2005-08-30', 43500),
       ('Kathleen', 'Roush', 'F.D. Roosevelt HS', '2010-10-22', 38500);
--- a/Chapter_03/Chapter_03.sql
+++ b/Chapter_03/Chapter_03.sql
@ -0,0 +1,104 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 2 Code Examples
 --------------------------------------------------------------
 -- Listing 2-1: Querying all rows and columns from the teachers table
 SELECT * FROM teachers;
 -- Listing 2-2: Querying a subset of columns
 SELECT last_name, first_name, salary FROM teachers;
 -- Listing 2-3: Querying distinct values in the school column
 SELECT DISTINCT school
 FROM teachers;
 -- Listing 2-4: Querying distinct pairs of values in the school and salary
 -- columns
 SELECT DISTINCT school, salary
 FROM teachers;
 -- Listing 2-5: Sorting a column with ORDER BY
 SELECT first_name, last_name, salary
 FROM teachers
 ORDER BY salary DESC;
 -- Listing 2-6: Sorting multiple columns with ORDER BY
 SELECT last_name, school, hire_date
 FROM teachers
 ORDER BY school ASC, hire_date DESC;
 -- Listing 2-7: Filtering rows using WHERE
 SELECT last_name, school, hire_date
 FROM teachers
 WHERE school = 'Myers Middle School';
 -- Examples of WHERE comparison operators
 -- Teachers with first name of Janet
 SELECT first_name, last_name, school
 FROM teachers
 WHERE first_name = 'Janet';
 -- School names not equal to F.D. Roosevelt HS
 SELECT school
 FROM teachers
 WHERE school != 'F.D. Roosevelt HS';
 -- Teachers hired before Jan. 1, 2000
 SELECT first_name, last_name, hire_date
 FROM teachers
 WHERE hire_date < '2000-01-01';
 -- Teachers earning 43,500 or more
 SELECT first_name, last_name, salary
 FROM teachers
 WHERE salary >= 43500;
 -- Teachers who earn between $40,000 and $65,000
 SELECT first_name, last_name, school, salary
 FROM teachers
 WHERE salary BETWEEN 40000 AND 65000;
 -- Listing 2-8: Filtering with LIKE AND ILIKE
 SELECT first_name
 FROM teachers
 WHERE first_name LIKE 'sam%';
 SELECT first_name
 FROM teachers
 WHERE first_name ILIKE 'sam%';
 -- Listing 2-9: Combining operators using AND and OR
 SELECT *
 FROM teachers
 WHERE school = 'Myers Middle School'
      AND salary < 40000;
 SELECT *
 FROM teachers
 WHERE last_name = 'Cole'
      OR last_name = 'Bush';
 SELECT *
 FROM teachers
 WHERE school = 'F.D. Roosevelt HS'
      AND (salary < 38000 OR salary > 40000);
 -- Listing 2-10: A SELECT statement including WHERE and ORDER BY
 SELECT first_name, last_name, school, hire_date, salary
 FROM teachers
 WHERE school LIKE '%Roos%'
 ORDER BY hire_date DESC;
--- a/Chapter_04/Chapter_04.sql
+++ b/Chapter_04/Chapter_04.sql
@ -0,0 +1,90 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 3 Code Examples
 --------------------------------------------------------------
 -- Listing 3-1: Character data types in action
 CREATE TABLE char_data_types (
    varchar_column varchar(10),
    char_column char(10),
    text_column text
 );
 INSERT INTO char_data_types
 VALUES
    ('abc', 'abc', 'abc'),
    ('defghi', 'defghi', 'defghi');
 COPY char_data_types TO 'C:\YourDirectory\typetest.txt'
 WITH (FORMAT CSV, HEADER, DELIMITER '|');
 -- Listing 3-2: Number data types in action
 CREATE TABLE number_data_types (
    numeric_column numeric(20,5),
    real_column real,
    double_column double precision
 );
 INSERT INTO number_data_types
 VALUES
    (.7, .7, .7),
    (2.13579, 2.13579, 2.13579),
    (2.1357987654, 2.1357987654, 2.1357987654);
 SELECT * FROM number_data_types;
 -- Listing 3-3: Rounding issues with float columns
 -- Assumes table created and loaded with Listing 3-2
 SELECT
    numeric_column * 10000000 AS "Fixed",
    real_column * 10000000 AS "Float"
 FROM number_data_types
 WHERE numeric_column = .7;
 -- Listing 3-4: Timestamp and interval types in action
 CREATE TABLE date_time_types (
    timestamp_column timestamp with time zone,
    interval_column interval
 );
 INSERT INTO date_time_types
 VALUES
    ('2018-12-31 01:00 EST','2 days'),
    ('2018-12-31 01:00 PST','1 month'),
    ('2018-12-31 01:00 Australia/Melbourne','1 century'),
    (now(),'1 week');
 SELECT * FROM date_time_types;
 -- Listing 3-5: Using the interval data type
 -- Assumes script 3-4 has been run
 SELECT
    timestamp_column,
    interval_column,
    timestamp_column - interval_column AS new_date
 FROM date_time_types;
 -- Listing 3-6: Three CAST() examples
 SELECT timestamp_column, CAST(timestamp_column AS varchar(10))
 FROM date_time_types;
 SELECT numeric_column,
       CAST(numeric_column AS integer),
       CAST(numeric_column AS varchar(6))
 FROM number_data_types;
 -- Does not work:
 SELECT CAST(char_column AS integer) FROM char_data_types;
 -- Alternate notation for CAST is the double-colon:
 SELECT timestamp_column::varchar(10)
 FROM date_time_types;
--- a/Chapter_05/Chapter_05.sql
+++ b/Chapter_05/Chapter_05.sql
@ -0,0 +1,212 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 4 Code Examples
 --------------------------------------------------------------
 -- Listing 4-1: Using COPY for data import
 -- This is example syntax only; running it will produce an error
 COPY table_name
 FROM 'C:\YourDirectory\your_file.csv'
 WITH (FORMAT CSV, HEADER);
 -- Listing 4-2: A CREATE TABLE statement for Census county data
 -- Full data dictionary available at: http://www.census.gov/prod/cen2010/doc/pl94-171.pdf
 -- Note: Some columns have been given more descriptive names
 CREATE TABLE us_counties_2010 (
    geo_name varchar(90),                    -- Name of the geography
    state_us_abbreviation varchar(2),        -- State/U.S. abbreviation
    summary_level varchar(3),                -- Summary Level
    region smallint,                         -- Region
    division smallint,                       -- Division
    state_fips varchar(2),                   -- State FIPS code
    county_fips varchar(3),                  -- County code
    area_land bigint,                        -- Area (Land) in square meters
    area_water bigint,                        -- Area (Water) in square meters
    population_count_100_percent integer,    -- Population count (100%)
    housing_unit_count_100_percent integer,  -- Housing Unit count (100%)
    internal_point_lat numeric(10,7),        -- Internal point (latitude)
    internal_point_lon numeric(10,7),        -- Internal point (longitude)
    -- This section is referred to as P1. Race:
    p0010001 integer,   -- Total population
    p0010002 integer,   -- Population of one race:
    p0010003 integer,       -- White Alone
    p0010004 integer,       -- Black or African American alone
    p0010005 integer,       -- American Indian and Alaska Native alone
    p0010006 integer,       -- Asian alone
    p0010007 integer,       -- Native Hawaiian and Other Pacific Islander alone
    p0010008 integer,       -- Some Other Race alone
    p0010009 integer,   -- Population of two or more races
    p0010010 integer,   -- Population of two races:
    p0010011 integer,       -- White; Black or African American
    p0010012 integer,       -- White; American Indian and Alaska Native
    p0010013 integer,       -- White; Asian
    p0010014 integer,       -- White; Native Hawaiian and Other Pacific Islander
    p0010015 integer,       -- White; Some Other Race
    p0010016 integer,       -- Black or African American; American Indian and Alaska Native
    p0010017 integer,       -- Black or African American; Asian
    p0010018 integer,       -- Black or African American; Native Hawaiian and Other Pacific Islander
    p0010019 integer,       -- Black or African American; Some Other Race
    p0010020 integer,       -- American Indian and Alaska Native; Asian
    p0010021 integer,       -- American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander
    p0010022 integer,       -- American Indian and Alaska Native; Some Other Race
    p0010023 integer,       -- Asian; Native Hawaiian and Other Pacific Islander
    p0010024 integer,       -- Asian; Some Other Race
    p0010025 integer,       -- Native Hawaiian and Other Pacific Islander; Some Other Race
    p0010026 integer,   -- Population of three races
    p0010047 integer,   -- Population of four races
    p0010063 integer,   -- Population of five races
    p0010070 integer,   -- Population of six races
    -- This section is referred to as P2. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
    p0020001 integer,   -- Total
    p0020002 integer,   -- Hispanic or Latino
    p0020003 integer,   -- Not Hispanic or Latino:
    p0020004 integer,   -- Population of one race:
    p0020005 integer,       -- White Alone
    p0020006 integer,       -- Black or African American alone
    p0020007 integer,       -- American Indian and Alaska Native alone
    p0020008 integer,       -- Asian alone
    p0020009 integer,       -- Native Hawaiian and Other Pacific Islander alone
    p0020010 integer,       -- Some Other Race alone
    p0020011 integer,   -- Two or More Races
    p0020012 integer,   -- Population of two races
    p0020028 integer,   -- Population of three races
    p0020049 integer,   -- Population of four races
    p0020065 integer,   -- Population of five races
    p0020072 integer,   -- Population of six races
    -- This section is referred to as P3. RACE FOR THE POPULATION 18 YEARS AND OVER
    p0030001 integer,   -- Total
    p0030002 integer,   -- Population of one race:
    p0030003 integer,       -- White alone
    p0030004 integer,       -- Black or African American alone
    p0030005 integer,       -- American Indian and Alaska Native alone
    p0030006 integer,       -- Asian alone
    p0030007 integer,       -- Native Hawaiian and Other Pacific Islander alone
    p0030008 integer,       -- Some Other Race alone
    p0030009 integer,   -- Two or More Races
    p0030010 integer,   -- Population of two races
    p0030026 integer,   -- Population of three races
    p0030047 integer,   -- Population of four races
    p0030063 integer,   -- Population of five races
    p0030070 integer,   -- Population of six races
    -- This section is referred to as P4. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
    -- FOR THE POPULATION 18 YEARS AND OVER
    p0040001 integer,   -- Total
    p0040002 integer,   -- Hispanic or Latino
    p0040003 integer,   -- Not Hispanic or Latino:
    p0040004 integer,   -- Population of one race:
    p0040005 integer,   -- White alone
    p0040006 integer,   -- Black or African American alone
    p0040007 integer,   -- American Indian and Alaska Native alone
    p0040008 integer,   -- Asian alone
    p0040009 integer,   -- Native Hawaiian and Other Pacific Islander alone
    p0040010 integer,   -- Some Other Race alone
    p0040011 integer,   -- Two or More Races
    p0040012 integer,   -- Population of two races
    p0040028 integer,   -- Population of three races
    p0040049 integer,   -- Population of four races
    p0040065 integer,   -- Population of five races
    p0040072 integer,   -- Population of six races
    -- This section is referred to as H1. OCCUPANCY STATUS
    h0010001 integer,   -- Total housing units
    h0010002 integer,   -- Occupied
    h0010003 integer    -- Vacant
 );
 SELECT * FROM us_counties_2010;
 -- Listing 4-3: Importing Census data using COPY
 -- Note! If you run into an import error here, be sure you downloaded the code and
 -- data for the book according to the steps listed on page xxvii in the Introduction.
 -- Windows users: Please check the Note on page xxvii as well.
 COPY us_counties_2010
 FROM 'C:\YourDirectory\us_counties_2010.csv'
 WITH (FORMAT CSV, HEADER);
 -- Checking the data
 SELECT * FROM us_counties_2010;
 SELECT geo_name, state_us_abbreviation, area_land
 FROM us_counties_2010
 ORDER BY area_land DESC
 LIMIT 3;
 SELECT geo_name, state_us_abbreviation, internal_point_lon
 FROM us_counties_2010
 ORDER BY internal_point_lon DESC
 LIMIT 5;
 -- Listing 4-4: Creating a table to track supervisor salaries
 CREATE TABLE supervisor_salaries (
    town varchar(30),
    county varchar(30),
    supervisor varchar(30),
    start_date date,
    salary money,
    benefits money
 );
 -- Listing 4-5: Importing salaries data from CSV to three table columns
 COPY supervisor_salaries (town, supervisor, salary)
 FROM 'C:\YourDirectory\supervisor_salaries.csv'
 WITH (FORMAT CSV, HEADER);
 -- Check the data
 SELECT * FROM supervisor_salaries LIMIT 2;
 -- Listing 4-6 Use a temporary table to add a default value to a column during
 -- import
 DELETE FROM supervisor_salaries;
 CREATE TEMPORARY TABLE supervisor_salaries_temp (LIKE supervisor_salaries);
 COPY supervisor_salaries_temp (town, supervisor, salary)
 FROM 'C:\YourDirectory\supervisor_salaries.csv'
 WITH (FORMAT CSV, HEADER);
 INSERT INTO supervisor_salaries (town, county, supervisor, salary)
 SELECT town, 'Some County', supervisor, salary
 FROM supervisor_salaries_temp;
 DROP TABLE supervisor_salaries_temp;
 -- Check the data
 SELECT * FROM supervisor_salaries LIMIT 2;
 -- Listing 4-7: Export an entire table with COPY
 COPY us_counties_2010
 TO 'C:\YourDirectory\us_counties_export.txt'
 WITH (FORMAT CSV, HEADER, DELIMITER '|');
 -- Listing 4-8: Exporting selected columns from a table with COPY
 COPY us_counties_2010 (geo_name, internal_point_lat, internal_point_lon)
 TO 'C:\YourDirectory\us_counties_latlon_export.txt'
 WITH (FORMAT CSV, HEADER, DELIMITER '|');
 -- Listing 4-9: Exporting query results with COPY
 COPY (
    SELECT geo_name, state_us_abbreviation
    FROM us_counties_2010
    WHERE geo_name ILIKE '%mill%'
     )
 TO 'C:\YourDirectory\us_counties_mill_export.txt'
 WITH (FORMAT CSV, HEADER, DELIMITER '|');
--- a/Chapter_06/Chapter_06.sql
+++ b/Chapter_06/Chapter_06.sql
@ -0,0 +1,207 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 5 Code Examples
 --------------------------------------------------------------
 -- Listing 5-1: Basic addition, subtraction and multiplication with SQL
 SELECT 2 + 2;    -- addition
 SELECT 9 - 1;    -- subtraction
 SELECT 3 * 4;    -- multiplication
 -- Listing 5-2: Integer and decimal division with SQL
 SELECT 11 / 6;   -- integer division
 SELECT 11 % 6;   -- modulo division
 SELECT 11.0 / 6; -- decimal division
 SELECT CAST(11 AS numeric(3,1)) / 6;
 -- Listing 5-3: Exponents, roots and factorials with SQL
 SELECT 3 ^ 4;    -- exponentiation
 SELECT |/ 10;    -- square root (operator)
 SELECT sqrt(10); -- square root (function)
 SELECT ||/ 10;   -- cube root
 SELECT 4 !;      -- factorial
 -- Order of operations
 SELECT 7 + 8 * 9; 	-- answer: 79
 SELECT (7 + 8) * 9;	-- answer: 135
 SELECT 3 ^ 3 - 1;   -- answer: 26
 SELECT 3 ^ (3 - 1); -- answer: 9
 -- Listing 5-4: Selecting Census population columns by race with aliases
 SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010001 AS "Total Population",
       p0010003 AS "White Alone",
       p0010004 AS "Black or African American Alone",
       p0010005 AS "Am Indian/Alaska Native Alone",
       p0010006 AS "Asian Alone",
       p0010007 AS "Native Hawaiian and Other Pacific Islander Alone",
       p0010008 AS "Some Other Race Alone",
       p0010009 AS "Two or More Races"
 FROM us_counties_2010;
 -- Listing 5-5: Adding two columns in us_counties_2010
 SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010003 AS "White Alone",
       p0010004 AS "Black Alone",
       p0010003 + p0010004 AS "Total White and Black"
 FROM us_counties_2010;
 -- Listing 5-6: Checking Census data totals
 SELECT geo_name,
       state_us_abbreviation AS "st",
       p0010001 AS "Total",
       p0010003 + p0010004 + p0010005 + p0010006 + p0010007
           + p0010008 + p0010009 AS "All Races",
       (p0010003 + p0010004 + p0010005 + p0010006 + p0010007
           + p0010008 + p0010009) - p0010001 AS "Difference"
 FROM us_counties_2010
 ORDER BY "Difference" DESC;
 -- Listing 5-7: Calculating the percent of the population that is 
 -- Asian by county (percent of the whole)
 SELECT geo_name,
       state_us_abbreviation AS "st",
       (CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian"
 FROM us_counties_2010
 ORDER BY "pct_asian" DESC;
 -- Listing 5-8: Calculating percent change
 CREATE TABLE percent_change (
    department varchar(20),
    spend_2014 numeric(10,2),
    spend_2017 numeric(10,2)
 );
 INSERT INTO percent_change
 VALUES
    ('Building', 250000, 289000),
    ('Assessor', 178556, 179500),
    ('Library', 87777, 90001),
    ('Clerk', 451980, 650000),
    ('Police', 250000, 223000),
    ('Recreation', 199000, 195000);
 SELECT department,
       spend_2014,
       spend_2017,
       round( (spend_2017 - spend_2014) /
                    spend_2014 * 100, 1 ) AS "pct_change"
 FROM percent_change;
 -- Listing 5-9: Using sum() and avg() aggregate functions
 SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average"
 FROM us_counties_2010;
 -- Listing 5-10: Testing SQL percentile functions
 CREATE TABLE percentile_test (
    numbers integer
 );
 INSERT INTO percentile_test (numbers) VALUES
    (1), (2), (3), (4), (5), (6);
 SELECT
    percentile_cont(.5)
    WITHIN GROUP (ORDER BY numbers),
    percentile_disc(.5)
    WITHIN GROUP (ORDER BY numbers)
 FROM percentile_test;
 -- Listing 5-11: Using sum(), avg(), and percentile_cont() aggregate functions
 SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average",
       percentile_cont(.5)
       WITHIN GROUP (ORDER BY p0010001) AS "County Median"
 FROM us_counties_2010;
 -- Listing 5-12: Passing an array of values to percentile_cont()
 -- quartiles
 SELECT percentile_cont(array[.25,.5,.75])
       WITHIN GROUP (ORDER BY p0010001) AS "quartiles"
 FROM us_counties_2010;
 -- Extra:
 -- quintiles
 SELECT percentile_cont(array[.2,.4,.6,.8])
       WITHIN GROUP (ORDER BY p0010001) AS "quintiles"
 FROM us_counties_2010;
 -- deciles
 SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
       WITHIN GROUP (ORDER BY p0010001) AS "deciles"
 FROM us_counties_2010;
 -- Listing 5-13: Using unnest() to turn an array into rows
 SELECT unnest(
            percentile_cont(array[.25,.5,.75])
            WITHIN GROUP (ORDER BY p0010001)
            ) AS "quartiles"
 FROM us_counties_2010;
 -- Listing 5-14: Creating a median() aggregate function in PostgreSQL
 -- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
 CREATE OR REPLACE FUNCTION _final_median(anyarray)
   RETURNS float8 AS
 $$
  WITH q AS
  (
     SELECT val
     FROM unnest($1) val
     WHERE VAL IS NOT NULL
     ORDER BY 1
  ),
  cnt AS
  (
    SELECT COUNT(*) AS c FROM q
  )
  SELECT AVG(val)::float8
  FROM
  (
    SELECT val FROM q
    LIMIT  2 - MOD((SELECT c FROM cnt), 2)
    OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0)
  ) q2;
 $$
 LANGUAGE sql IMMUTABLE;
 CREATE AGGREGATE median(anyelement) (
  SFUNC=array_append,
  STYPE=anyarray,
  FINALFUNC=_final_median,
  INITCOND='{}'
 );
 -- Listing 5-15: Using a median() aggregate function
 SELECT sum(p0010001) AS "County Sum",
       round(avg(p0010001), 0) AS "County Average",
       median(p0010001) AS "County Median",
       percentile_cont(.5)
       WITHIN GROUP (ORDER BY P0010001) AS "50th Percentile"
 FROM us_counties_2010;
 -- Listing 5-16: Finding the most-frequent value with mode()
 SELECT mode() WITHIN GROUP (ORDER BY p0010001)
 FROM us_counties_2010;
--- a/Chapter_07/Chapter_07.sql
+++ b/Chapter_07/Chapter_07.sql
@ -0,0 +1,196 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 6 Code Examples
 --------------------------------------------------------------
 -- Listing 6-1: Creating the departments and employees tables
 CREATE TABLE departments (
    dept_id bigserial,
    dept varchar(100),
    city varchar(100),
    CONSTRAINT dept_key PRIMARY KEY (dept_id),
    CONSTRAINT dept_city_unique UNIQUE (dept, city)
 );
 CREATE TABLE employees (
    emp_id bigserial,
    first_name varchar(100),
    last_name varchar(100),
    salary integer,
    dept_id integer REFERENCES departments (dept_id),
    CONSTRAINT emp_key PRIMARY KEY (emp_id),
    CONSTRAINT emp_dept_unique UNIQUE (emp_id, dept_id)
 );
 INSERT INTO departments (dept, city)
 VALUES
    ('Tax', 'Atlanta'),
    ('IT', 'Boston');
 INSERT INTO employees (first_name, last_name, salary, dept_id)
 VALUES
    ('Nancy', 'Jones', 62500, 1),
    ('Lee', 'Smith', 59300, 1),
    ('Soo', 'Nguyen', 83000, 2),
    ('Janet', 'King', 95000, 2);
 -- Listing 6-2: Joining the employees and departments tables
 SELECT *
 FROM employees JOIN departments
 ON employees.dept_id = departments.dept_id;
 -- Listing 6-3: Creating two tables to explore JOIN types
 CREATE TABLE schools_left (
    id integer CONSTRAINT left_id_key PRIMARY KEY,
    left_school varchar(30)
 );
 CREATE TABLE schools_right (
    id integer CONSTRAINT right_id_key PRIMARY KEY,
    right_school varchar(30)
 );
 INSERT INTO schools_left (id, left_school) VALUES
    (1, 'Oak Street School'),
    (2, 'Roosevelt High School'),
    (5, 'Washington Middle School'),
    (6, 'Jefferson High School');
 INSERT INTO schools_right (id, right_school) VALUES
    (1, 'Oak Street School'),
    (2, 'Roosevelt High School'),
    (3, 'Morrison Elementary'),
    (4, 'Chase Magnet Academy'),
    (6, 'Jefferson High School');
 -- Listing 6-4: Using JOIN
 SELECT *
 FROM schools_left JOIN schools_right
 ON schools_left.id = schools_right.id;
 -- Bonus: Also can be specified as INNER JOIN
 SELECT *
 FROM schools_left INNER JOIN schools_right
 ON schools_left.id = schools_right.id;
 -- Listing 6-5: Using LEFT JOIN
 SELECT *
 FROM schools_left LEFT JOIN schools_right
 ON schools_left.id = schools_right.id;
 -- Listing 6-6: Using RIGHT JOIN
 SELECT *
 FROM schools_left RIGHT JOIN schools_right
 ON schools_left.id = schools_right.id;
 -- Listing 6-7: Using FULL OUTER JOIN
 SELECT *
 FROM schools_left FULL OUTER JOIN schools_right
 ON schools_left.id = schools_right.id;
 -- Listing 6-8: Using CROSS JOIN
 SELECT *
 FROM schools_left CROSS JOIN schools_right;
 -- Listing 6-9: Filtering to show missing values with IS NULL
 SELECT *
 FROM schools_left LEFT JOIN schools_right
 ON schools_left.id = schools_right.id
 WHERE schools_right.id IS NULL;
 -- Listing 6-10: Querying specific columns in a join
 SELECT schools_left.id,
       schools_left.left_school,
       schools_right.right_school
 FROM schools_left LEFT JOIN schools_right
 ON schools_left.id = schools_right.id;
 -- Listing 6-11: Simplifying code with table aliases
 SELECT lt.id,
       lt.left_school,
       rt.right_school
 FROM schools_left AS lt LEFT JOIN schools_right AS rt
 ON lt.id = rt.id;
 -- Listing 6-12: Joining multiple tables
 CREATE TABLE schools_enrollment (
    id integer,
    enrollment integer
 );
 CREATE TABLE schools_grades (
    id integer,
    grades varchar(10)
 );
 INSERT INTO schools_enrollment (id, enrollment)
 VALUES
    (1, 360),
    (2, 1001),
    (5, 450),
    (6, 927);
 INSERT INTO schools_grades (id, grades)
 VALUES
    (1, 'K-3'),
    (2, '9-12'),
    (5, '6-8'),
    (6, '9-12');
 SELECT lt.id, lt.left_school, en.enrollment, gr.grades
 FROM schools_left AS lt LEFT JOIN schools_enrollment AS en
    ON lt.id = en.id
 LEFT JOIN schools_grades AS gr
    ON lt.id = gr.id;
 -- Listing 6-13: Performing math on joined Census tables
 -- Decennial Census 2000. Full data dictionary at https://www.census.gov/prod/cen2000/doc/pl94-171.pdf
 -- Note: Some non-number columns have been given more descriptive names
 CREATE TABLE us_counties_2000 (
    geo_name varchar(90),              -- County/state name,
    state_us_abbreviation varchar(2),  -- State/U.S. abbreviation
    state_fips varchar(2),             -- State FIPS code
    county_fips varchar(3),            -- County code
    p0010001 integer,                  -- Total population
    p0010002 integer,                  -- Population of one race:
    p0010003 integer,                      -- White Alone
    p0010004 integer,                      -- Black or African American alone
    p0010005 integer,                      -- American Indian and Alaska Native alone
    p0010006 integer,                      -- Asian alone
    p0010007 integer,                      -- Native Hawaiian and Other Pacific Islander alone
    p0010008 integer,                      -- Some Other Race alone
    p0010009 integer,                  -- Population of two or more races
    p0010010 integer,                  -- Population of two races
    p0020002 integer,                  -- Hispanic or Latino
    p0020003 integer                   -- Not Hispanic or Latino:
 );
 COPY us_counties_2000
 FROM 'C:\YourDirectory\us_counties_2000.csv'
 WITH (FORMAT CSV, HEADER);
 SELECT c2010.geo_name,
       c2010.state_us_abbreviation AS state,
       c2010.p0010001 AS pop_2010,
       c2000.p0010001 AS pop_2000,
       c2010.p0010001 - c2000.p0010001 AS raw_change,
       round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
           / c2000.p0010001 * 100, 1 ) AS pct_change
 FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
 ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips
   AND c2010.p0010001 <> c2000.p0010001
 ORDER BY pct_change DESC;
--- a/Chapter_08/Chapter_08.sql
+++ b/Chapter_08/Chapter_08.sql
@ -0,0 +1,184 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 7 Code Examples
 --------------------------------------------------------------
 -- Listing 7-1: Declaring a single-column natural key as primary key
 -- As a column constraint
 CREATE TABLE natural_key_example (
    license_id varchar(10) CONSTRAINT license_key PRIMARY KEY,
    first_name varchar(50),
    last_name varchar(50)
 );
 -- Drop the table before trying again
 DROP TABLE natural_key_example;
 -- As a table constraint
 CREATE TABLE natural_key_example (
    license_id varchar(10),
    first_name varchar(50),
    last_name varchar(50),
    CONSTRAINT license_key PRIMARY KEY (license_id)
 );
 -- Listing 7-2: Example of a primary key violation
 INSERT INTO natural_key_example (license_id, first_name, last_name)
 VALUES ('T229901', 'Lynn', 'Malero');
 INSERT INTO natural_key_example (license_id, first_name, last_name)
 VALUES ('T229901', 'Sam', 'Tracy');
 -- Listing 7-3: Declaring a composite primary key as a natural key
 CREATE TABLE natural_key_composite_example (
    student_id varchar(10),
    school_day date,
    present boolean,
    CONSTRAINT student_key PRIMARY KEY (student_id, school_day)
 );
 -- Listing 7-4: Example of a composite primary key violation
 INSERT INTO natural_key_composite_example (student_id, school_day, present)
 VALUES(775, '1/22/2017', 'Y');
 INSERT INTO natural_key_composite_example (student_id, school_day, present)
 VALUES(775, '1/23/2017', 'Y');
 INSERT INTO natural_key_composite_example (student_id, school_day, present)
 VALUES(775, '1/23/2017', 'N');
 -- Listing 7-5: Declaring a bigserial column as a surrogate key
 CREATE TABLE surrogate_key_example (
    order_number bigserial,
    product_name varchar(50),
    order_date date,
    CONSTRAINT order_key PRIMARY KEY (order_number)
 );
 INSERT INTO surrogate_key_example (product_name, order_date)
 VALUES ('Beachball Polish', '2015-03-17'),
       ('Wrinkle De-Atomizer', '2017-05-22'),
       ('Flux Capacitor', '1985-10-26');
 SELECT * FROM surrogate_key_example;
 -- Listing 7-6: A foreign key example
 CREATE TABLE licenses (
    license_id varchar(10),
    first_name varchar(50),
    last_name varchar(50),
    CONSTRAINT licenses_key PRIMARY KEY (license_id)
 );
 CREATE TABLE registrations (
    registration_id varchar(10),
    registration_date date,
    license_id varchar(10) REFERENCES licenses (license_id),
    CONSTRAINT registration_key PRIMARY KEY (registration_id, license_id)
 );
 INSERT INTO licenses (license_id, first_name, last_name)
 VALUES ('T229901', 'Lynn', 'Malero');
 INSERT INTO registrations (registration_id, registration_date, license_id)
 VALUES ('A203391', '3/17/2017', 'T229901');
 INSERT INTO registrations (registration_id, registration_date, license_id)
 VALUES ('A75772', '3/17/2017', 'T000001');
 -- Listing 7-7: CHECK constraint examples
 CREATE TABLE check_constraint_example (
    user_id bigserial,
    user_role varchar(50),
    salary integer,
    CONSTRAINT user_id_key PRIMARY KEY (user_id),
    CONSTRAINT check_role_in_list CHECK (user_role IN('Admin', 'Staff')),
    CONSTRAINT check_salary_not_zero CHECK (salary > 0)
 );
 -- Both of these will fail:
 INSERT INTO check_constraint_example (user_role)
 VALUES ('admin');
 INSERT INTO check_constraint_example (salary)
 VALUES (0);
 -- Listing 7-8: UNIQUE constraint example
 CREATE TABLE unique_constraint_example (
    contact_id bigserial CONSTRAINT contact_id_key PRIMARY KEY,
    first_name varchar(50),
    last_name varchar(50),
    email varchar(200),
    CONSTRAINT email_unique UNIQUE (email)
 );
 INSERT INTO unique_constraint_example (first_name, last_name, email)
 VALUES ('Samantha', 'Lee', 'slee@example.org');
 INSERT INTO unique_constraint_example (first_name, last_name, email)
 VALUES ('Betty', 'Diaz', 'bdiaz@example.org');
 INSERT INTO unique_constraint_example (first_name, last_name, email)
 VALUES ('Sasha', 'Lee', 'slee@example.org');
 -- Listing 7-9: NOT NULL constraint example
 CREATE TABLE not_null_example (
    student_id bigserial,
    first_name varchar(50) NOT NULL,
    last_name varchar(50) NOT NULL,
    CONSTRAINT student_id_key PRIMARY KEY (student_id)
 );
 -- Listing 7-10: Dropping and adding a primary key and a NOT NULL constraint
 -- Drop
 ALTER TABLE not_null_example DROP CONSTRAINT student_id_key;
 -- Add
 ALTER TABLE not_null_example ADD CONSTRAINT student_id_key PRIMARY KEY (student_id);
 -- Drop
 ALTER TABLE not_null_example ALTER COLUMN first_name DROP NOT NULL;
 -- Add
 ALTER TABLE not_null_example ALTER COLUMN first_name SET NOT NULL;
 -- Listing 7-11: Importing New York City address data
 CREATE TABLE new_york_addresses (
    longitude numeric(9,6),
    latitude numeric(9,6),
    street_number varchar(10),
    street varchar(32),
    unit varchar(7),
    postcode varchar(5),
    id integer CONSTRAINT new_york_key PRIMARY KEY
 );
 COPY new_york_addresses
 FROM 'C:\YourDirectory\city_of_new_york.csv'
 WITH (FORMAT CSV, HEADER);
 -- Listing 7-12: Benchmark queries for index performance
 EXPLAIN ANALYZE SELECT * FROM new_york_addresses
 WHERE street = 'BROADWAY';
 EXPLAIN ANALYZE SELECT * FROM new_york_addresses
 WHERE street = '52 STREET';
 EXPLAIN ANALYZE SELECT * FROM new_york_addresses
 WHERE street = 'ZWICKY AVENUE';
 -- Listing 7-13: Creating a B-Tree index on the new_york_addresses table
 CREATE INDEX street_idx ON new_york_addresses (street);
--- a/Chapter_09/Chapter_09.sql
+++ b/Chapter_09/Chapter_09.sql
@ -0,0 +1,299 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 8 Code Examples
 --------------------------------------------------------------
 -- Listing 8-1: Creating and filling the 2014 Public Libraries Survey table
 CREATE TABLE pls_fy2014_pupld14a (
    stabr varchar(2) NOT NULL,
    fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY,
    libid varchar(20) NOT NULL,
    libname varchar(100) NOT NULL,
    obereg varchar(2) NOT NULL,
    rstatus integer NOT NULL,
    statstru varchar(2) NOT NULL,
    statname varchar(2) NOT NULL,
    stataddr varchar(2) NOT NULL,
    longitud numeric(10,7) NOT NULL,
    latitude numeric(10,7) NOT NULL,
    fipsst varchar(2) NOT NULL,
    fipsco varchar(3) NOT NULL,
    address varchar(35) NOT NULL,
    city varchar(20) NOT NULL,
    zip varchar(5) NOT NULL,
    zip4 varchar(4) NOT NULL,
    cnty varchar(20) NOT NULL,
    phone varchar(10) NOT NULL,
    c_relatn varchar(2) NOT NULL,
    c_legbas varchar(2) NOT NULL,
    c_admin varchar(2) NOT NULL,
    geocode varchar(3) NOT NULL,
    lsabound varchar(1) NOT NULL,
    startdat varchar(10),
    enddate varchar(10),
    popu_lsa integer NOT NULL,
    centlib integer NOT NULL,
    branlib integer NOT NULL,
    bkmob integer NOT NULL,
    master numeric(8,2) NOT NULL,
    libraria numeric(8,2) NOT NULL,
    totstaff numeric(8,2) NOT NULL,
    locgvt integer NOT NULL,
    stgvt integer NOT NULL,
    fedgvt integer NOT NULL,
    totincm integer NOT NULL,
    salaries integer,
    benefit integer,
    staffexp integer,
    prmatexp integer NOT NULL,
    elmatexp integer NOT NULL,
    totexpco integer NOT NULL,
    totopexp integer NOT NULL,
    lcap_rev integer NOT NULL,
    scap_rev integer NOT NULL,
    fcap_rev integer NOT NULL,
    cap_rev integer NOT NULL,
    capital integer NOT NULL,
    bkvol integer NOT NULL,
    ebook integer NOT NULL,
    audio_ph integer NOT NULL,
    audio_dl float NOT NULL,
    video_ph integer NOT NULL,
    video_dl float NOT NULL,
    databases integer NOT NULL,
    subscrip integer NOT NULL,
    hrs_open integer NOT NULL,
    visits integer NOT NULL,
    referenc integer NOT NULL,
    regbor integer NOT NULL,
    totcir integer NOT NULL,
    kidcircl integer NOT NULL,
    elmatcir integer NOT NULL,
    loanto integer NOT NULL,
    loanfm integer NOT NULL,
    totpro integer NOT NULL,
    totatten integer NOT NULL,
    gpterms integer NOT NULL,
    pitusr integer NOT NULL,
    wifisess integer NOT NULL,
    yr_sub integer NOT NULL
 );
 CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname);
 CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr);
 CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city);
 CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits);
 COPY pls_fy2014_pupld14a
 FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv'
 WITH (FORMAT CSV, HEADER);
 -- Listing 8-2: Creating and filling the 2009 Public Libraries Survey table
 CREATE TABLE pls_fy2009_pupld09a (
    stabr varchar(2) NOT NULL,
    fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY,
    libid varchar(20) NOT NULL,
    libname varchar(100) NOT NULL,
    address varchar(35) NOT NULL,
    city varchar(20) NOT NULL,
    zip varchar(5) NOT NULL,
    zip4 varchar(4) NOT NULL,
    cnty varchar(20) NOT NULL,
    phone varchar(10) NOT NULL,
    c_relatn varchar(2) NOT NULL,
    c_legbas varchar(2) NOT NULL,
    c_admin varchar(2) NOT NULL,
    geocode varchar(3) NOT NULL,
    lsabound varchar(1) NOT NULL,
    startdat varchar(10),
    enddate varchar(10),
    popu_lsa integer NOT NULL,
    centlib integer NOT NULL,
    branlib integer NOT NULL,
    bkmob integer NOT NULL,
    master numeric(8,2) NOT NULL,
    libraria numeric(8,2) NOT NULL,
    totstaff numeric(8,2) NOT NULL,
    locgvt integer NOT NULL,
    stgvt integer NOT NULL,
    fedgvt integer NOT NULL,
    totincm integer NOT NULL,
    salaries integer,
    benefit integer,
    staffexp integer,
    prmatexp integer NOT NULL,
    elmatexp integer NOT NULL,
    totexpco integer NOT NULL,
    totopexp integer NOT NULL,
    lcap_rev integer NOT NULL,
    scap_rev integer NOT NULL,
    fcap_rev integer NOT NULL,
    cap_rev integer NOT NULL,
    capital integer NOT NULL,
    bkvol integer NOT NULL,
    ebook integer NOT NULL,
    audio integer NOT NULL,
    video integer NOT NULL,
    databases integer NOT NULL,
    subscrip integer NOT NULL,
    hrs_open integer NOT NULL,
    visits integer NOT NULL,
    referenc integer NOT NULL,
    regbor integer NOT NULL,
    totcir integer NOT NULL,
    kidcircl integer NOT NULL,
    loanto integer NOT NULL,
    loanfm integer NOT NULL,
    totpro integer NOT NULL,
    totatten integer NOT NULL,
    gpterms integer NOT NULL,
    pitusr integer NOT NULL,
    yr_sub integer NOT NULL,
    obereg varchar(2) NOT NULL,
    rstatus integer NOT NULL,
    statstru varchar(2) NOT NULL,
    statname varchar(2) NOT NULL,
    stataddr varchar(2) NOT NULL,
    longitud numeric(10,7) NOT NULL,
    latitude numeric(10,7) NOT NULL,
    fipsst varchar(2) NOT NULL,
    fipsco varchar(3) NOT NULL
 );
 CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname);
 CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr);
 CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city);
 CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits);
 COPY pls_fy2009_pupld09a
 FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv'
 WITH (FORMAT CSV, HEADER);
 -- Listing 8-3: Using count() for table row counts
 SELECT count(*)
 FROM pls_fy2014_pupld14a;
 SELECT count(*)
 FROM pls_fy2009_pupld09a;
 -- Listing 8-4: Using count() for the number of values in a column
 SELECT count(salaries)
 FROM pls_fy2014_pupld14a;
 -- Listing 8-5: Using count() for the number of distinct values in a column
 SELECT count(libname)
 FROM pls_fy2014_pupld14a;
 SELECT count(DISTINCT libname)
 FROM pls_fy2014_pupld14a;
 -- Bonus: find duplicate libnames
 SELECT libname, count(libname)
 FROM pls_fy2014_pupld14a
 GROUP BY libname
 ORDER BY count(libname) DESC;
 -- Bonus: see location of every Oxford Public Library
 SELECT libname, city, stabr
 FROM pls_fy2014_pupld14a
 WHERE libname = 'OXFORD PUBLIC LIBRARY';
 -- Listing 8-6: Finding the most and fewest visits using max() and min()
 SELECT max(visits), min(visits)
 FROM pls_fy2014_pupld14a;
 -- Listing 8-7: Using GROUP BY on the stabr column
 -- There are 56 in 2014.
 SELECT stabr
 FROM pls_fy2014_pupld14a
 GROUP BY stabr
 ORDER BY stabr;
 -- Bonus: there are 55 in 2009.
 SELECT stabr
 FROM pls_fy2009_pupld09a
 GROUP BY stabr
 ORDER BY stabr;
 -- Listing 8-8: Using GROUP BY on the city and stabr columns
 SELECT city, stabr
 FROM pls_fy2014_pupld14a
 GROUP BY city, stabr
 ORDER BY city, stabr;
 -- Bonus: We can count some of the combos
 SELECT city, stabr, count(*)
 FROM pls_fy2014_pupld14a
 GROUP BY city, stabr
 ORDER BY count(*) DESC;
 -- Listing 8-9: GROUP BY with count() on the stabr column
 SELECT stabr, count(*)
 FROM pls_fy2014_pupld14a
 GROUP BY stabr
 ORDER BY count(*) DESC;
 -- Listing 8-10: GROUP BY with count() on the stabr and stataddr columns
 SELECT stabr, stataddr, count(*)
 FROM pls_fy2014_pupld14a
 GROUP BY stabr, stataddr
 ORDER BY stabr ASC, count(*) DESC;
 -- Listing 8-11: Using the sum() aggregate function to total visits to
 -- libraries in 2014 and 2009
 -- 2014
 SELECT sum(visits) AS visits_2014
 FROM pls_fy2014_pupld14a
 WHERE visits >= 0;
 -- 2009
 SELECT sum(visits) AS visits_2009
 FROM pls_fy2009_pupld09a
 WHERE visits >= 0;
 -- Listing 8-12: Using sum() to total visits on joined 2014 and 2009 library tables
 SELECT sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009
 FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
 ON pls14.fscskey = pls09.fscskey
 WHERE pls14.visits >= 0 AND pls09.visits >= 0;
 -- Listing 8-13: Using GROUP BY to track percent change in library visits by state
 SELECT pls14.stabr,
       sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009,
       round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
                    sum(pls09.visits) * 100, 2 ) AS pct_change
 FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
 ON pls14.fscskey = pls09.fscskey
 WHERE pls14.visits >= 0 AND pls09.visits >= 0
 GROUP BY pls14.stabr
 ORDER BY pct_change DESC;
 -- Listing 8-14: Using HAVING to filter the results of an aggregate query
 SELECT pls14.stabr,
       sum(pls14.visits) AS visits_2014,
       sum(pls09.visits) AS visits_2009,
       round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
                    sum(pls09.visits) * 100, 2 ) AS pct_change
 FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
 ON pls14.fscskey = pls09.fscskey
 WHERE pls14.visits >= 0 AND pls09.visits >= 0
 GROUP BY pls14.stabr
 HAVING sum(pls14.visits) > 50000000
 ORDER BY pct_change DESC;
--- a/Chapter_10/Chapter_10.sql
+++ b/Chapter_10/Chapter_10.sql
@ -0,0 +1,259 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 9 Code Examples
 --------------------------------------------------------------
 -- Listing 9-1: Importing the FSIS Meat, Poultry, and Egg Inspection Directory
 -- https://catalog.data.gov/dataset/meat-poultry-and-egg-inspection-directory-by-establishment-name
 CREATE TABLE meat_poultry_egg_inspect (
    est_number varchar(50) CONSTRAINT est_number_key PRIMARY KEY,
    company varchar(100),
    street varchar(100),
    city varchar(30),
    st varchar(2),
    zip varchar(5),
    phone varchar(14),
    grant_date date,
    activities text,
    dbas text
 );
 COPY meat_poultry_egg_inspect
 FROM 'C:\YourDirectory\MPI_Directory_by_Establishment_Name.csv'
 WITH (FORMAT CSV, HEADER, DELIMITER ',');
 CREATE INDEX company_idx ON meat_poultry_egg_inspect (company);
 -- Count the rows imported:
 SELECT count(*) FROM meat_poultry_egg_inspect;
 -- Listing 9-2: Finding multiple companies at the same address
 SELECT company,
       street,
       city,
       st,
       count(*) AS address_count
 FROM meat_poultry_egg_inspect
 GROUP BY company, street, city, st
 HAVING count(*) > 1
 ORDER BY company, street, city, st;
 -- Listing 9-3: Grouping and counting states
 SELECT st, 
       count(*) AS st_count
 FROM meat_poultry_egg_inspect
 GROUP BY st
 ORDER BY st;
 -- Listing 9-4: Using IS NULL to find missing values in the st column
 SELECT est_number,
       company,
       city,
       st,
       zip
 FROM meat_poultry_egg_inspect
 WHERE st IS NULL;
 -- Listing 9-5: Using GROUP BY and count() to find inconsistent company names
 SELECT company,
       count(*) AS company_count
 FROM meat_poultry_egg_inspect
 GROUP BY company
 ORDER BY company ASC;
 -- Listing 9-6: Using length() and count() to test the zip column
 SELECT length(zip),
       count(*) AS length_count
 FROM meat_poultry_egg_inspect
 GROUP BY length(zip)
 ORDER BY length(zip) ASC;
 -- Listing 9-7: Filtering with length() to find short zip values
 SELECT st,
       count(*) AS st_count
 FROM meat_poultry_egg_inspect
 WHERE length(zip) < 5
 GROUP BY st
 ORDER BY st ASC;
 -- Listing 9-8: Backing up a table
 CREATE TABLE meat_poultry_egg_inspect_backup AS
 SELECT * FROM meat_poultry_egg_inspect;
 -- Check number of records:
 SELECT 
    (SELECT count(*) FROM meat_poultry_egg_inspect) AS original,
    (SELECT count(*) FROM meat_poultry_egg_inspect_backup) AS backup;
 -- Listing 9-9: Creating and filling the st_copy column with ALTER TABLE and UPDATE
 ALTER TABLE meat_poultry_egg_inspect ADD COLUMN st_copy varchar(2);
 UPDATE meat_poultry_egg_inspect
 SET st_copy = st;
 -- Listing 9-10: Checking values in the st and st_copy columns
 SELECT st,
       st_copy
 FROM meat_poultry_egg_inspect
 ORDER BY st;
 -- Listing 9-11: Updating the st column for three establishments
 UPDATE meat_poultry_egg_inspect
 SET st = 'MN'
 WHERE est_number = 'V18677A';
 UPDATE meat_poultry_egg_inspect
 SET st = 'AL'
 WHERE est_number = 'M45319+P45319';
 UPDATE meat_poultry_egg_inspect
 SET st = 'WI'
 WHERE est_number = 'M263A+P263A+V263A';
 -- Listing 9-12: Restoring original st column values
 -- Restoring from the column backup
 UPDATE meat_poultry_egg_inspect
 SET st = st_copy;
 -- Restoring from the table backup
 UPDATE meat_poultry_egg_inspect original
 SET st = backup.st
 FROM meat_poultry_egg_inspect_backup backup
 WHERE original.est_number = backup.est_number; 
 -- Listing 9-13: Creating and filling the company_standard column
 ALTER TABLE meat_poultry_egg_inspect ADD COLUMN company_standard varchar(100);
 UPDATE meat_poultry_egg_inspect
 SET company_standard = company;
 -- Listing 9-14: Use UPDATE to modify field values that match a string
 UPDATE meat_poultry_egg_inspect
 SET company_standard = 'Armour-Eckrich Meats'
 WHERE company LIKE 'Armour%';
 SELECT company, company_standard
 FROM meat_poultry_egg_inspect
 WHERE company LIKE 'Armour%';
 -- Listing 9-15: Creating and filling the zip_copy column
 ALTER TABLE meat_poultry_egg_inspect ADD COLUMN zip_copy varchar(5);
 UPDATE meat_poultry_egg_inspect
 SET zip_copy = zip;
 -- Listing 9-16: Modify codes in the zip column missing two leading zeros
 UPDATE meat_poultry_egg_inspect
 SET zip = '00' || zip
 WHERE st IN('PR','VI') AND length(zip) = 3;
 -- Listing 9-17: Modify codes in the zip column missing one leading zero
 UPDATE meat_poultry_egg_inspect
 SET zip = '0' || zip
 WHERE st IN('CT','MA','ME','NH','NJ','RI','VT') AND length(zip) = 4;
 -- Listing 9-18: Creating and filling a state_regions table
 CREATE TABLE state_regions (
    st varchar(2) CONSTRAINT st_key PRIMARY KEY,
    region varchar(20) NOT NULL
 );
 COPY state_regions
 FROM 'C:\YourDirectory\state_regions.csv'
 WITH (FORMAT CSV, HEADER, DELIMITER ',');
 -- Listing 9-19: Adding and updating an inspection_date column
 ALTER TABLE meat_poultry_egg_inspect ADD COLUMN inspection_date date;
 UPDATE meat_poultry_egg_inspect inspect
 SET inspection_date = '2019-12-01'
 WHERE EXISTS (SELECT state_regions.region
              FROM state_regions
              WHERE inspect.st = state_regions.st 
                    AND state_regions.region = 'New England');
 -- Listing 9-20: Viewing updated inspection_date values
 SELECT st, inspection_date
 FROM meat_poultry_egg_inspect
 GROUP BY st, inspection_date
 ORDER BY st;
 -- Listing 9-21: Delete rows matching an expression
 DELETE FROM meat_poultry_egg_inspect
 WHERE st IN('PR','VI');
 -- Listing 9-22: Remove a column from a table using DROP
 ALTER TABLE meat_poultry_egg_inspect DROP COLUMN zip_copy;
 -- Listing 9-23: Remove a table from a database using DROP
 DROP TABLE meat_poultry_egg_inspect_backup;
 -- Listing 9-24: Demonstrating a transaction block
 -- Start transaction and perform update
 START TRANSACTION;
 UPDATE meat_poultry_egg_inspect
 SET company = 'AGRO Merchantss Oakland LLC'
 WHERE company = 'AGRO Merchants Oakland, LLC';
 -- view changes
 SELECT company
 FROM meat_poultry_egg_inspect
 WHERE company LIKE 'AGRO%'
 ORDER BY company;
 -- Revert changes
 ROLLBACK;
 -- See restored state
 SELECT company
 FROM meat_poultry_egg_inspect
 WHERE company LIKE 'AGRO%'
 ORDER BY company;
 -- Alternately, commit changes at the end:
 START TRANSACTION;
 UPDATE meat_poultry_egg_inspect
 SET company = 'AGRO Merchants Oakland LLC'
 WHERE company = 'AGRO Merchants Oakland, LLC';
 COMMIT;
 -- Listing 9-25: Backing up a table while adding and filling a new column
 CREATE TABLE meat_poultry_egg_inspect_backup AS
 SELECT *,
       '2018-02-07'::date AS reviewed_date
 FROM meat_poultry_egg_inspect;
 -- Listing 9-26: Swapping table names using ALTER TABLE
 ALTER TABLE meat_poultry_egg_inspect RENAME TO meat_poultry_egg_inspect_temp;
 ALTER TABLE meat_poultry_egg_inspect_backup RENAME TO meat_poultry_egg_inspect;
 ALTER TABLE meat_poultry_egg_inspect_temp RENAME TO meat_poultry_egg_inspect_backup;
--- a/Chapter_11/Chapter_11.sql
+++ b/Chapter_11/Chapter_11.sql
@ -0,0 +1,167 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 10 Code Examples
 --------------------------------------------------------------
 -- Listing 10-1: Create Census 2011-2015 ACS 5-Year stats table and import data
 CREATE TABLE acs_2011_2015_stats (
    geoid varchar(14) CONSTRAINT geoid_key PRIMARY KEY,
    county varchar(50) NOT NULL,
    st varchar(20) NOT NULL,
    pct_travel_60_min numeric(5,3) NOT NULL,
    pct_bachelors_higher numeric(5,3) NOT NULL,
    pct_masters_higher numeric(5,3) NOT NULL,
    median_hh_income integer,
    CHECK (pct_masters_higher <= pct_bachelors_higher)
 );
 COPY acs_2011_2015_stats
 FROM 'C:\YourDirectory\acs_2011_2015_stats.csv'
 WITH (FORMAT CSV, HEADER, DELIMITER ',');
 SELECT * FROM acs_2011_2015_stats;
 -- Listing 10-2: Using corr(Y, X) to measure the relationship between 
 -- education and income
 SELECT corr(median_hh_income, pct_bachelors_higher)
    AS bachelors_income_r
 FROM acs_2011_2015_stats;
 -- Listing 10-3: Using corr(Y, X) on additional variables
 SELECT
    round(
      corr(median_hh_income, pct_bachelors_higher)::numeric, 2
      ) AS bachelors_income_r,
    round(
      corr(pct_travel_60_min, median_hh_income)::numeric, 2
      ) AS income_travel_r,
    round(
      corr(pct_travel_60_min, pct_bachelors_higher)::numeric, 2
      ) AS bachelors_travel_r
 FROM acs_2011_2015_stats;
 -- Listing 10-4: Regression slope and intercept functions
 SELECT
    round(
        regr_slope(median_hh_income, pct_bachelors_higher)::numeric, 2
        ) AS slope,
    round(
        regr_intercept(median_hh_income, pct_bachelors_higher)::numeric, 2
        ) AS y_intercept
 FROM acs_2011_2015_stats;
 -- Listing 10-5: Calculating the coefficient of determination, or r-squared
 SELECT round(
        regr_r2(median_hh_income, pct_bachelors_higher)::numeric, 3
        ) AS r_squared
 FROM acs_2011_2015_stats;
 -- Bonus: Additional stats functions.
 -- Variance
 SELECT var_pop(median_hh_income)
 FROM acs_2011_2015_stats;
 -- Standard deviation of the entire population
 SELECT stddev_pop(median_hh_income)
 FROM acs_2011_2015_stats;
 -- Covariance
 SELECT covar_pop(median_hh_income, pct_bachelors_higher)
 FROM acs_2011_2015_stats;
 -- Listing 10-6: The rank() and dense_rank() window functions
 CREATE TABLE widget_companies (
    id bigserial,
    company varchar(30) NOT NULL,
    widget_output integer NOT NULL
 );
 INSERT INTO widget_companies (company, widget_output)
 VALUES
    ('Morse Widgets', 125000),
    ('Springfield Widget Masters', 143000),
    ('Best Widgets', 196000),
    ('Acme Inc.', 133000),
    ('District Widget Inc.', 201000),
    ('Clarke Amalgamated', 620000),
    ('Stavesacre Industries', 244000),
    ('Bowers Widget Emporium', 201000);
 SELECT
    company,
    widget_output,
    rank() OVER (ORDER BY widget_output DESC),
    dense_rank() OVER (ORDER BY widget_output DESC)
 FROM widget_companies;
 -- Listing 10-7: Applying rank() within groups using PARTITION BY
 CREATE TABLE store_sales (
    store varchar(30),
    category varchar(30) NOT NULL,
    unit_sales bigint NOT NULL,
    CONSTRAINT store_category_key PRIMARY KEY (store, category)
 );
 INSERT INTO store_sales (store, category, unit_sales)
 VALUES
    ('Broders', 'Cereal', 1104),
    ('Wallace', 'Ice Cream', 1863),
    ('Broders', 'Ice Cream', 2517),
    ('Cramers', 'Ice Cream', 2112),
    ('Broders', 'Beer', 641),
    ('Cramers', 'Cereal', 1003),
    ('Cramers', 'Beer', 640),
    ('Wallace', 'Cereal', 980),
    ('Wallace', 'Beer', 988);
 SELECT
    category,
    store,
    unit_sales,
    rank() OVER (PARTITION BY category ORDER BY unit_sales DESC)
 FROM store_sales;
 -- Listing 10-8: Create and fill a 2015 FBI crime data table
 CREATE TABLE fbi_crime_data_2015 (
    st varchar(20),
    city varchar(50),
    population integer,
    violent_crime integer,
    property_crime integer,
    burglary integer,
    larceny_theft integer,
    motor_vehicle_theft integer,
    CONSTRAINT st_city_key PRIMARY KEY (st, city)
 );
 COPY fbi_crime_data_2015
 FROM 'C:\YourDirectory\fbi_crime_data_2015.csv'
 WITH (FORMAT CSV, HEADER, DELIMITER ',');
 SELECT * FROM fbi_crime_data_2015
 ORDER BY population DESC;
 -- Listing 10-9: Find property crime rates per thousand in cities with 500,000
 -- or more people
 SELECT
    city,
    st,
    population,
    property_crime,
    round(
        (property_crime::numeric / population) * 1000, 1
        ) AS pc_per_1000
 FROM fbi_crime_data_2015
 WHERE population >= 500000
 ORDER BY (property_crime::numeric / population) DESC;
--- a/Chapter_12/Chapter_12.sql
+++ b/Chapter_12/Chapter_12.sql
@ -0,0 +1,233 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 11 Code Examples
 --------------------------------------------------------------
 -- Listing 11-1: Extracting components of a timestamp value using date_part()
 SELECT
    date_part('year', '2019-12-01 18:37:12 EST'::timestamptz) AS "year",
    date_part('month', '2019-12-01 18:37:12 EST'::timestamptz) AS "month",
    date_part('day', '2019-12-01 18:37:12 EST'::timestamptz) AS "day",
    date_part('hour', '2019-12-01 18:37:12 EST'::timestamptz) AS "hour",
    date_part('minute', '2019-12-01 18:37:12 EST'::timestamptz) AS "minute",
    date_part('seconds', '2019-12-01 18:37:12 EST'::timestamptz) AS "seconds",
    date_part('timezone_hour', '2019-12-01 18:37:12 EST'::timestamptz) AS "tz",
    date_part('week', '2019-12-01 18:37:12 EST'::timestamptz) AS "week",
    date_part('quarter', '2019-12-01 18:37:12 EST'::timestamptz) AS "quarter",
    date_part('epoch', '2019-12-01 18:37:12 EST'::timestamptz) AS "epoch";
 -- Bonus: Using the SQL-standard extract() for similar datetime parsing:
 SELECT extract('year' from '2019-12-01 18:37:12 EST'::timestamptz) AS "year";
 -- Listing 11-2: Three functions for making datetimes from components
 -- make a date
 SELECT make_date(2018, 2, 22);
 -- make a time
 SELECT make_time(18, 4, 30.3);
 -- make a timestamp with time zone
 SELECT make_timestamptz(2018, 2, 22, 18, 4, 30.3, 'Europe/Lisbon');
 -- Bonus: Retrieving the current date and time
 SELECT
    current_date,
    current_time,
    current_timestamp,
    localtime,
    localtimestamp,
    now();
 -- Listing 11-3: Comparing current_timestamp and clock_timestamp() during row insert
 CREATE TABLE current_time_example (
    time_id bigserial,
    current_timestamp_col timestamp with time zone,
    clock_timestamp_col timestamp with time zone
 );
 INSERT INTO current_time_example (current_timestamp_col, clock_timestamp_col)
    (SELECT current_timestamp,
            clock_timestamp()
     FROM generate_series(1,1000));
 SELECT * FROM current_time_example;
 -- Time Zones
 -- Listing 11-4: Showing your PostgreSQL server's default time zone
 SHOW timezone; -- Note: You can see all run-time defaults with SHOW ALL;
 -- Listing 11-5: Showing time zone abbreviations and names
 SELECT * FROM pg_timezone_abbrevs;
 SELECT * FROM pg_timezone_names;
 -- Filter to find one
 SELECT * FROM pg_timezone_names
 WHERE name LIKE 'Europe%';
 -- Listing 11-6: Setting the time zone for a client session
 SET timezone TO 'US/Pacific';
 CREATE TABLE time_zone_test (
    test_date timestamp with time zone
 );
 INSERT INTO time_zone_test VALUES ('2020-01-01 4:00');
 SELECT test_date
 FROM time_zone_test;
 SET timezone TO 'US/Eastern';
 SELECT test_date
 FROM time_zone_test;
 SELECT test_date AT TIME ZONE 'Asia/Seoul'
 FROM time_zone_test;
 -- Math with dates!
 SELECT '9/30/1929'::date - '9/27/1929'::date;
 SELECT '9/30/1929'::date + '5 years'::interval;
 -- Taxi Rides
 -- Listing 11-7: Creating a table and importing NYC yellow taxi data
 CREATE TABLE nyc_yellow_taxi_trips_2016_06_01 (
    trip_id bigserial PRIMARY KEY,
    vendor_id varchar(1) NOT NULL,
    tpep_pickup_datetime timestamp with time zone NOT NULL,
    tpep_dropoff_datetime timestamp with time zone NOT NULL,
    passenger_count integer NOT NULL,
    trip_distance numeric(8,2) NOT NULL,
    pickup_longitude numeric(18,15) NOT NULL,
    pickup_latitude numeric(18,15) NOT NULL,
    rate_code_id varchar(2) NOT NULL,
    store_and_fwd_flag varchar(1) NOT NULL,
    dropoff_longitude numeric(18,15) NOT NULL,
    dropoff_latitude numeric(18,15) NOT NULL,
    payment_type varchar(1) NOT NULL,
    fare_amount numeric(9,2) NOT NULL,
    extra numeric(9,2) NOT NULL,
    mta_tax numeric(5,2) NOT NULL,
    tip_amount numeric(9,2) NOT NULL,
    tolls_amount numeric(9,2) NOT NULL,
    improvement_surcharge numeric(9,2) NOT NULL,
    total_amount numeric(9,2) NOT NULL
 );
 COPY nyc_yellow_taxi_trips_2016_06_01 (
    vendor_id,
    tpep_pickup_datetime,
    tpep_dropoff_datetime,
    passenger_count,
    trip_distance,
    pickup_longitude,
    pickup_latitude,
    rate_code_id,
    store_and_fwd_flag,
    dropoff_longitude,
    dropoff_latitude,
    payment_type,
    fare_amount,
    extra,
    mta_tax,
    tip_amount,
    tolls_amount,
    improvement_surcharge,
    total_amount
   )
 FROM 'C:\YourDirectory\yellow_tripdata_2016_06_01.csv'
 WITH (FORMAT CSV, HEADER, DELIMITER ',');
 CREATE INDEX tpep_pickup_idx
 ON nyc_yellow_taxi_trips_2016_06_01 (tpep_pickup_datetime);
 SELECT count(*) FROM nyc_yellow_taxi_trips_2016_06_01;
 -- Listing 11-8: Counting taxi trips by hour
 SELECT
    date_part('hour', tpep_pickup_datetime) AS trip_hour,
    count(*)
 FROM nyc_yellow_taxi_trips_2016_06_01
 GROUP BY trip_hour
 ORDER BY trip_hour;
 -- Listing 11-9: Exporting taxi pickups per hour to a CSV file
 COPY
    (SELECT
        date_part('hour', tpep_pickup_datetime) AS trip_hour,
        count(*)
    FROM nyc_yellow_taxi_trips_2016_06_01
    GROUP BY trip_hour
    ORDER BY trip_hour
    )
 TO 'C:\YourDirectory\hourly_pickups_2016_06_01.csv'
 WITH (FORMAT CSV, HEADER, DELIMITER ',');
 -- Listing 11-10: Calculating median trip time by hour
 SELECT
    date_part('hour', tpep_pickup_datetime) AS trip_hour,
    percentile_cont(.5)
        WITHIN GROUP (ORDER BY
            tpep_dropoff_datetime - tpep_pickup_datetime) AS median_trip
 FROM nyc_yellow_taxi_trips_2016_06_01
 GROUP BY trip_hour
 ORDER BY trip_hour;
 -- Listing 11-11: Creating a table to hold train trip data
 SET timezone TO 'US/Central';
 CREATE TABLE train_rides (
    trip_id bigserial PRIMARY KEY,
    segment varchar(50) NOT NULL,
    departure timestamp with time zone NOT NULL,
    arrival timestamp with time zone NOT NULL
 );
 INSERT INTO train_rides (segment, departure, arrival)
 VALUES
    ('Chicago to New York', '2017-11-13 21:30 CST', '2017-11-14 18:23 EST'),
    ('New York to New Orleans', '2017-11-15 14:15 EST', '2017-11-16 19:32 CST'),
    ('New Orleans to Los Angeles', '2017-11-17 13:45 CST', '2017-11-18 9:00 PST'),
    ('Los Angeles to San Francisco', '2017-11-19 10:10 PST', '2017-11-19 21:24 PST'),
    ('San Francisco to Denver', '2017-11-20 9:10 PST', '2017-11-21 18:38 MST'),
    ('Denver to Chicago', '2017-11-22 19:10 MST', '2017-11-23 14:50 CST');
 SELECT * FROM train_rides;
 -- Listing 11-12: Calculating the length of each trip segment
 SELECT segment,
       to_char(departure, 'YYYY-MM-DD HH12:MI a.m. TZ') AS departure,
       arrival - departure AS segment_time
 FROM train_rides;
 -- Listing 11-13: Calculating cumulative intervals using OVER
 SELECT segment,
       arrival - departure AS segment_time,
       sum(arrival - departure) OVER (ORDER BY trip_id) AS cume_time
 FROM train_rides;
 -- Listing 11-14: Better formatting for cumulative trip time
 SELECT segment,
       arrival - departure AS segment_time,
       sum(date_part('epoch', (arrival - departure)))
           OVER (ORDER BY trip_id) * interval '1 second' AS cume_time
 FROM train_rides;
--- a/Chapter_13/Chapter_13.sql
+++ b/Chapter_13/Chapter_13.sql
@ -0,0 +1,310 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 12 Code Examples
 --------------------------------------------------------------
 -- Listing 12-1: Using a subquery in a WHERE clause
 SELECT geo_name,
       state_us_abbreviation,
       p0010001
 FROM us_counties_2010
 WHERE p0010001 >= (
    SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001)
    FROM us_counties_2010
    )
 ORDER BY p0010001 DESC;
 -- Listing 12-2: Using a subquery in a WHERE clause for DELETE
 CREATE TABLE us_counties_2010_top10 AS
 SELECT * FROM us_counties_2010;
 DELETE FROM us_counties_2010_top10
 WHERE p0010001 < (
    SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001)
    FROM us_counties_2010_top10
    );
 SELECT count(*) FROM us_counties_2010_top10;
 -- Listing 12-3: Subquery as a derived table in a FROM clause
 SELECT round(calcs.average, 0) as average,
       calcs.median,
       round(calcs.average - calcs.median, 0) AS median_average_diff
 FROM (
     SELECT avg(p0010001) AS average,
            percentile_cont(.5)
                WITHIN GROUP (ORDER BY p0010001)::numeric(10,1) AS median
     FROM us_counties_2010
     )
 AS calcs;
 -- Listing 12-4: Joining two derived tables
 SELECT census.state_us_abbreviation AS st,
       census.st_population,
       plants.plant_count,
       round((plants.plant_count/census.st_population::numeric(10,1)) * 1000000, 1)
           AS plants_per_million
 FROM
    (
         SELECT st,
                count(*) AS plant_count
         FROM meat_poultry_egg_inspect
         GROUP BY st
    )
    AS plants
 JOIN
    (
        SELECT state_us_abbreviation,
               sum(p0010001) AS st_population
        FROM us_counties_2010
        GROUP BY state_us_abbreviation
    )
    AS census
 ON plants.st = census.state_us_abbreviation
 ORDER BY plants_per_million DESC;
 -- Listing 12-5: Adding a subquery to a column list
 SELECT geo_name,
       state_us_abbreviation AS st,
       p0010001 AS total_pop,
       (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
        FROM us_counties_2010) AS us_median
 FROM us_counties_2010;
 -- Listing 12-6: Using a subquery expression in a calculation
 SELECT geo_name,
       state_us_abbreviation AS st,
       p0010001 AS total_pop,
       (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
        FROM us_counties_2010) AS us_median,
       p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
                   FROM us_counties_2010) AS diff_from_median
 FROM us_counties_2010
 WHERE (p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
                   FROM us_counties_2010))
       BETWEEN -1000 AND 1000;
 -- BONUS: Subquery expressions
 -- If you'd like to try the IN, EXISTS, and NOT EXISTS expressions on pages 199-200,
 -- here's the code to create a retirees table. The queries below are similar
 -- to the hypothetical examples on pages 199 and 200. You will need the
 -- employees table you created in Chapter 6.
 -- Create table and insert data
 CREATE TABLE retirees (
    id int,
    first_name varchar(50),
    last_name varchar(50)
 );
 INSERT INTO retirees 
 VALUES (2, 'Lee', 'Smith'),
       (4, 'Janet', 'King');
 -- Generating values for the IN operator
 SELECT first_name, last_name
 FROM employees
 WHERE emp_id IN (
    SELECT id
    FROM retirees);
 -- Checking whether values exist (returns all rows from employees
 -- if the expression evaluates as true)
 SELECT first_name, last_name
 FROM employees
 WHERE EXISTS (
    SELECT id
    FROM retirees);
 -- Using a correlated subquery to find matching values from employees
 -- in retirees.
 SELECT first_name, last_name
 FROM employees
 WHERE EXISTS (
    SELECT id
    FROM retirees
    WHERE id = employees.emp_id);
 -- Listing 12-7: Using a simple CTE to find large counties
 WITH
    large_counties (geo_name, st, p0010001)
 AS
    (
        SELECT geo_name, state_us_abbreviation, p0010001
        FROM us_counties_2010
        WHERE p0010001 >= 100000
    )
 SELECT st, count(*)
 FROM large_counties
 GROUP BY st
 ORDER BY count(*) DESC;
 -- Bonus: You can also write this query as:
 SELECT state_us_abbreviation, count(*)
 FROM us_counties_2010
 WHERE p0010001 >= 100000
 GROUP BY state_us_abbreviation
 ORDER BY count(*) DESC;
 -- Listing 12-8: Using CTEs in a table join
 WITH
    counties (st, population) AS
    (SELECT state_us_abbreviation, sum(population_count_100_percent)
     FROM us_counties_2010
     GROUP BY state_us_abbreviation),
    plants (st, plants) AS
    (SELECT st, count(*) AS plants
     FROM meat_poultry_egg_inspect
     GROUP BY st)
 SELECT counties.st,
       population,
       plants,
       round((plants/population::numeric(10,1))*1000000, 1) AS per_million
 FROM counties JOIN plants
 ON counties.st = plants.st
 ORDER BY per_million DESC;
 -- Listing 12-9: Using CTEs to minimize redundant code
 WITH us_median AS 
    (SELECT percentile_cont(.5) 
     WITHIN GROUP (ORDER BY p0010001) AS us_median_pop
     FROM us_counties_2010)
 SELECT geo_name,
       state_us_abbreviation AS st,
       p0010001 AS total_pop,
       us_median_pop,
       p0010001 - us_median_pop AS diff_from_median 
 FROM us_counties_2010 CROSS JOIN us_median
 WHERE (p0010001 - us_median_pop)
       BETWEEN -1000 AND 1000;
 -- Cross tabulations
 -- Install the crosstab() function via the tablefunc module
 CREATE EXTENSION tablefunc;
 -- Listing 12-10: Creating and filling the ice_cream_survey table
 CREATE TABLE ice_cream_survey (
    response_id integer PRIMARY KEY,
    office varchar(20),
    flavor varchar(20)
 );
 COPY ice_cream_survey
 FROM 'C:\YourDirectory\ice_cream_survey.csv'
 WITH (FORMAT CSV, HEADER);
 -- Listing 12-11: Generating the ice cream survey crosstab
 SELECT *
 FROM crosstab('SELECT office,
                      flavor,
                      count(*)
               FROM ice_cream_survey
               GROUP BY office, flavor
               ORDER BY office',
              'SELECT flavor
               FROM ice_cream_survey
               GROUP BY flavor
               ORDER BY flavor')
 AS (office varchar(20),
    chocolate bigint,
    strawberry bigint,
    vanilla bigint);
 -- Listing 12-12: Creating and filling a temperature_readings table
 CREATE TABLE temperature_readings (
    reading_id bigserial PRIMARY KEY,
    station_name varchar(50),
    observation_date date,
    max_temp integer,
    min_temp integer
 );
 COPY temperature_readings 
     (station_name, observation_date, max_temp, min_temp)
 FROM 'C:\YourDirectory\temperature_readings.csv'
 WITH (FORMAT CSV, HEADER);
 -- Listing 12-13: Generating the temperature readings crosstab
 SELECT *
 FROM crosstab('SELECT
                  station_name,
                  date_part(''month'', observation_date),
                  percentile_cont(.5)
                      WITHIN GROUP (ORDER BY max_temp)
               FROM temperature_readings
               GROUP BY station_name,
                        date_part(''month'', observation_date)
               ORDER BY station_name',
              'SELECT month
               FROM generate_series(1,12) month')
 AS (station varchar(50),
    jan numeric(3,0),
    feb numeric(3,0),
    mar numeric(3,0),
    apr numeric(3,0),
    may numeric(3,0),
    jun numeric(3,0),
    jul numeric(3,0),
    aug numeric(3,0),
    sep numeric(3,0),
    oct numeric(3,0),
    nov numeric(3,0),
    dec numeric(3,0)
 );
 -- Listing 12-14: Re-classifying temperature data with CASE
 SELECT max_temp,
       CASE WHEN max_temp >= 90 THEN 'Hot'
            WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
            WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
            WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
            WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
            ELSE 'Inhumane'
        END AS temperature_group
 FROM temperature_readings;
 -- Listing 12-15: Using CASE in a Common Table Expression
 WITH temps_collapsed (station_name, max_temperature_group) AS
    (SELECT station_name,
           CASE WHEN max_temp >= 90 THEN 'Hot'
                WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
                WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
                WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
                WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
                ELSE 'Inhumane'
            END
    FROM temperature_readings)
 SELECT station_name, max_temperature_group, count(*)
 FROM temps_collapsed
 GROUP BY station_name, max_temperature_group
 ORDER BY station_name, count(*) DESC;
--- a/Chapter_14/Chapter_14.sql
+++ b/Chapter_14/Chapter_14.sql
@ -0,0 +1,363 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 13 Code Examples
 --------------------------------------------------------------
 -- Commonly used string functions
 -- Full list at https://www.postgresql.org/docs/current/static/functions-string.html
 -- Case formatting
 SELECT upper('Neal7');
 SELECT lower('Randy');
 SELECT initcap('at the end of the day');
 -- Note initcap's imperfect for acronyms
 SELECT initcap('Practical SQL');
 -- Character Information
 SELECT char_length(' Pat ');
 SELECT length(' Pat ');
 SELECT position(', ' in 'Tan, Bella');
 -- Removing characters
 SELECT trim('s' from 'socks');
 SELECT trim(trailing 's' from 'socks');
 SELECT trim(' Pat ');
 SELECT char_length(trim(' Pat ')); -- note the length change
 SELECT ltrim('socks', 's');
 SELECT rtrim('socks', 's');
 -- Extracting and replacing characters
 SELECT left('703-555-1212', 3);
 SELECT right('703-555-1212', 8);
 SELECT replace('bat', 'b', 'c');
 -- Table 13-2: Regular Expression Matching Examples
 -- Any character one or more times
 SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '.+');
 -- One or two digits followed by a space and p.m.
 SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\d{1,2} (?:a.m.|p.m.)');
 -- One or more word characters at the start
 SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '^\w+');
 -- One or more word characters followed by any character at the end.
 SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\w+.$');
 -- The words May or June
 SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from 'May|June');
 -- Four digits
 SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\d{4}');
 -- May followed by a space, digit, comma, space, and four digits.
 SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from 'May \d, \d{4}');
 -- Turning Text to Data with Regular Expression Functions
 -- Listing 13-2: Creating and loading the crime_reports table
 -- Data from https://sheriff.loudoun.gov/dailycrime
 CREATE TABLE crime_reports (
    crime_id bigserial PRIMARY KEY,
    date_1 timestamp with time zone,
    date_2 timestamp with time zone,
    street varchar(250),
    city varchar(100),
    crime_type varchar(100),
    description text,
    case_number varchar(50),
    original_text text NOT NULL
 );
 COPY crime_reports (original_text)
 FROM 'C:\YourDirectory\crime_reports.csv'
 WITH (FORMAT CSV, HEADER OFF, QUOTE '"');
 SELECT original_text FROM crime_reports;
 -- Listing 13-3: Using regexp_match() to find the first date
 SELECT crime_id,
       regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}')
 FROM crime_reports;
 -- Listing 13-4: Using the regexp_matches() function with the 'g' flag
 SELECT crime_id,
       regexp_matches(original_text, '\d{1,2}\/\d{1,2}\/\d{2}', 'g')
 FROM crime_reports;
 -- Listing 13-5: Using regexp_match() to find the second date
 -- Note that the result includes an unwanted hyphen
 SELECT crime_id,
       regexp_match(original_text, '-\d{1,2}\/\d{1,2}\/\d{1,2}')
 FROM crime_reports;
 -- Listing 13-6: Using a capture group to return only the date
 -- Eliminates the hyphen
 SELECT crime_id,
       regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})')
 FROM crime_reports;
 -- Listing 13-7: Matching case number, date, crime type, and city
 SELECT
    regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number,
    regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1,
    regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type,
    regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n')
        AS city
 FROM crime_reports;
 -- Bonus: Get all parsed elements at once
 SELECT crime_id,
       regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1,
       CASE WHEN EXISTS (SELECT regexp_matches(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})'))
            THEN regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})')
            ELSE NULL
            END AS date_2,
       regexp_match(original_text, '\/\d{2}\n(\d{4})') AS hour_1,
       CASE WHEN EXISTS (SELECT regexp_matches(original_text, '\/\d{2}\n\d{4}-(\d{4})'))
            THEN regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})')
            ELSE NULL
            END AS hour_2,
       regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))') AS street,
       regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n') AS city,
       regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type,
       regexp_match(original_text, ':\s(.+)(?:C0|SO)') AS description,
       regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number
 FROM crime_reports;
 -- Listing 13-8: Retrieving a value from within an array
 SELECT
    crime_id,
    (regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1]
        AS case_number
 FROM crime_reports;
 -- Listing 13-9: Updating the crime_reports date_1 column
 UPDATE crime_reports
 SET date_1 = 
 (
    (regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
        || ' ' ||
    (regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1] 
        ||' US/Eastern'
 )::timestamptz;
 SELECT crime_id,
       date_1,
       original_text
 FROM crime_reports;
 -- Listing 13-10: Updating all crime_reports columns
 UPDATE crime_reports
 SET date_1 = 
    (
      (regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
          || ' ' ||
      (regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1] 
          ||' US/Eastern'
    )::timestamptz,
    date_2 = 
    CASE 
    -- if there is no second date but there is a second hour
        WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NULL)
                     AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL)
        THEN 
          ((regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
              || ' ' ||
          (regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1] 
              ||' US/Eastern'
          )::timestamptz 
    -- if there is both a second date and second hour
        WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NOT NULL)
              AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL)
        THEN 
          ((regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})'))[1]
              || ' ' ||
          (regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1] 
              ||' US/Eastern'
          )::timestamptz 
    -- if neither of those conditions exist, provide a NULL
        ELSE NULL 
    END,
    street = (regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))'))[1],
    city = (regexp_match(original_text,
                           '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n'))[1],
    crime_type = (regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):'))[1],
    description = (regexp_match(original_text, ':\s(.+)(?:C0|SO)'))[1],
    case_number = (regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1];
 -- Listing 13-11: Viewing selected crime data
 SELECT date_1,
       street,
       city,
       crime_type
 FROM crime_reports;
 -- Listing 13-12: Using regular expressions in a WHERE clause
 SELECT geo_name
 FROM us_counties_2010
 WHERE geo_name ~* '(.+lade.+|.+lare.+)'
 ORDER BY geo_name;
 SELECT geo_name
 FROM us_counties_2010
 WHERE geo_name ~* '.+ash.+' AND geo_name !~ 'Wash.+'
 ORDER BY geo_name;
 -- Listing 13-13: Regular expression functions to replace and split
 SELECT regexp_replace('05/12/2018', '\d{4}', '2017');
 SELECT regexp_split_to_table('Four,score,and,seven,years,ago', ',');
 SELECT regexp_split_to_array('Phil Mike Tony Steve', ' ');
 -- Listing 13-14: Finding an array length
 SELECT array_length(regexp_split_to_array('Phil Mike Tony Steve', ' '), 1);
 -- FULL TEXT SEARCH
 -- Full-text search operators:
 -- & (AND)
 -- | (OR)
 -- ! (NOT)
 -- Listing 13-15: Converting text to tsvector data
 SELECT to_tsvector('I am walking across the sitting room to sit with you.');
 -- Listing 13-16: Converting search terms to tsquery data
 SELECT to_tsquery('walking & sitting');
 -- Listing 13-17: Querying a tsvector type with a tsquery
 SELECT to_tsvector('I am walking across the sitting room') @@ to_tsquery('walking & sitting');
 SELECT to_tsvector('I am walking across the sitting room') @@ to_tsquery('walking & running');
 -- Listing 13-18: Creating and filling the president_speeches table
 -- Sources:
 -- https://archive.org/details/State-of-the-Union-Addresses-1945-2006
 -- http://www.presidency.ucsb.edu/ws/index.php
 -- https://www.eisenhower.archives.gov/all_about_ike/speeches.html
 CREATE TABLE president_speeches (
    sotu_id serial PRIMARY KEY,
    president varchar(100) NOT NULL,
    title varchar(250) NOT NULL,
    speech_date date NOT NULL,
    speech_text text NOT NULL,
    search_speech_text tsvector
 );
 COPY president_speeches (president, title, speech_date, speech_text)
 FROM 'C:\YourDirectory\sotu-1946-1977.csv'
 WITH (FORMAT CSV, DELIMITER '|', HEADER OFF, QUOTE '@');
 SELECT * FROM president_speeches;
 -- Listing 13-19: Converting speeches to tsvector in the search_speech_text column
 UPDATE president_speeches
 SET search_speech_text = to_tsvector('english', speech_text);
 -- Listing 13-20: Creating a GIN index for text search
 CREATE INDEX search_idx ON president_speeches USING gin(search_speech_text);
 -- Listing 13-21: Finding speeches containing the word "Vietnam"
 SELECT president, speech_date
 FROM president_speeches
 WHERE search_speech_text @@ to_tsquery('Vietnam')
 ORDER BY speech_date;
 -- Listing 13-22: Displaying search results with ts_headline()
 SELECT president,
       speech_date,
       ts_headline(speech_text, to_tsquery('Vietnam'),
                   'StartSel = <,
                    StopSel = >,
                    MinWords=5,
                    MaxWords=7,
                    MaxFragments=1')
 FROM president_speeches
 WHERE search_speech_text @@ to_tsquery('Vietnam');
 -- Listing 13-23: Finding speeches with the word "transportation" but not "roads"
 SELECT president,
       speech_date,
       ts_headline(speech_text, to_tsquery('transportation & !roads'),
                   'StartSel = <,
                    StopSel = >,
                    MinWords=5,
                    MaxWords=7,
                    MaxFragments=1')
 FROM president_speeches
 WHERE search_speech_text @@ to_tsquery('transportation & !roads');
 -- Listing 13-24: Find speeches where "defense" follows "military"
 SELECT president,
       speech_date,
       ts_headline(speech_text, to_tsquery('military <-> defense'),
                   'StartSel = <,
                    StopSel = >,
                    MinWords=5,
                    MaxWords=7,
                    MaxFragments=1')
 FROM president_speeches
 WHERE search_speech_text @@ to_tsquery('military <-> defense');
 -- Bonus: Example with a distance of 2:
 SELECT president,
       speech_date,
       ts_headline(speech_text, to_tsquery('military <2> defense'),
                   'StartSel = <,
                    StopSel = >,
                    MinWords=5,
                    MaxWords=7,
                    MaxFragments=2')
 FROM president_speeches
 WHERE search_speech_text @@ to_tsquery('military <2> defense');
 -- Listing 13-25: Scoring relevance with ts_rank()
 SELECT president,
       speech_date,
       ts_rank(search_speech_text,
               to_tsquery('war & security & threat & enemy')) AS score
 FROM president_speeches
 WHERE search_speech_text @@ to_tsquery('war & security & threat & enemy')
 ORDER BY score DESC
 LIMIT 5;
 -- Listing 13-26: Normalizing ts_rank() by speech length
 SELECT president,
       speech_date,
       ts_rank(search_speech_text,
               to_tsquery('war & security & threat & enemy'), 2)::numeric 
               AS score
 FROM president_speeches
 WHERE search_speech_text @@ to_tsquery('war & security & threat & enemy')
 ORDER BY score DESC
 LIMIT 5;
--- a/Chapter_15/Chapter_15.sql
+++ b/Chapter_15/Chapter_15.sql
@ -0,0 +1,252 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 14 Code Examples
 --------------------------------------------------------------
 -- Listing 14-1: Creating a gis_analysis database
 CREATE DATABASE gis_analysis;
 -- Note: Switch to this new database before continuing the examples
 -- Listing 14-2: Loading the PostGIS extension
 CREATE EXTENSION postgis;
 SELECT postgis_full_version(); -- shows PostGIS version
 -- Listing 14-3: Retrieving the well-known text for SRID 4326
 SELECT srtext
 FROM spatial_ref_sys
 WHERE srid = 4326;
 -- Listing 14-4: Using ST_GeomFromText() to create spatial objects
 SELECT ST_GeomFromText('POINT(-74.9233606 42.699992)', 4326);
 SELECT ST_GeomFromText('LINESTRING(-74.9 42.7, -75.1 42.7)', 4326);
 SELECT ST_GeomFromText('POLYGON((-74.9 42.7, -75.1 42.7,
                                 -75.1 42.6, -74.9 42.7))', 4326);
 SELECT ST_GeomFromText('MULTIPOINT (-74.9 42.7, -75.1 42.7)', 4326);
 SELECT ST_GeomFromText('MULTILINESTRING((-76.27 43.1, -76.06 43.08),
                                        (-76.2 43.3, -76.2 43.4,
                                         -76.4 43.1))', 4326);
 SELECT ST_GeomFromText('MULTIPOLYGON((
                                     (-74.92 42.7, -75.06 42.71,
                                      -75.07 42.64, -74.92 42.7),
                                     (-75.0 42.66, -75.0 42.64,
                                      -74.98 42.64, -74.98 42.66,
                                      -75.0 42.66)))', 4326);
 -- Listing 14-5: Using ST_GeogFromText() to create spatial objects
 SELECT
 ST_GeogFromText('SRID=4326;MULTIPOINT(-74.9 42.7, -75.1 42.7, -74.924 42.6)');
 -- Listing 14-6: Functions specific to making points
 SELECT ST_PointFromText('POINT(-74.9233606 42.699992)', 4326);
 SELECT ST_MakePoint(-74.9233606, 42.699992);
 SELECT ST_SetSRID(ST_MakePoint(-74.9233606, 42.699992), 4326);
 -- Listing 14-7: Functions specific to making LineStrings
 SELECT ST_LineFromText('LINESTRING(-105.90 35.67,-105.91 35.67)', 4326);
 SELECT ST_MakeLine(ST_MakePoint(-74.92, 42.69), ST_MakePoint(-74.12, 42.45));
 -- Listing 14-8: Functions specific to making Polygons
 SELECT ST_PolygonFromText('POLYGON((-74.9 42.7, -75.1 42.7,
                                    -75.1 42.6, -74.9 42.7))', 4326);
 SELECT ST_MakePolygon(
           ST_GeomFromText('LINESTRING(-74.92 42.7, -75.06 42.71,
                                       -75.07 42.64, -74.92 42.7)', 4326));
 SELECT ST_MPolyFromText('MULTIPOLYGON((
                                       (-74.92 42.7, -75.06 42.71,
                                        -75.07 42.64, -74.92 42.7),
                                       (-75.0 42.66, -75.0 42.64,
                                        -74.98 42.64, -74.98 42.66,
                                        -75.0 42.66)
                                      ))', 4326);
 -- ANALYZING FARMERS MARKETS DATA
 -- https://catalog.data.gov/dataset/farmers-markets-geographic-data
 -- https://www.ams.usda.gov/local-food-directories/farmersmarkets
 -- Listing 14-9: Create and load the farmers_markets table
 CREATE TABLE farmers_markets (
    fmid bigint PRIMARY KEY,
    market_name varchar(100) NOT NULL,
    street varchar(180),
    city varchar(60),
    county varchar(25),
    st varchar(20) NOT NULL,
    zip varchar(10),
    longitude numeric(10,7),
    latitude numeric(10,7),
    organic varchar(1) NOT NULL
 );
 COPY farmers_markets
 FROM 'C:\YourDirectory\farmers_markets.csv'
 WITH (FORMAT CSV, HEADER);
 SELECT count(*) FROM farmers_markets; -- should return 8,681 rows
 -- Listing 14-10: Creating and indexing a geography column
 -- There's also a function: https://postgis.net/docs/AddGeometryColumn.html
 -- Add column
 ALTER TABLE farmers_markets ADD COLUMN geog_point geography(POINT,4326);
 -- Now fill that column with the lat/long
 UPDATE farmers_markets
 SET geog_point = ST_SetSRID(
                            ST_MakePoint(longitude,latitude),4326
                           )::geography;
 -- Add a GiST index
 CREATE INDEX market_pts_idx ON farmers_markets USING GIST (geog_point);
 -- View the geography column
 SELECT longitude,
       latitude,
       geog_point,
       ST_AsText(geog_point)
 FROM farmers_markets
 WHERE longitude IS NOT NULL
 LIMIT 5;
 -- Listing 14-11: Using ST_DWithin() to locate farmers' markets within 10 kilometers of a point
 SELECT market_name,
       city,
       st
 FROM farmers_markets
 WHERE ST_DWithin(geog_point,
                 ST_GeogFromText('POINT(-93.6204386 41.5853202)'),
                 10000)
 ORDER BY market_name;
 -- Listing 14-12: Using ST_Distance() to calculate the miles between Yankee Stadium
 -- and Citi Field (Mets)
 -- 1609.344 meters/mile
 SELECT ST_Distance(
                   ST_GeogFromText('POINT(-73.9283685 40.8296466)'),
                   ST_GeogFromText('POINT(-73.8480153 40.7570917)')
                   ) / 1609.344 AS mets_to_yanks;
 -- Listing 14-13: Using ST_Distance() for each row in farmers_markets
 SELECT market_name,
       city,
       round(
           (ST_Distance(geog_point,
                        ST_GeogFromText('POINT(-93.6204386 41.5853202)')
                        ) / 1609.344)::numeric(8,5), 2
            ) AS miles_from_dt
 FROM farmers_markets
 WHERE ST_DWithin(geog_point,
                 ST_GeogFromText('POINT(-93.6204386 41.5853202)'),
                 10000)
 ORDER BY miles_from_dt ASC;
 -- WORKING WITH SHAPEFILES
 -- Resources:
   -- TIGER/Line® Shapefiles and TIGER/Line® Files
   -- https://www.census.gov/geo/maps-data/data/tiger-line.html
   -- Cartographic Boundary Shapefiles - Counties
   -- https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html
 -- Listing 14-14: Checking the geom column's well-known text representation
 SELECT ST_AsText(geom)
 FROM us_counties_2010_shp
 LIMIT 1;
 -- Listing 14-15: Find the largest counties by area using ST_Area()
 SELECT name10,
       statefp10 AS st,
       round(
             ( ST_Area(geom::geography) / 2589988.110336 )::numeric, 2
            )  AS square_miles
 FROM us_counties_2010_shp
 ORDER BY square_miles DESC
 LIMIT 5;
 -- Listing 14-16: Using ST_Within() to find the county belonging to a pair of coordinates
 SELECT name10,
       statefp10
 FROM us_counties_2010_shp
 WHERE ST_Within('SRID=4269;POINT(-118.3419063 34.0977076)'::geometry, geom);
 -- SPATIAL JOINS
 -- SANTA FE WATER AND ROAD DATA
 -- http://www.santafenm.gov/santa_fe_river
 -- Census 2016 Tiger/Line roads, water
 -- https://www.census.gov/geo/maps-data/data/tiger-line.html
 -- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Roads
 -- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Water
 -- RTTYP - Route Type Code Description
 -- https://www.census.gov/geo/reference/rttyp.html
 -- C County
 -- I Interstate
 -- M Common Name
 -- O Other
 -- S State recognized
 -- U U.S.
 -- MTFCC MAF/TIGER feature class code
 -- https://www.census.gov/geo/reference/mtfcc.html
 -- Here, H3010: A natural flowing waterway
 -- Listing 14-17: Using ST_GeometryType() to determine geometry
 SELECT ST_GeometryType(geom)
 FROM santafe_linearwater_2016
 LIMIT 1;
 SELECT ST_GeometryType(geom)
 FROM santafe_roads_2016
 LIMIT 1;
 -- Listing 14-18: Spatial join with ST_Intersects() to find roads crossing the Santa Fe river
 SELECT water.fullname AS waterway,
       roads.rttyp,
       roads.fullname AS road
 FROM santafe_linearwater_2016 water JOIN santafe_roads_2016 roads
    ON ST_Intersects(water.geom, roads.geom)
 WHERE water.fullname = 'Santa Fe Riv'
 ORDER BY roads.fullname;
 -- Listing 14-19: Using ST_Intersection() to show where roads cross the river
 SELECT water.fullname AS waterway,
       roads.rttyp,
       roads.fullname AS road,
       ST_AsText(ST_Intersection(water.geom, roads.geom))
 FROM santafe_linearwater_2016 water JOIN santafe_roads_2016 roads
    ON ST_Intersects(water.geom, roads.geom)
 WHERE water.fullname = 'Santa Fe Riv'
 ORDER BY roads.fullname
 LIMIT 5;
--- a/Chapter_17/Chapter_17.sql
+++ b/Chapter_17/Chapter_17.sql
@ -0,0 +1,321 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 15 Code Examples
 --------------------------------------------------------------
 -- VIEWS
 -- Listing 15-1: Creating a view that displays Nevada 2010 counties
 CREATE OR REPLACE VIEW nevada_counties_pop_2010 AS
    SELECT geo_name,
           state_fips,
           county_fips,
           p0010001 AS pop_2010
    FROM us_counties_2010
    WHERE state_us_abbreviation = 'NV'
    ORDER BY county_fips;
 -- Listing 15-2: Querying the nevada_counties_pop_2010 view
 SELECT *
 FROM nevada_counties_pop_2010
 LIMIT 5;
 -- Listing 15-3: Creating a view showing population change for US counties
 CREATE OR REPLACE VIEW county_pop_change_2010_2000 AS
    SELECT c2010.geo_name,
           c2010.state_us_abbreviation AS st,
           c2010.state_fips,
           c2010.county_fips,
           c2010.p0010001 AS pop_2010,
           c2000.p0010001 AS pop_2000,
           round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
               / c2000.p0010001 * 100, 1 ) AS pct_change_2010_2000
    FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
    ON c2010.state_fips = c2000.state_fips
       AND c2010.county_fips = c2000.county_fips
    ORDER BY c2010.state_fips, c2010.county_fips;
 -- Listing 15-4: Selecting columns from the county_pop_change_2010_2000 view
 SELECT geo_name,
       st,
       pop_2010,
       pct_change_2010_2000
 FROM county_pop_change_2010_2000
 WHERE st = 'NV'
 LIMIT 5;
 -- Listing 15-5: Creating a view on the employees table
 CREATE OR REPLACE VIEW employees_tax_dept AS
     SELECT emp_id,
            first_name,
            last_name,
            dept_id
     FROM employees
     WHERE dept_id = 1
     ORDER BY emp_id
     WITH LOCAL CHECK OPTION;
 SELECT * FROM employees_tax_dept;
 -- Listing 15-6: Successful and rejected inserts via the employees_tax_dept view
 INSERT INTO employees_tax_dept (first_name, last_name, dept_id)
 VALUES ('Suzanne', 'Legere', 1);
 INSERT INTO employees_tax_dept (first_name, last_name, dept_id)
 VALUES ('Jamil', 'White', 2);
 -- optional:
 SELECT * FROM employees_tax_dept;
 SELECT * FROM employees;
 -- Listing 15-7: Updating a row via the employees_tax_dept view
 UPDATE employees_tax_dept
 SET last_name = 'Le Gere'
 WHERE emp_id = 5;
 SELECT * FROM employees_tax_dept;
 -- Bonus: This will fail because the salary column is not in the view
 UPDATE employees_tax_dept
 SET salary = 100000
 WHERE emp_id = 5;
 -- Listing 15-8: Deleting a row via the employees_tax_dept view
 DELETE FROM employees_tax_dept
 WHERE emp_id = 5;
 -- FUNCTIONS
 -- https://www.postgresql.org/docs/current/static/plpgsql.html
 -- Listing 15-9: Creating a percent_change function
 -- To delete this function: DROP FUNCTION percent_change(numeric,numeric,integer);
 CREATE OR REPLACE FUNCTION
 percent_change(new_value numeric,
               old_value numeric,
               decimal_places integer DEFAULT 1)
 RETURNS numeric AS
 'SELECT round(
        ((new_value - old_value) / old_value) * 100, decimal_places
 );'
 LANGUAGE SQL
 IMMUTABLE
 RETURNS NULL ON NULL INPUT;
 -- Listing 15-10: Testing the percent_change() function
 SELECT percent_change(110, 108, 2);
 -- Listing 15-11: Testing percent_change() on Census data
 SELECT c2010.geo_name,
       c2010.state_us_abbreviation AS st,
       c2010.p0010001 AS pop_2010,
       percent_change(c2010.p0010001, c2000.p0010001) AS pct_chg_func,
       round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
           / c2000.p0010001 * 100, 1 ) AS pct_chg_formula
 FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
 ON c2010.state_fips = c2000.state_fips
   AND c2010.county_fips = c2000.county_fips
 ORDER BY pct_chg_func DESC
 LIMIT 5;
 -- Listing 15-12: Adding a column to the teachers table and seeing the data
 ALTER TABLE teachers ADD COLUMN personal_days integer;
 SELECT first_name,
       last_name,
       hire_date,
       personal_days
 FROM teachers;
 -- Listing 15-13: Creating an update_personal_days() function
 CREATE OR REPLACE FUNCTION update_personal_days()
 RETURNS void AS $$
 BEGIN
    UPDATE teachers
    SET personal_days =
        CASE WHEN (now() - hire_date) BETWEEN '5 years'::interval
                                      AND '10 years'::interval THEN 4
             WHEN (now() - hire_date) > '10 years'::interval THEN 5
             ELSE 3
        END;
    RAISE NOTICE 'personal_days updated!';
 END;
 $$ LANGUAGE plpgsql;
 -- To run the function:
 SELECT update_personal_days();
 -- Listing 15-14: Enabling the PL/Python procedural language
 CREATE EXTENSION plpythonu;
 -- Listing 15-15: Using PL/Python to create the trim_county() function
 CREATE OR REPLACE FUNCTION trim_county(input_string text)
 RETURNS text AS $$
    import re
    cleaned = re.sub(r' County', '', input_string)
    return cleaned
 $$ LANGUAGE plpythonu;
 -- Listing 15-16: Testing the trim_county() function
 SELECT geo_name,
       trim_county(geo_name)
 FROM us_counties_2010
 ORDER BY state_fips, county_fips
 LIMIT 5;
 -- TRIGGERS
 -- Listing 15-17: Creating the grades and grades_history tables
 CREATE TABLE grades (
    student_id bigint,
    course_id bigint,
    course varchar(30) NOT NULL,
    grade varchar(5) NOT NULL,
 PRIMARY KEY (student_id, course_id)
 );
 INSERT INTO grades
 VALUES
    (1, 1, 'Biology 2', 'F'),
    (1, 2, 'English 11B', 'D'),
    (1, 3, 'World History 11B', 'C'),
    (1, 4, 'Trig 2', 'B');
 CREATE TABLE grades_history (
    student_id bigint NOT NULL,
    course_id bigint NOT NULL,
    change_time timestamp with time zone NOT NULL,
    course varchar(30) NOT NULL,
    old_grade varchar(5) NOT NULL,
    new_grade varchar(5) NOT NULL,
 PRIMARY KEY (student_id, course_id, change_time)
 );  
 -- Listing 15-18: Creating the record_if_grade_changed() function
 CREATE OR REPLACE FUNCTION record_if_grade_changed()
    RETURNS trigger AS
 $$
 BEGIN
    IF NEW.grade <> OLD.grade THEN
    INSERT INTO grades_history (
        student_id,
        course_id,
        change_time,
        course,
        old_grade,
        new_grade)
    VALUES
        (OLD.student_id,
         OLD.course_id,
         now(),
         OLD.course,
         OLD.grade,
         NEW.grade);
    END IF;
    RETURN NEW;
 END;
 $$ LANGUAGE plpgsql;
 -- Listing 15-19: Creating the grades_update trigger
 CREATE TRIGGER grades_update
  AFTER UPDATE
  ON grades
  FOR EACH ROW
  EXECUTE PROCEDURE record_if_grade_changed();
 -- Listing 15-20: Testing the grades_update trigger
 -- Initially, there should be 0 records in the history
 SELECT * FROM grades_history;
 -- Check the grades
 SELECT * FROM grades;
 -- Update a grade
 UPDATE grades
 SET grade = 'C'
 WHERE student_id = 1 AND course_id = 1;
 -- Now check the history
 SELECT student_id,
       change_time,
       course,
       old_grade,
       new_grade
 FROM grades_history;
 -- Listing 15-21: Creating a temperature_test table
 CREATE TABLE temperature_test (
    station_name varchar(50),
    observation_date date,
    max_temp integer,
    min_temp integer,
    max_temp_group varchar(40),
 PRIMARY KEY (station_name, observation_date)
 );
 -- Listing 15-22: Creating the classify_max_temp() function
 CREATE OR REPLACE FUNCTION classify_max_temp()
    RETURNS trigger AS
 $$
 BEGIN
    CASE
       WHEN NEW.max_temp >= 90 THEN
           NEW.max_temp_group := 'Hot';
       WHEN NEW.max_temp BETWEEN 70 AND 89 THEN
           NEW.max_temp_group := 'Warm';
       WHEN NEW.max_temp BETWEEN 50 AND 69 THEN
           NEW.max_temp_group := 'Pleasant';
       WHEN NEW.max_temp BETWEEN 33 AND 49 THEN
           NEW.max_temp_group :=  'Cold';
       WHEN NEW.max_temp BETWEEN 20 AND 32 THEN
           NEW.max_temp_group :=  'Freezing';
       ELSE NEW.max_temp_group :=  'Inhumane';
    END CASE;
    RETURN NEW;
 END;
 $$ LANGUAGE plpgsql;
 -- Listing 15-23: Creating the temperature_insert trigger
 CREATE TRIGGER temperature_insert
    BEFORE INSERT
    ON temperature_test
    FOR EACH ROW
    EXECUTE PROCEDURE classify_max_temp();
 -- Listing 15-24: Inserting rows to test the temperature_update trigger
 INSERT INTO temperature_test (station_name, observation_date, max_temp, min_temp)
 VALUES
    ('North Station', '1/19/2019', 10, -3),
    ('North Station', '3/20/2019', 28, 19),
    ('North Station', '5/2/2019', 65, 42),
    ('North Station', '8/9/2019', 93, 74);
 SELECT * FROM temperature_test;
--- a/Chapter_19/Chapter_19.sql
+++ b/Chapter_19/Chapter_19.sql
@ -0,0 +1,104 @@
 --------------------------------------------------------------
 -- Practical SQL: A Beginner's Guide to Storytelling with Data
 -- by Anthony DeBarros
 -- Chapter 17 Code Examples
 --------------------------------------------------------------
 -- VACUUM
 -- Listing 17-1: Creating a table to test vacuuming
 CREATE TABLE vacuum_test (
    integer_column integer
 );
 -- Listing 17-2: Determining the size of vacuum_test
 SELECT pg_size_pretty(
           pg_total_relation_size('vacuum_test')
       );
 -- optional: Determine database size   
 SELECT pg_size_pretty(
           pg_database_size('analysis')
       );
 -- Listing 17-3: Inserting 500,000 rows into vacuum_test
 INSERT INTO vacuum_test
 SELECT * FROM generate_series(1,500000);
 -- Test its size again
 SELECT pg_size_pretty(
           pg_table_size('vacuum_test')
       );
 -- Listing 17-4: Updating all rows in vacuum_test
 UPDATE vacuum_test
 SET integer_column = integer_column + 1;
 -- Test its size again (35 MB)
 SELECT pg_size_pretty(
           pg_table_size('vacuum_test')
       );
 -- Listing 17-5: Viewing autovacuum statistics for vacuum_test
 SELECT relname,
       last_vacuum,
       last_autovacuum,
       vacuum_count,
       autovacuum_count
 FROM pg_stat_all_tables
 WHERE relname = 'vacuum_test';
 -- To see all columns available
 SELECT *
 FROM pg_stat_all_tables
 WHERE relname = 'vacuum_test';
 -- Listing 17-6: Running VACUUM manually
 VACUUM vacuum_test;
 VACUUM; -- vacuums the whole database
 VACUUM VERBOSE; -- provides messages
 -- Listing 17-7: Using VACUUM FULL to reclaim disk space
 VACUUM FULL vacuum_test;
 -- Test its size again
 SELECT pg_size_pretty(
           pg_table_size('vacuum_test')
       );
 -- SETTINGS
 -- Listing 17-8: Showing the location of postgresql.conf
 SHOW config_file;
 -- Listing 17-10: Show the location of the data directory
 SHOW data_directory;
 -- reload settings
 -- Mac and Linux: pg_ctl reload -D '/path/to/data/directory/'
 -- Windows: pg_ctl reload -D "C:\path\to\data\directory\"
 -- BACKUP AND RESTORE
 -- Listing 17-11: Backing up the analysis database with pg_dump
 pg_dump -d analysis -U [user_name] -Fc > analysis_backup.sql
 -- Back up just a table
 pg_dump -t 'train_rides' -d analysis -U [user_name] -Fc > train_backup.sql 
 -- Listing 17-12: Restoring the analysis database with pg_restore
 pg_restore -C -d postgres -U postgres analysis_backup_custom.sql