More renumbering
This commit is contained in:
parent
204d3cc55d
commit
0d2ffa4aad
34
Chapter_02/Chapter_02.sql
Normal file
34
Chapter_02/Chapter_02.sql
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 1 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 1-1: Creating a database named analysis
|
||||||
|
|
||||||
|
CREATE DATABASE analysis;
|
||||||
|
|
||||||
|
-- Listing 1-2: Creating a table named teachers with six columns
|
||||||
|
|
||||||
|
CREATE TABLE teachers (
|
||||||
|
id bigserial,
|
||||||
|
first_name varchar(25),
|
||||||
|
last_name varchar(50),
|
||||||
|
school varchar(50),
|
||||||
|
hire_date date,
|
||||||
|
salary numeric
|
||||||
|
);
|
||||||
|
|
||||||
|
-- This command will remove (drop) the table.
|
||||||
|
-- DROP TABLE teachers;
|
||||||
|
|
||||||
|
-- Listing 1-3 Inserting data into the teachers table
|
||||||
|
|
||||||
|
INSERT INTO teachers (first_name, last_name, school, hire_date, salary)
|
||||||
|
VALUES ('Janet', 'Smith', 'F.D. Roosevelt HS', '2011-10-30', 36200),
|
||||||
|
('Lee', 'Reynolds', 'F.D. Roosevelt HS', '1993-05-22', 65000),
|
||||||
|
('Samuel', 'Cole', 'Myers Middle School', '2005-08-01', 43500),
|
||||||
|
('Samantha', 'Bush', 'Myers Middle School', '2011-10-30', 36200),
|
||||||
|
('Betty', 'Diaz', 'Myers Middle School', '2005-08-30', 43500),
|
||||||
|
('Kathleen', 'Roush', 'F.D. Roosevelt HS', '2010-10-22', 38500);
|
||||||
104
Chapter_03/Chapter_03.sql
Normal file
104
Chapter_03/Chapter_03.sql
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 2 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 2-1: Querying all rows and columns from the teachers table
|
||||||
|
|
||||||
|
SELECT * FROM teachers;
|
||||||
|
|
||||||
|
-- Listing 2-2: Querying a subset of columns
|
||||||
|
|
||||||
|
SELECT last_name, first_name, salary FROM teachers;
|
||||||
|
|
||||||
|
-- Listing 2-3: Querying distinct values in the school column
|
||||||
|
|
||||||
|
SELECT DISTINCT school
|
||||||
|
FROM teachers;
|
||||||
|
|
||||||
|
-- Listing 2-4: Querying distinct pairs of values in the school and salary
|
||||||
|
-- columns
|
||||||
|
|
||||||
|
SELECT DISTINCT school, salary
|
||||||
|
FROM teachers;
|
||||||
|
|
||||||
|
-- Listing 2-5: Sorting a column with ORDER BY
|
||||||
|
|
||||||
|
SELECT first_name, last_name, salary
|
||||||
|
FROM teachers
|
||||||
|
ORDER BY salary DESC;
|
||||||
|
|
||||||
|
-- Listing 2-6: Sorting multiple columns with ORDER BY
|
||||||
|
|
||||||
|
SELECT last_name, school, hire_date
|
||||||
|
FROM teachers
|
||||||
|
ORDER BY school ASC, hire_date DESC;
|
||||||
|
|
||||||
|
-- Listing 2-7: Filtering rows using WHERE
|
||||||
|
|
||||||
|
SELECT last_name, school, hire_date
|
||||||
|
FROM teachers
|
||||||
|
WHERE school = 'Myers Middle School';
|
||||||
|
|
||||||
|
-- Examples of WHERE comparison operators
|
||||||
|
|
||||||
|
-- Teachers with first name of Janet
|
||||||
|
SELECT first_name, last_name, school
|
||||||
|
FROM teachers
|
||||||
|
WHERE first_name = 'Janet';
|
||||||
|
|
||||||
|
-- School names not equal to F.D. Roosevelt HS
|
||||||
|
SELECT school
|
||||||
|
FROM teachers
|
||||||
|
WHERE school != 'F.D. Roosevelt HS';
|
||||||
|
|
||||||
|
-- Teachers hired before Jan. 1, 2000
|
||||||
|
SELECT first_name, last_name, hire_date
|
||||||
|
FROM teachers
|
||||||
|
WHERE hire_date < '2000-01-01';
|
||||||
|
|
||||||
|
-- Teachers earning 43,500 or more
|
||||||
|
SELECT first_name, last_name, salary
|
||||||
|
FROM teachers
|
||||||
|
WHERE salary >= 43500;
|
||||||
|
|
||||||
|
-- Teachers who earn between $40,000 and $65,000
|
||||||
|
SELECT first_name, last_name, school, salary
|
||||||
|
FROM teachers
|
||||||
|
WHERE salary BETWEEN 40000 AND 65000;
|
||||||
|
|
||||||
|
-- Listing 2-8: Filtering with LIKE AND ILIKE
|
||||||
|
|
||||||
|
SELECT first_name
|
||||||
|
FROM teachers
|
||||||
|
WHERE first_name LIKE 'sam%';
|
||||||
|
|
||||||
|
SELECT first_name
|
||||||
|
FROM teachers
|
||||||
|
WHERE first_name ILIKE 'sam%';
|
||||||
|
|
||||||
|
-- Listing 2-9: Combining operators using AND and OR
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM teachers
|
||||||
|
WHERE school = 'Myers Middle School'
|
||||||
|
AND salary < 40000;
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM teachers
|
||||||
|
WHERE last_name = 'Cole'
|
||||||
|
OR last_name = 'Bush';
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM teachers
|
||||||
|
WHERE school = 'F.D. Roosevelt HS'
|
||||||
|
AND (salary < 38000 OR salary > 40000);
|
||||||
|
|
||||||
|
-- Listing 2-10: A SELECT statement including WHERE and ORDER BY
|
||||||
|
|
||||||
|
SELECT first_name, last_name, school, hire_date, salary
|
||||||
|
FROM teachers
|
||||||
|
WHERE school LIKE '%Roos%'
|
||||||
|
ORDER BY hire_date DESC;
|
||||||
90
Chapter_04/Chapter_04.sql
Normal file
90
Chapter_04/Chapter_04.sql
Normal file
@ -0,0 +1,90 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 3 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 3-1: Character data types in action
|
||||||
|
|
||||||
|
CREATE TABLE char_data_types (
|
||||||
|
varchar_column varchar(10),
|
||||||
|
char_column char(10),
|
||||||
|
text_column text
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO char_data_types
|
||||||
|
VALUES
|
||||||
|
('abc', 'abc', 'abc'),
|
||||||
|
('defghi', 'defghi', 'defghi');
|
||||||
|
|
||||||
|
COPY char_data_types TO 'C:\YourDirectory\typetest.txt'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER '|');
|
||||||
|
|
||||||
|
|
||||||
|
-- Listing 3-2: Number data types in action
|
||||||
|
|
||||||
|
CREATE TABLE number_data_types (
|
||||||
|
numeric_column numeric(20,5),
|
||||||
|
real_column real,
|
||||||
|
double_column double precision
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO number_data_types
|
||||||
|
VALUES
|
||||||
|
(.7, .7, .7),
|
||||||
|
(2.13579, 2.13579, 2.13579),
|
||||||
|
(2.1357987654, 2.1357987654, 2.1357987654);
|
||||||
|
|
||||||
|
SELECT * FROM number_data_types;
|
||||||
|
|
||||||
|
-- Listing 3-3: Rounding issues with float columns
|
||||||
|
-- Assumes table created and loaded with Listing 3-2
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
numeric_column * 10000000 AS "Fixed",
|
||||||
|
real_column * 10000000 AS "Float"
|
||||||
|
FROM number_data_types
|
||||||
|
WHERE numeric_column = .7;
|
||||||
|
|
||||||
|
-- Listing 3-4: Timestamp and interval types in action
|
||||||
|
|
||||||
|
CREATE TABLE date_time_types (
|
||||||
|
timestamp_column timestamp with time zone,
|
||||||
|
interval_column interval
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO date_time_types
|
||||||
|
VALUES
|
||||||
|
('2018-12-31 01:00 EST','2 days'),
|
||||||
|
('2018-12-31 01:00 PST','1 month'),
|
||||||
|
('2018-12-31 01:00 Australia/Melbourne','1 century'),
|
||||||
|
(now(),'1 week');
|
||||||
|
|
||||||
|
SELECT * FROM date_time_types;
|
||||||
|
|
||||||
|
-- Listing 3-5: Using the interval data type
|
||||||
|
-- Assumes script 3-4 has been run
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
timestamp_column,
|
||||||
|
interval_column,
|
||||||
|
timestamp_column - interval_column AS new_date
|
||||||
|
FROM date_time_types;
|
||||||
|
|
||||||
|
-- Listing 3-6: Three CAST() examples
|
||||||
|
|
||||||
|
SELECT timestamp_column, CAST(timestamp_column AS varchar(10))
|
||||||
|
FROM date_time_types;
|
||||||
|
|
||||||
|
SELECT numeric_column,
|
||||||
|
CAST(numeric_column AS integer),
|
||||||
|
CAST(numeric_column AS varchar(6))
|
||||||
|
FROM number_data_types;
|
||||||
|
|
||||||
|
-- Does not work:
|
||||||
|
SELECT CAST(char_column AS integer) FROM char_data_types;
|
||||||
|
|
||||||
|
-- Alternate notation for CAST is the double-colon:
|
||||||
|
SELECT timestamp_column::varchar(10)
|
||||||
|
FROM date_time_types;
|
||||||
212
Chapter_05/Chapter_05.sql
Normal file
212
Chapter_05/Chapter_05.sql
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 4 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 4-1: Using COPY for data import
|
||||||
|
-- This is example syntax only; running it will produce an error
|
||||||
|
|
||||||
|
COPY table_name
|
||||||
|
FROM 'C:\YourDirectory\your_file.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
|
||||||
|
-- Listing 4-2: A CREATE TABLE statement for Census county data
|
||||||
|
-- Full data dictionary available at: http://www.census.gov/prod/cen2010/doc/pl94-171.pdf
|
||||||
|
-- Note: Some columns have been given more descriptive names
|
||||||
|
|
||||||
|
CREATE TABLE us_counties_2010 (
|
||||||
|
geo_name varchar(90), -- Name of the geography
|
||||||
|
state_us_abbreviation varchar(2), -- State/U.S. abbreviation
|
||||||
|
summary_level varchar(3), -- Summary Level
|
||||||
|
region smallint, -- Region
|
||||||
|
division smallint, -- Division
|
||||||
|
state_fips varchar(2), -- State FIPS code
|
||||||
|
county_fips varchar(3), -- County code
|
||||||
|
area_land bigint, -- Area (Land) in square meters
|
||||||
|
area_water bigint, -- Area (Water) in square meters
|
||||||
|
population_count_100_percent integer, -- Population count (100%)
|
||||||
|
housing_unit_count_100_percent integer, -- Housing Unit count (100%)
|
||||||
|
internal_point_lat numeric(10,7), -- Internal point (latitude)
|
||||||
|
internal_point_lon numeric(10,7), -- Internal point (longitude)
|
||||||
|
|
||||||
|
-- This section is referred to as P1. Race:
|
||||||
|
p0010001 integer, -- Total population
|
||||||
|
p0010002 integer, -- Population of one race:
|
||||||
|
p0010003 integer, -- White Alone
|
||||||
|
p0010004 integer, -- Black or African American alone
|
||||||
|
p0010005 integer, -- American Indian and Alaska Native alone
|
||||||
|
p0010006 integer, -- Asian alone
|
||||||
|
p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone
|
||||||
|
p0010008 integer, -- Some Other Race alone
|
||||||
|
p0010009 integer, -- Population of two or more races
|
||||||
|
p0010010 integer, -- Population of two races:
|
||||||
|
p0010011 integer, -- White; Black or African American
|
||||||
|
p0010012 integer, -- White; American Indian and Alaska Native
|
||||||
|
p0010013 integer, -- White; Asian
|
||||||
|
p0010014 integer, -- White; Native Hawaiian and Other Pacific Islander
|
||||||
|
p0010015 integer, -- White; Some Other Race
|
||||||
|
p0010016 integer, -- Black or African American; American Indian and Alaska Native
|
||||||
|
p0010017 integer, -- Black or African American; Asian
|
||||||
|
p0010018 integer, -- Black or African American; Native Hawaiian and Other Pacific Islander
|
||||||
|
p0010019 integer, -- Black or African American; Some Other Race
|
||||||
|
p0010020 integer, -- American Indian and Alaska Native; Asian
|
||||||
|
p0010021 integer, -- American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander
|
||||||
|
p0010022 integer, -- American Indian and Alaska Native; Some Other Race
|
||||||
|
p0010023 integer, -- Asian; Native Hawaiian and Other Pacific Islander
|
||||||
|
p0010024 integer, -- Asian; Some Other Race
|
||||||
|
p0010025 integer, -- Native Hawaiian and Other Pacific Islander; Some Other Race
|
||||||
|
p0010026 integer, -- Population of three races
|
||||||
|
p0010047 integer, -- Population of four races
|
||||||
|
p0010063 integer, -- Population of five races
|
||||||
|
p0010070 integer, -- Population of six races
|
||||||
|
|
||||||
|
-- This section is referred to as P2. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
|
||||||
|
p0020001 integer, -- Total
|
||||||
|
p0020002 integer, -- Hispanic or Latino
|
||||||
|
p0020003 integer, -- Not Hispanic or Latino:
|
||||||
|
p0020004 integer, -- Population of one race:
|
||||||
|
p0020005 integer, -- White Alone
|
||||||
|
p0020006 integer, -- Black or African American alone
|
||||||
|
p0020007 integer, -- American Indian and Alaska Native alone
|
||||||
|
p0020008 integer, -- Asian alone
|
||||||
|
p0020009 integer, -- Native Hawaiian and Other Pacific Islander alone
|
||||||
|
p0020010 integer, -- Some Other Race alone
|
||||||
|
p0020011 integer, -- Two or More Races
|
||||||
|
p0020012 integer, -- Population of two races
|
||||||
|
p0020028 integer, -- Population of three races
|
||||||
|
p0020049 integer, -- Population of four races
|
||||||
|
p0020065 integer, -- Population of five races
|
||||||
|
p0020072 integer, -- Population of six races
|
||||||
|
|
||||||
|
-- This section is referred to as P3. RACE FOR THE POPULATION 18 YEARS AND OVER
|
||||||
|
p0030001 integer, -- Total
|
||||||
|
p0030002 integer, -- Population of one race:
|
||||||
|
p0030003 integer, -- White alone
|
||||||
|
p0030004 integer, -- Black or African American alone
|
||||||
|
p0030005 integer, -- American Indian and Alaska Native alone
|
||||||
|
p0030006 integer, -- Asian alone
|
||||||
|
p0030007 integer, -- Native Hawaiian and Other Pacific Islander alone
|
||||||
|
p0030008 integer, -- Some Other Race alone
|
||||||
|
p0030009 integer, -- Two or More Races
|
||||||
|
p0030010 integer, -- Population of two races
|
||||||
|
p0030026 integer, -- Population of three races
|
||||||
|
p0030047 integer, -- Population of four races
|
||||||
|
p0030063 integer, -- Population of five races
|
||||||
|
p0030070 integer, -- Population of six races
|
||||||
|
|
||||||
|
-- This section is referred to as P4. HISPANIC OR LATINO, AND NOT HISPANIC OR LATINO BY RACE
|
||||||
|
-- FOR THE POPULATION 18 YEARS AND OVER
|
||||||
|
p0040001 integer, -- Total
|
||||||
|
p0040002 integer, -- Hispanic or Latino
|
||||||
|
p0040003 integer, -- Not Hispanic or Latino:
|
||||||
|
p0040004 integer, -- Population of one race:
|
||||||
|
p0040005 integer, -- White alone
|
||||||
|
p0040006 integer, -- Black or African American alone
|
||||||
|
p0040007 integer, -- American Indian and Alaska Native alone
|
||||||
|
p0040008 integer, -- Asian alone
|
||||||
|
p0040009 integer, -- Native Hawaiian and Other Pacific Islander alone
|
||||||
|
p0040010 integer, -- Some Other Race alone
|
||||||
|
p0040011 integer, -- Two or More Races
|
||||||
|
p0040012 integer, -- Population of two races
|
||||||
|
p0040028 integer, -- Population of three races
|
||||||
|
p0040049 integer, -- Population of four races
|
||||||
|
p0040065 integer, -- Population of five races
|
||||||
|
p0040072 integer, -- Population of six races
|
||||||
|
|
||||||
|
-- This section is referred to as H1. OCCUPANCY STATUS
|
||||||
|
h0010001 integer, -- Total housing units
|
||||||
|
h0010002 integer, -- Occupied
|
||||||
|
h0010003 integer -- Vacant
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT * FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 4-3: Importing Census data using COPY
|
||||||
|
-- Note! If you run into an import error here, be sure you downloaded the code and
|
||||||
|
-- data for the book according to the steps listed on page xxvii in the Introduction.
|
||||||
|
-- Windows users: Please check the Note on page xxvii as well.
|
||||||
|
|
||||||
|
COPY us_counties_2010
|
||||||
|
FROM 'C:\YourDirectory\us_counties_2010.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
-- Checking the data
|
||||||
|
|
||||||
|
SELECT * FROM us_counties_2010;
|
||||||
|
|
||||||
|
SELECT geo_name, state_us_abbreviation, area_land
|
||||||
|
FROM us_counties_2010
|
||||||
|
ORDER BY area_land DESC
|
||||||
|
LIMIT 3;
|
||||||
|
|
||||||
|
SELECT geo_name, state_us_abbreviation, internal_point_lon
|
||||||
|
FROM us_counties_2010
|
||||||
|
ORDER BY internal_point_lon DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
|
||||||
|
-- Listing 4-4: Creating a table to track supervisor salaries
|
||||||
|
|
||||||
|
CREATE TABLE supervisor_salaries (
|
||||||
|
town varchar(30),
|
||||||
|
county varchar(30),
|
||||||
|
supervisor varchar(30),
|
||||||
|
start_date date,
|
||||||
|
salary money,
|
||||||
|
benefits money
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 4-5: Importing salaries data from CSV to three table columns
|
||||||
|
|
||||||
|
COPY supervisor_salaries (town, supervisor, salary)
|
||||||
|
FROM 'C:\YourDirectory\supervisor_salaries.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
-- Check the data
|
||||||
|
SELECT * FROM supervisor_salaries LIMIT 2;
|
||||||
|
|
||||||
|
-- Listing 4-6 Use a temporary table to add a default value to a column during
|
||||||
|
-- import
|
||||||
|
|
||||||
|
DELETE FROM supervisor_salaries;
|
||||||
|
|
||||||
|
CREATE TEMPORARY TABLE supervisor_salaries_temp (LIKE supervisor_salaries);
|
||||||
|
|
||||||
|
COPY supervisor_salaries_temp (town, supervisor, salary)
|
||||||
|
FROM 'C:\YourDirectory\supervisor_salaries.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
INSERT INTO supervisor_salaries (town, county, supervisor, salary)
|
||||||
|
SELECT town, 'Some County', supervisor, salary
|
||||||
|
FROM supervisor_salaries_temp;
|
||||||
|
|
||||||
|
DROP TABLE supervisor_salaries_temp;
|
||||||
|
|
||||||
|
-- Check the data
|
||||||
|
SELECT * FROM supervisor_salaries LIMIT 2;
|
||||||
|
|
||||||
|
-- Listing 4-7: Export an entire table with COPY
|
||||||
|
|
||||||
|
COPY us_counties_2010
|
||||||
|
TO 'C:\YourDirectory\us_counties_export.txt'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER '|');
|
||||||
|
|
||||||
|
|
||||||
|
-- Listing 4-8: Exporting selected columns from a table with COPY
|
||||||
|
|
||||||
|
COPY us_counties_2010 (geo_name, internal_point_lat, internal_point_lon)
|
||||||
|
TO 'C:\YourDirectory\us_counties_latlon_export.txt'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER '|');
|
||||||
|
|
||||||
|
-- Listing 4-9: Exporting query results with COPY
|
||||||
|
|
||||||
|
COPY (
|
||||||
|
SELECT geo_name, state_us_abbreviation
|
||||||
|
FROM us_counties_2010
|
||||||
|
WHERE geo_name ILIKE '%mill%'
|
||||||
|
)
|
||||||
|
TO 'C:\YourDirectory\us_counties_mill_export.txt'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER '|');
|
||||||
207
Chapter_06/Chapter_06.sql
Normal file
207
Chapter_06/Chapter_06.sql
Normal file
@ -0,0 +1,207 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 5 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 5-1: Basic addition, subtraction and multiplication with SQL
|
||||||
|
|
||||||
|
SELECT 2 + 2; -- addition
|
||||||
|
SELECT 9 - 1; -- subtraction
|
||||||
|
SELECT 3 * 4; -- multiplication
|
||||||
|
|
||||||
|
-- Listing 5-2: Integer and decimal division with SQL
|
||||||
|
|
||||||
|
SELECT 11 / 6; -- integer division
|
||||||
|
SELECT 11 % 6; -- modulo division
|
||||||
|
SELECT 11.0 / 6; -- decimal division
|
||||||
|
SELECT CAST(11 AS numeric(3,1)) / 6;
|
||||||
|
|
||||||
|
-- Listing 5-3: Exponents, roots and factorials with SQL
|
||||||
|
|
||||||
|
SELECT 3 ^ 4; -- exponentiation
|
||||||
|
SELECT |/ 10; -- square root (operator)
|
||||||
|
SELECT sqrt(10); -- square root (function)
|
||||||
|
SELECT ||/ 10; -- cube root
|
||||||
|
SELECT 4 !; -- factorial
|
||||||
|
|
||||||
|
-- Order of operations
|
||||||
|
|
||||||
|
SELECT 7 + 8 * 9; -- answer: 79
|
||||||
|
SELECT (7 + 8) * 9; -- answer: 135
|
||||||
|
|
||||||
|
SELECT 3 ^ 3 - 1; -- answer: 26
|
||||||
|
SELECT 3 ^ (3 - 1); -- answer: 9
|
||||||
|
|
||||||
|
-- Listing 5-4: Selecting Census population columns by race with aliases
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
state_us_abbreviation AS "st",
|
||||||
|
p0010001 AS "Total Population",
|
||||||
|
p0010003 AS "White Alone",
|
||||||
|
p0010004 AS "Black or African American Alone",
|
||||||
|
p0010005 AS "Am Indian/Alaska Native Alone",
|
||||||
|
p0010006 AS "Asian Alone",
|
||||||
|
p0010007 AS "Native Hawaiian and Other Pacific Islander Alone",
|
||||||
|
p0010008 AS "Some Other Race Alone",
|
||||||
|
p0010009 AS "Two or More Races"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 5-5: Adding two columns in us_counties_2010
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
state_us_abbreviation AS "st",
|
||||||
|
p0010003 AS "White Alone",
|
||||||
|
p0010004 AS "Black Alone",
|
||||||
|
p0010003 + p0010004 AS "Total White and Black"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 5-6: Checking Census data totals
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
state_us_abbreviation AS "st",
|
||||||
|
p0010001 AS "Total",
|
||||||
|
p0010003 + p0010004 + p0010005 + p0010006 + p0010007
|
||||||
|
+ p0010008 + p0010009 AS "All Races",
|
||||||
|
(p0010003 + p0010004 + p0010005 + p0010006 + p0010007
|
||||||
|
+ p0010008 + p0010009) - p0010001 AS "Difference"
|
||||||
|
FROM us_counties_2010
|
||||||
|
ORDER BY "Difference" DESC;
|
||||||
|
|
||||||
|
-- Listing 5-7: Calculating the percent of the population that is
|
||||||
|
-- Asian by county (percent of the whole)
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
state_us_abbreviation AS "st",
|
||||||
|
(CAST(p0010006 AS numeric(8,1)) / p0010001) * 100 AS "pct_asian"
|
||||||
|
FROM us_counties_2010
|
||||||
|
ORDER BY "pct_asian" DESC;
|
||||||
|
|
||||||
|
-- Listing 5-8: Calculating percent change
|
||||||
|
|
||||||
|
CREATE TABLE percent_change (
|
||||||
|
department varchar(20),
|
||||||
|
spend_2014 numeric(10,2),
|
||||||
|
spend_2017 numeric(10,2)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO percent_change
|
||||||
|
VALUES
|
||||||
|
('Building', 250000, 289000),
|
||||||
|
('Assessor', 178556, 179500),
|
||||||
|
('Library', 87777, 90001),
|
||||||
|
('Clerk', 451980, 650000),
|
||||||
|
('Police', 250000, 223000),
|
||||||
|
('Recreation', 199000, 195000);
|
||||||
|
|
||||||
|
SELECT department,
|
||||||
|
spend_2014,
|
||||||
|
spend_2017,
|
||||||
|
round( (spend_2017 - spend_2014) /
|
||||||
|
spend_2014 * 100, 1 ) AS "pct_change"
|
||||||
|
FROM percent_change;
|
||||||
|
|
||||||
|
-- Listing 5-9: Using sum() and avg() aggregate functions
|
||||||
|
|
||||||
|
SELECT sum(p0010001) AS "County Sum",
|
||||||
|
round(avg(p0010001), 0) AS "County Average"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 5-10: Testing SQL percentile functions
|
||||||
|
|
||||||
|
CREATE TABLE percentile_test (
|
||||||
|
numbers integer
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO percentile_test (numbers) VALUES
|
||||||
|
(1), (2), (3), (4), (5), (6);
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
percentile_cont(.5)
|
||||||
|
WITHIN GROUP (ORDER BY numbers),
|
||||||
|
percentile_disc(.5)
|
||||||
|
WITHIN GROUP (ORDER BY numbers)
|
||||||
|
FROM percentile_test;
|
||||||
|
|
||||||
|
-- Listing 5-11: Using sum(), avg(), and percentile_cont() aggregate functions
|
||||||
|
|
||||||
|
SELECT sum(p0010001) AS "County Sum",
|
||||||
|
round(avg(p0010001), 0) AS "County Average",
|
||||||
|
percentile_cont(.5)
|
||||||
|
WITHIN GROUP (ORDER BY p0010001) AS "County Median"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 5-12: Passing an array of values to percentile_cont()
|
||||||
|
|
||||||
|
-- quartiles
|
||||||
|
SELECT percentile_cont(array[.25,.5,.75])
|
||||||
|
WITHIN GROUP (ORDER BY p0010001) AS "quartiles"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Extra:
|
||||||
|
-- quintiles
|
||||||
|
SELECT percentile_cont(array[.2,.4,.6,.8])
|
||||||
|
WITHIN GROUP (ORDER BY p0010001) AS "quintiles"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- deciles
|
||||||
|
SELECT percentile_cont(array[.1,.2,.3,.4,.5,.6,.7,.8,.9])
|
||||||
|
WITHIN GROUP (ORDER BY p0010001) AS "deciles"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 5-13: Using unnest() to turn an array into rows
|
||||||
|
|
||||||
|
SELECT unnest(
|
||||||
|
percentile_cont(array[.25,.5,.75])
|
||||||
|
WITHIN GROUP (ORDER BY p0010001)
|
||||||
|
) AS "quartiles"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 5-14: Creating a median() aggregate function in PostgreSQL
|
||||||
|
-- Source: https://wiki.postgresql.org/wiki/Aggregate_Median
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION _final_median(anyarray)
|
||||||
|
RETURNS float8 AS
|
||||||
|
$$
|
||||||
|
WITH q AS
|
||||||
|
(
|
||||||
|
SELECT val
|
||||||
|
FROM unnest($1) val
|
||||||
|
WHERE VAL IS NOT NULL
|
||||||
|
ORDER BY 1
|
||||||
|
),
|
||||||
|
cnt AS
|
||||||
|
(
|
||||||
|
SELECT COUNT(*) AS c FROM q
|
||||||
|
)
|
||||||
|
SELECT AVG(val)::float8
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT val FROM q
|
||||||
|
LIMIT 2 - MOD((SELECT c FROM cnt), 2)
|
||||||
|
OFFSET GREATEST(CEIL((SELECT c FROM cnt) / 2.0) - 1,0)
|
||||||
|
) q2;
|
||||||
|
$$
|
||||||
|
LANGUAGE sql IMMUTABLE;
|
||||||
|
|
||||||
|
CREATE AGGREGATE median(anyelement) (
|
||||||
|
SFUNC=array_append,
|
||||||
|
STYPE=anyarray,
|
||||||
|
FINALFUNC=_final_median,
|
||||||
|
INITCOND='{}'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 5-15: Using a median() aggregate function
|
||||||
|
|
||||||
|
SELECT sum(p0010001) AS "County Sum",
|
||||||
|
round(avg(p0010001), 0) AS "County Average",
|
||||||
|
median(p0010001) AS "County Median",
|
||||||
|
percentile_cont(.5)
|
||||||
|
WITHIN GROUP (ORDER BY P0010001) AS "50th Percentile"
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 5-16: Finding the most-frequent value with mode()
|
||||||
|
|
||||||
|
SELECT mode() WITHIN GROUP (ORDER BY p0010001)
|
||||||
|
FROM us_counties_2010;
|
||||||
196
Chapter_07/Chapter_07.sql
Normal file
196
Chapter_07/Chapter_07.sql
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 6 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 6-1: Creating the departments and employees tables
|
||||||
|
|
||||||
|
CREATE TABLE departments (
|
||||||
|
dept_id bigserial,
|
||||||
|
dept varchar(100),
|
||||||
|
city varchar(100),
|
||||||
|
CONSTRAINT dept_key PRIMARY KEY (dept_id),
|
||||||
|
CONSTRAINT dept_city_unique UNIQUE (dept, city)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE employees (
|
||||||
|
emp_id bigserial,
|
||||||
|
first_name varchar(100),
|
||||||
|
last_name varchar(100),
|
||||||
|
salary integer,
|
||||||
|
dept_id integer REFERENCES departments (dept_id),
|
||||||
|
CONSTRAINT emp_key PRIMARY KEY (emp_id),
|
||||||
|
CONSTRAINT emp_dept_unique UNIQUE (emp_id, dept_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO departments (dept, city)
|
||||||
|
VALUES
|
||||||
|
('Tax', 'Atlanta'),
|
||||||
|
('IT', 'Boston');
|
||||||
|
|
||||||
|
INSERT INTO employees (first_name, last_name, salary, dept_id)
|
||||||
|
VALUES
|
||||||
|
('Nancy', 'Jones', 62500, 1),
|
||||||
|
('Lee', 'Smith', 59300, 1),
|
||||||
|
('Soo', 'Nguyen', 83000, 2),
|
||||||
|
('Janet', 'King', 95000, 2);
|
||||||
|
|
||||||
|
-- Listing 6-2: Joining the employees and departments tables
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM employees JOIN departments
|
||||||
|
ON employees.dept_id = departments.dept_id;
|
||||||
|
|
||||||
|
-- Listing 6-3: Creating two tables to explore JOIN types
|
||||||
|
|
||||||
|
CREATE TABLE schools_left (
|
||||||
|
id integer CONSTRAINT left_id_key PRIMARY KEY,
|
||||||
|
left_school varchar(30)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE schools_right (
|
||||||
|
id integer CONSTRAINT right_id_key PRIMARY KEY,
|
||||||
|
right_school varchar(30)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO schools_left (id, left_school) VALUES
|
||||||
|
(1, 'Oak Street School'),
|
||||||
|
(2, 'Roosevelt High School'),
|
||||||
|
(5, 'Washington Middle School'),
|
||||||
|
(6, 'Jefferson High School');
|
||||||
|
|
||||||
|
INSERT INTO schools_right (id, right_school) VALUES
|
||||||
|
(1, 'Oak Street School'),
|
||||||
|
(2, 'Roosevelt High School'),
|
||||||
|
(3, 'Morrison Elementary'),
|
||||||
|
(4, 'Chase Magnet Academy'),
|
||||||
|
(6, 'Jefferson High School');
|
||||||
|
|
||||||
|
-- Listing 6-4: Using JOIN
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM schools_left JOIN schools_right
|
||||||
|
ON schools_left.id = schools_right.id;
|
||||||
|
|
||||||
|
-- Bonus: Also can be specified as INNER JOIN
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM schools_left INNER JOIN schools_right
|
||||||
|
ON schools_left.id = schools_right.id;
|
||||||
|
|
||||||
|
-- Listing 6-5: Using LEFT JOIN
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM schools_left LEFT JOIN schools_right
|
||||||
|
ON schools_left.id = schools_right.id;
|
||||||
|
|
||||||
|
-- Listing 6-6: Using RIGHT JOIN
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM schools_left RIGHT JOIN schools_right
|
||||||
|
ON schools_left.id = schools_right.id;
|
||||||
|
|
||||||
|
-- Listing 6-7: Using FULL OUTER JOIN
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM schools_left FULL OUTER JOIN schools_right
|
||||||
|
ON schools_left.id = schools_right.id;
|
||||||
|
|
||||||
|
-- Listing 6-8: Using CROSS JOIN
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM schools_left CROSS JOIN schools_right;
|
||||||
|
|
||||||
|
-- Listing 6-9: Filtering to show missing values with IS NULL
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM schools_left LEFT JOIN schools_right
|
||||||
|
ON schools_left.id = schools_right.id
|
||||||
|
WHERE schools_right.id IS NULL;
|
||||||
|
|
||||||
|
-- Listing 6-10: Querying specific columns in a join
|
||||||
|
SELECT schools_left.id,
|
||||||
|
schools_left.left_school,
|
||||||
|
schools_right.right_school
|
||||||
|
FROM schools_left LEFT JOIN schools_right
|
||||||
|
ON schools_left.id = schools_right.id;
|
||||||
|
|
||||||
|
-- Listing 6-11: Simplifying code with table aliases
|
||||||
|
SELECT lt.id,
|
||||||
|
lt.left_school,
|
||||||
|
rt.right_school
|
||||||
|
FROM schools_left AS lt LEFT JOIN schools_right AS rt
|
||||||
|
ON lt.id = rt.id;
|
||||||
|
|
||||||
|
-- Listing 6-12: Joining multiple tables
|
||||||
|
CREATE TABLE schools_enrollment (
|
||||||
|
id integer,
|
||||||
|
enrollment integer
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE schools_grades (
|
||||||
|
id integer,
|
||||||
|
grades varchar(10)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO schools_enrollment (id, enrollment)
|
||||||
|
VALUES
|
||||||
|
(1, 360),
|
||||||
|
(2, 1001),
|
||||||
|
(5, 450),
|
||||||
|
(6, 927);
|
||||||
|
|
||||||
|
INSERT INTO schools_grades (id, grades)
|
||||||
|
VALUES
|
||||||
|
(1, 'K-3'),
|
||||||
|
(2, '9-12'),
|
||||||
|
(5, '6-8'),
|
||||||
|
(6, '9-12');
|
||||||
|
|
||||||
|
SELECT lt.id, lt.left_school, en.enrollment, gr.grades
|
||||||
|
FROM schools_left AS lt LEFT JOIN schools_enrollment AS en
|
||||||
|
ON lt.id = en.id
|
||||||
|
LEFT JOIN schools_grades AS gr
|
||||||
|
ON lt.id = gr.id;
|
||||||
|
|
||||||
|
-- Listing 6-13: Performing math on joined Census tables
|
||||||
|
-- Decennial Census 2000. Full data dictionary at https://www.census.gov/prod/cen2000/doc/pl94-171.pdf
|
||||||
|
-- Note: Some non-number columns have been given more descriptive names
|
||||||
|
|
||||||
|
CREATE TABLE us_counties_2000 (
|
||||||
|
geo_name varchar(90), -- County/state name,
|
||||||
|
state_us_abbreviation varchar(2), -- State/U.S. abbreviation
|
||||||
|
state_fips varchar(2), -- State FIPS code
|
||||||
|
county_fips varchar(3), -- County code
|
||||||
|
p0010001 integer, -- Total population
|
||||||
|
p0010002 integer, -- Population of one race:
|
||||||
|
p0010003 integer, -- White Alone
|
||||||
|
p0010004 integer, -- Black or African American alone
|
||||||
|
p0010005 integer, -- American Indian and Alaska Native alone
|
||||||
|
p0010006 integer, -- Asian alone
|
||||||
|
p0010007 integer, -- Native Hawaiian and Other Pacific Islander alone
|
||||||
|
p0010008 integer, -- Some Other Race alone
|
||||||
|
p0010009 integer, -- Population of two or more races
|
||||||
|
p0010010 integer, -- Population of two races
|
||||||
|
p0020002 integer, -- Hispanic or Latino
|
||||||
|
p0020003 integer -- Not Hispanic or Latino:
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY us_counties_2000
|
||||||
|
FROM 'C:\YourDirectory\us_counties_2000.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
SELECT c2010.geo_name,
|
||||||
|
c2010.state_us_abbreviation AS state,
|
||||||
|
c2010.p0010001 AS pop_2010,
|
||||||
|
c2000.p0010001 AS pop_2000,
|
||||||
|
c2010.p0010001 - c2000.p0010001 AS raw_change,
|
||||||
|
round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
|
||||||
|
/ c2000.p0010001 * 100, 1 ) AS pct_change
|
||||||
|
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
|
||||||
|
ON c2010.state_fips = c2000.state_fips
|
||||||
|
AND c2010.county_fips = c2000.county_fips
|
||||||
|
AND c2010.p0010001 <> c2000.p0010001
|
||||||
|
ORDER BY pct_change DESC;
|
||||||
184
Chapter_08/Chapter_08.sql
Normal file
184
Chapter_08/Chapter_08.sql
Normal file
@ -0,0 +1,184 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 7 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 7-1: Declaring a single-column natural key as primary key
|
||||||
|
|
||||||
|
-- As a column constraint
|
||||||
|
CREATE TABLE natural_key_example (
|
||||||
|
license_id varchar(10) CONSTRAINT license_key PRIMARY KEY,
|
||||||
|
first_name varchar(50),
|
||||||
|
last_name varchar(50)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Drop the table before trying again
|
||||||
|
DROP TABLE natural_key_example;
|
||||||
|
|
||||||
|
-- As a table constraint
|
||||||
|
CREATE TABLE natural_key_example (
|
||||||
|
license_id varchar(10),
|
||||||
|
first_name varchar(50),
|
||||||
|
last_name varchar(50),
|
||||||
|
CONSTRAINT license_key PRIMARY KEY (license_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 7-2: Example of a primary key violation
|
||||||
|
INSERT INTO natural_key_example (license_id, first_name, last_name)
|
||||||
|
VALUES ('T229901', 'Lynn', 'Malero');
|
||||||
|
|
||||||
|
INSERT INTO natural_key_example (license_id, first_name, last_name)
|
||||||
|
VALUES ('T229901', 'Sam', 'Tracy');
|
||||||
|
|
||||||
|
-- Listing 7-3: Declaring a composite primary key as a natural key
|
||||||
|
CREATE TABLE natural_key_composite_example (
|
||||||
|
student_id varchar(10),
|
||||||
|
school_day date,
|
||||||
|
present boolean,
|
||||||
|
CONSTRAINT student_key PRIMARY KEY (student_id, school_day)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 7-4: Example of a composite primary key violation
|
||||||
|
|
||||||
|
INSERT INTO natural_key_composite_example (student_id, school_day, present)
|
||||||
|
VALUES(775, '1/22/2017', 'Y');
|
||||||
|
|
||||||
|
INSERT INTO natural_key_composite_example (student_id, school_day, present)
|
||||||
|
VALUES(775, '1/23/2017', 'Y');
|
||||||
|
|
||||||
|
INSERT INTO natural_key_composite_example (student_id, school_day, present)
|
||||||
|
VALUES(775, '1/23/2017', 'N');
|
||||||
|
|
||||||
|
-- Listing 7-5: Declaring a bigserial column as a surrogate key
|
||||||
|
|
||||||
|
CREATE TABLE surrogate_key_example (
|
||||||
|
order_number bigserial,
|
||||||
|
product_name varchar(50),
|
||||||
|
order_date date,
|
||||||
|
CONSTRAINT order_key PRIMARY KEY (order_number)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO surrogate_key_example (product_name, order_date)
|
||||||
|
VALUES ('Beachball Polish', '2015-03-17'),
|
||||||
|
('Wrinkle De-Atomizer', '2017-05-22'),
|
||||||
|
('Flux Capacitor', '1985-10-26');
|
||||||
|
|
||||||
|
SELECT * FROM surrogate_key_example;
|
||||||
|
|
||||||
|
-- Listing 7-6: A foreign key example
|
||||||
|
|
||||||
|
CREATE TABLE licenses (
|
||||||
|
license_id varchar(10),
|
||||||
|
first_name varchar(50),
|
||||||
|
last_name varchar(50),
|
||||||
|
CONSTRAINT licenses_key PRIMARY KEY (license_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE registrations (
|
||||||
|
registration_id varchar(10),
|
||||||
|
registration_date date,
|
||||||
|
license_id varchar(10) REFERENCES licenses (license_id),
|
||||||
|
CONSTRAINT registration_key PRIMARY KEY (registration_id, license_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO licenses (license_id, first_name, last_name)
|
||||||
|
VALUES ('T229901', 'Lynn', 'Malero');
|
||||||
|
|
||||||
|
INSERT INTO registrations (registration_id, registration_date, license_id)
|
||||||
|
VALUES ('A203391', '3/17/2017', 'T229901');
|
||||||
|
|
||||||
|
INSERT INTO registrations (registration_id, registration_date, license_id)
|
||||||
|
VALUES ('A75772', '3/17/2017', 'T000001');
|
||||||
|
|
||||||
|
-- Listing 7-7: CHECK constraint examples
|
||||||
|
|
||||||
|
CREATE TABLE check_constraint_example (
|
||||||
|
user_id bigserial,
|
||||||
|
user_role varchar(50),
|
||||||
|
salary integer,
|
||||||
|
CONSTRAINT user_id_key PRIMARY KEY (user_id),
|
||||||
|
CONSTRAINT check_role_in_list CHECK (user_role IN('Admin', 'Staff')),
|
||||||
|
CONSTRAINT check_salary_not_zero CHECK (salary > 0)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Both of these will fail:
|
||||||
|
INSERT INTO check_constraint_example (user_role)
|
||||||
|
VALUES ('admin');
|
||||||
|
|
||||||
|
INSERT INTO check_constraint_example (salary)
|
||||||
|
VALUES (0);
|
||||||
|
|
||||||
|
-- Listing 7-8: UNIQUE constraint example
|
||||||
|
|
||||||
|
CREATE TABLE unique_constraint_example (
|
||||||
|
contact_id bigserial CONSTRAINT contact_id_key PRIMARY KEY,
|
||||||
|
first_name varchar(50),
|
||||||
|
last_name varchar(50),
|
||||||
|
email varchar(200),
|
||||||
|
CONSTRAINT email_unique UNIQUE (email)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO unique_constraint_example (first_name, last_name, email)
|
||||||
|
VALUES ('Samantha', 'Lee', 'slee@example.org');
|
||||||
|
|
||||||
|
INSERT INTO unique_constraint_example (first_name, last_name, email)
|
||||||
|
VALUES ('Betty', 'Diaz', 'bdiaz@example.org');
|
||||||
|
|
||||||
|
INSERT INTO unique_constraint_example (first_name, last_name, email)
|
||||||
|
VALUES ('Sasha', 'Lee', 'slee@example.org');
|
||||||
|
|
||||||
|
-- Listing 7-9: NOT NULL constraint example
|
||||||
|
|
||||||
|
CREATE TABLE not_null_example (
|
||||||
|
student_id bigserial,
|
||||||
|
first_name varchar(50) NOT NULL,
|
||||||
|
last_name varchar(50) NOT NULL,
|
||||||
|
CONSTRAINT student_id_key PRIMARY KEY (student_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 7-10: Dropping and adding a primary key and a NOT NULL constraint
|
||||||
|
|
||||||
|
-- Drop
|
||||||
|
ALTER TABLE not_null_example DROP CONSTRAINT student_id_key;
|
||||||
|
|
||||||
|
-- Add
|
||||||
|
ALTER TABLE not_null_example ADD CONSTRAINT student_id_key PRIMARY KEY (student_id);
|
||||||
|
|
||||||
|
-- Drop
|
||||||
|
ALTER TABLE not_null_example ALTER COLUMN first_name DROP NOT NULL;
|
||||||
|
|
||||||
|
-- Add
|
||||||
|
ALTER TABLE not_null_example ALTER COLUMN first_name SET NOT NULL;
|
||||||
|
|
||||||
|
-- Listing 7-11: Importing New York City address data
|
||||||
|
|
||||||
|
CREATE TABLE new_york_addresses (
|
||||||
|
longitude numeric(9,6),
|
||||||
|
latitude numeric(9,6),
|
||||||
|
street_number varchar(10),
|
||||||
|
street varchar(32),
|
||||||
|
unit varchar(7),
|
||||||
|
postcode varchar(5),
|
||||||
|
id integer CONSTRAINT new_york_key PRIMARY KEY
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY new_york_addresses
|
||||||
|
FROM 'C:\YourDirectory\city_of_new_york.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
-- Listing 7-12: Benchmark queries for index performance
|
||||||
|
|
||||||
|
EXPLAIN ANALYZE SELECT * FROM new_york_addresses
|
||||||
|
WHERE street = 'BROADWAY';
|
||||||
|
|
||||||
|
EXPLAIN ANALYZE SELECT * FROM new_york_addresses
|
||||||
|
WHERE street = '52 STREET';
|
||||||
|
|
||||||
|
EXPLAIN ANALYZE SELECT * FROM new_york_addresses
|
||||||
|
WHERE street = 'ZWICKY AVENUE';
|
||||||
|
|
||||||
|
-- Listing 7-13: Creating a B-Tree index on the new_york_addresses table
|
||||||
|
|
||||||
|
CREATE INDEX street_idx ON new_york_addresses (street);
|
||||||
299
Chapter_09/Chapter_09.sql
Normal file
299
Chapter_09/Chapter_09.sql
Normal file
@ -0,0 +1,299 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 8 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 8-1: Creating and filling the 2014 Public Libraries Survey table
|
||||||
|
|
||||||
|
CREATE TABLE pls_fy2014_pupld14a (
|
||||||
|
stabr varchar(2) NOT NULL,
|
||||||
|
fscskey varchar(6) CONSTRAINT fscskey2014_key PRIMARY KEY,
|
||||||
|
libid varchar(20) NOT NULL,
|
||||||
|
libname varchar(100) NOT NULL,
|
||||||
|
obereg varchar(2) NOT NULL,
|
||||||
|
rstatus integer NOT NULL,
|
||||||
|
statstru varchar(2) NOT NULL,
|
||||||
|
statname varchar(2) NOT NULL,
|
||||||
|
stataddr varchar(2) NOT NULL,
|
||||||
|
longitud numeric(10,7) NOT NULL,
|
||||||
|
latitude numeric(10,7) NOT NULL,
|
||||||
|
fipsst varchar(2) NOT NULL,
|
||||||
|
fipsco varchar(3) NOT NULL,
|
||||||
|
address varchar(35) NOT NULL,
|
||||||
|
city varchar(20) NOT NULL,
|
||||||
|
zip varchar(5) NOT NULL,
|
||||||
|
zip4 varchar(4) NOT NULL,
|
||||||
|
cnty varchar(20) NOT NULL,
|
||||||
|
phone varchar(10) NOT NULL,
|
||||||
|
c_relatn varchar(2) NOT NULL,
|
||||||
|
c_legbas varchar(2) NOT NULL,
|
||||||
|
c_admin varchar(2) NOT NULL,
|
||||||
|
geocode varchar(3) NOT NULL,
|
||||||
|
lsabound varchar(1) NOT NULL,
|
||||||
|
startdat varchar(10),
|
||||||
|
enddate varchar(10),
|
||||||
|
popu_lsa integer NOT NULL,
|
||||||
|
centlib integer NOT NULL,
|
||||||
|
branlib integer NOT NULL,
|
||||||
|
bkmob integer NOT NULL,
|
||||||
|
master numeric(8,2) NOT NULL,
|
||||||
|
libraria numeric(8,2) NOT NULL,
|
||||||
|
totstaff numeric(8,2) NOT NULL,
|
||||||
|
locgvt integer NOT NULL,
|
||||||
|
stgvt integer NOT NULL,
|
||||||
|
fedgvt integer NOT NULL,
|
||||||
|
totincm integer NOT NULL,
|
||||||
|
salaries integer,
|
||||||
|
benefit integer,
|
||||||
|
staffexp integer,
|
||||||
|
prmatexp integer NOT NULL,
|
||||||
|
elmatexp integer NOT NULL,
|
||||||
|
totexpco integer NOT NULL,
|
||||||
|
totopexp integer NOT NULL,
|
||||||
|
lcap_rev integer NOT NULL,
|
||||||
|
scap_rev integer NOT NULL,
|
||||||
|
fcap_rev integer NOT NULL,
|
||||||
|
cap_rev integer NOT NULL,
|
||||||
|
capital integer NOT NULL,
|
||||||
|
bkvol integer NOT NULL,
|
||||||
|
ebook integer NOT NULL,
|
||||||
|
audio_ph integer NOT NULL,
|
||||||
|
audio_dl float NOT NULL,
|
||||||
|
video_ph integer NOT NULL,
|
||||||
|
video_dl float NOT NULL,
|
||||||
|
databases integer NOT NULL,
|
||||||
|
subscrip integer NOT NULL,
|
||||||
|
hrs_open integer NOT NULL,
|
||||||
|
visits integer NOT NULL,
|
||||||
|
referenc integer NOT NULL,
|
||||||
|
regbor integer NOT NULL,
|
||||||
|
totcir integer NOT NULL,
|
||||||
|
kidcircl integer NOT NULL,
|
||||||
|
elmatcir integer NOT NULL,
|
||||||
|
loanto integer NOT NULL,
|
||||||
|
loanfm integer NOT NULL,
|
||||||
|
totpro integer NOT NULL,
|
||||||
|
totatten integer NOT NULL,
|
||||||
|
gpterms integer NOT NULL,
|
||||||
|
pitusr integer NOT NULL,
|
||||||
|
wifisess integer NOT NULL,
|
||||||
|
yr_sub integer NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX libname2014_idx ON pls_fy2014_pupld14a (libname);
|
||||||
|
CREATE INDEX stabr2014_idx ON pls_fy2014_pupld14a (stabr);
|
||||||
|
CREATE INDEX city2014_idx ON pls_fy2014_pupld14a (city);
|
||||||
|
CREATE INDEX visits2014_idx ON pls_fy2014_pupld14a (visits);
|
||||||
|
|
||||||
|
COPY pls_fy2014_pupld14a
|
||||||
|
FROM 'C:\YourDirectory\pls_fy2014_pupld14a.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
-- Listing 8-2: Creating and filling the 2009 Public Libraries Survey table
|
||||||
|
|
||||||
|
CREATE TABLE pls_fy2009_pupld09a (
|
||||||
|
stabr varchar(2) NOT NULL,
|
||||||
|
fscskey varchar(6) CONSTRAINT fscskey2009_key PRIMARY KEY,
|
||||||
|
libid varchar(20) NOT NULL,
|
||||||
|
libname varchar(100) NOT NULL,
|
||||||
|
address varchar(35) NOT NULL,
|
||||||
|
city varchar(20) NOT NULL,
|
||||||
|
zip varchar(5) NOT NULL,
|
||||||
|
zip4 varchar(4) NOT NULL,
|
||||||
|
cnty varchar(20) NOT NULL,
|
||||||
|
phone varchar(10) NOT NULL,
|
||||||
|
c_relatn varchar(2) NOT NULL,
|
||||||
|
c_legbas varchar(2) NOT NULL,
|
||||||
|
c_admin varchar(2) NOT NULL,
|
||||||
|
geocode varchar(3) NOT NULL,
|
||||||
|
lsabound varchar(1) NOT NULL,
|
||||||
|
startdat varchar(10),
|
||||||
|
enddate varchar(10),
|
||||||
|
popu_lsa integer NOT NULL,
|
||||||
|
centlib integer NOT NULL,
|
||||||
|
branlib integer NOT NULL,
|
||||||
|
bkmob integer NOT NULL,
|
||||||
|
master numeric(8,2) NOT NULL,
|
||||||
|
libraria numeric(8,2) NOT NULL,
|
||||||
|
totstaff numeric(8,2) NOT NULL,
|
||||||
|
locgvt integer NOT NULL,
|
||||||
|
stgvt integer NOT NULL,
|
||||||
|
fedgvt integer NOT NULL,
|
||||||
|
totincm integer NOT NULL,
|
||||||
|
salaries integer,
|
||||||
|
benefit integer,
|
||||||
|
staffexp integer,
|
||||||
|
prmatexp integer NOT NULL,
|
||||||
|
elmatexp integer NOT NULL,
|
||||||
|
totexpco integer NOT NULL,
|
||||||
|
totopexp integer NOT NULL,
|
||||||
|
lcap_rev integer NOT NULL,
|
||||||
|
scap_rev integer NOT NULL,
|
||||||
|
fcap_rev integer NOT NULL,
|
||||||
|
cap_rev integer NOT NULL,
|
||||||
|
capital integer NOT NULL,
|
||||||
|
bkvol integer NOT NULL,
|
||||||
|
ebook integer NOT NULL,
|
||||||
|
audio integer NOT NULL,
|
||||||
|
video integer NOT NULL,
|
||||||
|
databases integer NOT NULL,
|
||||||
|
subscrip integer NOT NULL,
|
||||||
|
hrs_open integer NOT NULL,
|
||||||
|
visits integer NOT NULL,
|
||||||
|
referenc integer NOT NULL,
|
||||||
|
regbor integer NOT NULL,
|
||||||
|
totcir integer NOT NULL,
|
||||||
|
kidcircl integer NOT NULL,
|
||||||
|
loanto integer NOT NULL,
|
||||||
|
loanfm integer NOT NULL,
|
||||||
|
totpro integer NOT NULL,
|
||||||
|
totatten integer NOT NULL,
|
||||||
|
gpterms integer NOT NULL,
|
||||||
|
pitusr integer NOT NULL,
|
||||||
|
yr_sub integer NOT NULL,
|
||||||
|
obereg varchar(2) NOT NULL,
|
||||||
|
rstatus integer NOT NULL,
|
||||||
|
statstru varchar(2) NOT NULL,
|
||||||
|
statname varchar(2) NOT NULL,
|
||||||
|
stataddr varchar(2) NOT NULL,
|
||||||
|
longitud numeric(10,7) NOT NULL,
|
||||||
|
latitude numeric(10,7) NOT NULL,
|
||||||
|
fipsst varchar(2) NOT NULL,
|
||||||
|
fipsco varchar(3) NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX libname2009_idx ON pls_fy2009_pupld09a (libname);
|
||||||
|
CREATE INDEX stabr2009_idx ON pls_fy2009_pupld09a (stabr);
|
||||||
|
CREATE INDEX city2009_idx ON pls_fy2009_pupld09a (city);
|
||||||
|
CREATE INDEX visits2009_idx ON pls_fy2009_pupld09a (visits);
|
||||||
|
|
||||||
|
COPY pls_fy2009_pupld09a
|
||||||
|
FROM 'C:\YourDirectory\pls_fy2009_pupld09a.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
-- Listing 8-3: Using count() for table row counts
|
||||||
|
|
||||||
|
SELECT count(*)
|
||||||
|
FROM pls_fy2014_pupld14a;
|
||||||
|
|
||||||
|
SELECT count(*)
|
||||||
|
FROM pls_fy2009_pupld09a;
|
||||||
|
|
||||||
|
-- Listing 8-4: Using count() for the number of values in a column
|
||||||
|
|
||||||
|
SELECT count(salaries)
|
||||||
|
FROM pls_fy2014_pupld14a;
|
||||||
|
|
||||||
|
-- Listing 8-5: Using count() for the number of distinct values in a column
|
||||||
|
|
||||||
|
SELECT count(libname)
|
||||||
|
FROM pls_fy2014_pupld14a;
|
||||||
|
|
||||||
|
SELECT count(DISTINCT libname)
|
||||||
|
FROM pls_fy2014_pupld14a;
|
||||||
|
|
||||||
|
-- Bonus: find duplicate libnames
|
||||||
|
SELECT libname, count(libname)
|
||||||
|
FROM pls_fy2014_pupld14a
|
||||||
|
GROUP BY libname
|
||||||
|
ORDER BY count(libname) DESC;
|
||||||
|
|
||||||
|
-- Bonus: see location of every Oxford Public Library
|
||||||
|
SELECT libname, city, stabr
|
||||||
|
FROM pls_fy2014_pupld14a
|
||||||
|
WHERE libname = 'OXFORD PUBLIC LIBRARY';
|
||||||
|
|
||||||
|
-- Listing 8-6: Finding the most and fewest visits using max() and min()
|
||||||
|
SELECT max(visits), min(visits)
|
||||||
|
FROM pls_fy2014_pupld14a;
|
||||||
|
|
||||||
|
-- Listing 8-7: Using GROUP BY on the stabr column
|
||||||
|
|
||||||
|
-- There are 56 in 2014.
|
||||||
|
SELECT stabr
|
||||||
|
FROM pls_fy2014_pupld14a
|
||||||
|
GROUP BY stabr
|
||||||
|
ORDER BY stabr;
|
||||||
|
|
||||||
|
-- Bonus: there are 55 in 2009.
|
||||||
|
SELECT stabr
|
||||||
|
FROM pls_fy2009_pupld09a
|
||||||
|
GROUP BY stabr
|
||||||
|
ORDER BY stabr;
|
||||||
|
|
||||||
|
-- Listing 8-8: Using GROUP BY on the city and stabr columns
|
||||||
|
|
||||||
|
SELECT city, stabr
|
||||||
|
FROM pls_fy2014_pupld14a
|
||||||
|
GROUP BY city, stabr
|
||||||
|
ORDER BY city, stabr;
|
||||||
|
|
||||||
|
-- Bonus: We can count some of the combos
|
||||||
|
SELECT city, stabr, count(*)
|
||||||
|
FROM pls_fy2014_pupld14a
|
||||||
|
GROUP BY city, stabr
|
||||||
|
ORDER BY count(*) DESC;
|
||||||
|
|
||||||
|
-- Listing 8-9: GROUP BY with count() on the stabr column
|
||||||
|
|
||||||
|
SELECT stabr, count(*)
|
||||||
|
FROM pls_fy2014_pupld14a
|
||||||
|
GROUP BY stabr
|
||||||
|
ORDER BY count(*) DESC;
|
||||||
|
|
||||||
|
-- Listing 8-10: GROUP BY with count() on the stabr and stataddr columns
|
||||||
|
|
||||||
|
SELECT stabr, stataddr, count(*)
|
||||||
|
FROM pls_fy2014_pupld14a
|
||||||
|
GROUP BY stabr, stataddr
|
||||||
|
ORDER BY stabr ASC, count(*) DESC;
|
||||||
|
|
||||||
|
-- Listing 8-11: Using the sum() aggregate function to total visits to
|
||||||
|
-- libraries in 2014 and 2009
|
||||||
|
|
||||||
|
-- 2014
|
||||||
|
SELECT sum(visits) AS visits_2014
|
||||||
|
FROM pls_fy2014_pupld14a
|
||||||
|
WHERE visits >= 0;
|
||||||
|
|
||||||
|
-- 2009
|
||||||
|
SELECT sum(visits) AS visits_2009
|
||||||
|
FROM pls_fy2009_pupld09a
|
||||||
|
WHERE visits >= 0;
|
||||||
|
|
||||||
|
-- Listing 8-12: Using sum() to total visits on joined 2014 and 2009 library tables
|
||||||
|
|
||||||
|
SELECT sum(pls14.visits) AS visits_2014,
|
||||||
|
sum(pls09.visits) AS visits_2009
|
||||||
|
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
||||||
|
ON pls14.fscskey = pls09.fscskey
|
||||||
|
WHERE pls14.visits >= 0 AND pls09.visits >= 0;
|
||||||
|
|
||||||
|
-- Listing 8-13: Using GROUP BY to track percent change in library visits by state
|
||||||
|
|
||||||
|
SELECT pls14.stabr,
|
||||||
|
sum(pls14.visits) AS visits_2014,
|
||||||
|
sum(pls09.visits) AS visits_2009,
|
||||||
|
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
|
||||||
|
sum(pls09.visits) * 100, 2 ) AS pct_change
|
||||||
|
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
||||||
|
ON pls14.fscskey = pls09.fscskey
|
||||||
|
WHERE pls14.visits >= 0 AND pls09.visits >= 0
|
||||||
|
GROUP BY pls14.stabr
|
||||||
|
ORDER BY pct_change DESC;
|
||||||
|
|
||||||
|
-- Listing 8-14: Using HAVING to filter the results of an aggregate query
|
||||||
|
|
||||||
|
SELECT pls14.stabr,
|
||||||
|
sum(pls14.visits) AS visits_2014,
|
||||||
|
sum(pls09.visits) AS visits_2009,
|
||||||
|
round( (CAST(sum(pls14.visits) AS decimal(10,1)) - sum(pls09.visits)) /
|
||||||
|
sum(pls09.visits) * 100, 2 ) AS pct_change
|
||||||
|
FROM pls_fy2014_pupld14a pls14 JOIN pls_fy2009_pupld09a pls09
|
||||||
|
ON pls14.fscskey = pls09.fscskey
|
||||||
|
WHERE pls14.visits >= 0 AND pls09.visits >= 0
|
||||||
|
GROUP BY pls14.stabr
|
||||||
|
HAVING sum(pls14.visits) > 50000000
|
||||||
|
ORDER BY pct_change DESC;
|
||||||
259
Chapter_10/Chapter_10.sql
Normal file
259
Chapter_10/Chapter_10.sql
Normal file
@ -0,0 +1,259 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 9 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 9-1: Importing the FSIS Meat, Poultry, and Egg Inspection Directory
|
||||||
|
-- https://catalog.data.gov/dataset/meat-poultry-and-egg-inspection-directory-by-establishment-name
|
||||||
|
|
||||||
|
CREATE TABLE meat_poultry_egg_inspect (
|
||||||
|
est_number varchar(50) CONSTRAINT est_number_key PRIMARY KEY,
|
||||||
|
company varchar(100),
|
||||||
|
street varchar(100),
|
||||||
|
city varchar(30),
|
||||||
|
st varchar(2),
|
||||||
|
zip varchar(5),
|
||||||
|
phone varchar(14),
|
||||||
|
grant_date date,
|
||||||
|
activities text,
|
||||||
|
dbas text
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY meat_poultry_egg_inspect
|
||||||
|
FROM 'C:\YourDirectory\MPI_Directory_by_Establishment_Name.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER ',');
|
||||||
|
|
||||||
|
CREATE INDEX company_idx ON meat_poultry_egg_inspect (company);
|
||||||
|
|
||||||
|
-- Count the rows imported:
|
||||||
|
SELECT count(*) FROM meat_poultry_egg_inspect;
|
||||||
|
|
||||||
|
-- Listing 9-2: Finding multiple companies at the same address
|
||||||
|
SELECT company,
|
||||||
|
street,
|
||||||
|
city,
|
||||||
|
st,
|
||||||
|
count(*) AS address_count
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
GROUP BY company, street, city, st
|
||||||
|
HAVING count(*) > 1
|
||||||
|
ORDER BY company, street, city, st;
|
||||||
|
|
||||||
|
-- Listing 9-3: Grouping and counting states
|
||||||
|
SELECT st,
|
||||||
|
count(*) AS st_count
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
GROUP BY st
|
||||||
|
ORDER BY st;
|
||||||
|
|
||||||
|
-- Listing 9-4: Using IS NULL to find missing values in the st column
|
||||||
|
SELECT est_number,
|
||||||
|
company,
|
||||||
|
city,
|
||||||
|
st,
|
||||||
|
zip
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
WHERE st IS NULL;
|
||||||
|
|
||||||
|
-- Listing 9-5: Using GROUP BY and count() to find inconsistent company names
|
||||||
|
|
||||||
|
SELECT company,
|
||||||
|
count(*) AS company_count
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
GROUP BY company
|
||||||
|
ORDER BY company ASC;
|
||||||
|
|
||||||
|
-- Listing 9-6: Using length() and count() to test the zip column
|
||||||
|
|
||||||
|
SELECT length(zip),
|
||||||
|
count(*) AS length_count
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
GROUP BY length(zip)
|
||||||
|
ORDER BY length(zip) ASC;
|
||||||
|
|
||||||
|
-- Listing 9-7: Filtering with length() to find short zip values
|
||||||
|
|
||||||
|
SELECT st,
|
||||||
|
count(*) AS st_count
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
WHERE length(zip) < 5
|
||||||
|
GROUP BY st
|
||||||
|
ORDER BY st ASC;
|
||||||
|
|
||||||
|
-- Listing 9-8: Backing up a table
|
||||||
|
|
||||||
|
CREATE TABLE meat_poultry_egg_inspect_backup AS
|
||||||
|
SELECT * FROM meat_poultry_egg_inspect;
|
||||||
|
|
||||||
|
-- Check number of records:
|
||||||
|
SELECT
|
||||||
|
(SELECT count(*) FROM meat_poultry_egg_inspect) AS original,
|
||||||
|
(SELECT count(*) FROM meat_poultry_egg_inspect_backup) AS backup;
|
||||||
|
|
||||||
|
-- Listing 9-9: Creating and filling the st_copy column with ALTER TABLE and UPDATE
|
||||||
|
|
||||||
|
ALTER TABLE meat_poultry_egg_inspect ADD COLUMN st_copy varchar(2);
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET st_copy = st;
|
||||||
|
|
||||||
|
-- Listing 9-10: Checking values in the st and st_copy columns
|
||||||
|
|
||||||
|
SELECT st,
|
||||||
|
st_copy
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
ORDER BY st;
|
||||||
|
|
||||||
|
-- Listing 9-11: Updating the st column for three establishments
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET st = 'MN'
|
||||||
|
WHERE est_number = 'V18677A';
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET st = 'AL'
|
||||||
|
WHERE est_number = 'M45319+P45319';
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET st = 'WI'
|
||||||
|
WHERE est_number = 'M263A+P263A+V263A';
|
||||||
|
|
||||||
|
-- Listing 9-12: Restoring original st column values
|
||||||
|
|
||||||
|
-- Restoring from the column backup
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET st = st_copy;
|
||||||
|
|
||||||
|
-- Restoring from the table backup
|
||||||
|
UPDATE meat_poultry_egg_inspect original
|
||||||
|
SET st = backup.st
|
||||||
|
FROM meat_poultry_egg_inspect_backup backup
|
||||||
|
WHERE original.est_number = backup.est_number;
|
||||||
|
|
||||||
|
-- Listing 9-13: Creating and filling the company_standard column
|
||||||
|
|
||||||
|
ALTER TABLE meat_poultry_egg_inspect ADD COLUMN company_standard varchar(100);
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET company_standard = company;
|
||||||
|
|
||||||
|
-- Listing 9-14: Use UPDATE to modify field values that match a string
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET company_standard = 'Armour-Eckrich Meats'
|
||||||
|
WHERE company LIKE 'Armour%';
|
||||||
|
|
||||||
|
SELECT company, company_standard
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
WHERE company LIKE 'Armour%';
|
||||||
|
|
||||||
|
-- Listing 9-15: Creating and filling the zip_copy column
|
||||||
|
|
||||||
|
ALTER TABLE meat_poultry_egg_inspect ADD COLUMN zip_copy varchar(5);
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET zip_copy = zip;
|
||||||
|
|
||||||
|
-- Listing 9-16: Modify codes in the zip column missing two leading zeros
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET zip = '00' || zip
|
||||||
|
WHERE st IN('PR','VI') AND length(zip) = 3;
|
||||||
|
|
||||||
|
-- Listing 9-17: Modify codes in the zip column missing one leading zero
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET zip = '0' || zip
|
||||||
|
WHERE st IN('CT','MA','ME','NH','NJ','RI','VT') AND length(zip) = 4;
|
||||||
|
|
||||||
|
-- Listing 9-18: Creating and filling a state_regions table
|
||||||
|
|
||||||
|
CREATE TABLE state_regions (
|
||||||
|
st varchar(2) CONSTRAINT st_key PRIMARY KEY,
|
||||||
|
region varchar(20) NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY state_regions
|
||||||
|
FROM 'C:\YourDirectory\state_regions.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER ',');
|
||||||
|
|
||||||
|
-- Listing 9-19: Adding and updating an inspection_date column
|
||||||
|
|
||||||
|
ALTER TABLE meat_poultry_egg_inspect ADD COLUMN inspection_date date;
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect inspect
|
||||||
|
SET inspection_date = '2019-12-01'
|
||||||
|
WHERE EXISTS (SELECT state_regions.region
|
||||||
|
FROM state_regions
|
||||||
|
WHERE inspect.st = state_regions.st
|
||||||
|
AND state_regions.region = 'New England');
|
||||||
|
|
||||||
|
-- Listing 9-20: Viewing updated inspection_date values
|
||||||
|
|
||||||
|
SELECT st, inspection_date
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
GROUP BY st, inspection_date
|
||||||
|
ORDER BY st;
|
||||||
|
|
||||||
|
-- Listing 9-21: Delete rows matching an expression
|
||||||
|
|
||||||
|
DELETE FROM meat_poultry_egg_inspect
|
||||||
|
WHERE st IN('PR','VI');
|
||||||
|
|
||||||
|
-- Listing 9-22: Remove a column from a table using DROP
|
||||||
|
|
||||||
|
ALTER TABLE meat_poultry_egg_inspect DROP COLUMN zip_copy;
|
||||||
|
|
||||||
|
-- Listing 9-23: Remove a table from a database using DROP
|
||||||
|
|
||||||
|
DROP TABLE meat_poultry_egg_inspect_backup;
|
||||||
|
|
||||||
|
-- Listing 9-24: Demonstrating a transaction block
|
||||||
|
|
||||||
|
-- Start transaction and perform update
|
||||||
|
START TRANSACTION;
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET company = 'AGRO Merchantss Oakland LLC'
|
||||||
|
WHERE company = 'AGRO Merchants Oakland, LLC';
|
||||||
|
|
||||||
|
-- view changes
|
||||||
|
SELECT company
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
WHERE company LIKE 'AGRO%'
|
||||||
|
ORDER BY company;
|
||||||
|
|
||||||
|
-- Revert changes
|
||||||
|
ROLLBACK;
|
||||||
|
|
||||||
|
-- See restored state
|
||||||
|
SELECT company
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
WHERE company LIKE 'AGRO%'
|
||||||
|
ORDER BY company;
|
||||||
|
|
||||||
|
-- Alternately, commit changes at the end:
|
||||||
|
START TRANSACTION;
|
||||||
|
|
||||||
|
UPDATE meat_poultry_egg_inspect
|
||||||
|
SET company = 'AGRO Merchants Oakland LLC'
|
||||||
|
WHERE company = 'AGRO Merchants Oakland, LLC';
|
||||||
|
|
||||||
|
COMMIT;
|
||||||
|
|
||||||
|
-- Listing 9-25: Backing up a table while adding and filling a new column
|
||||||
|
|
||||||
|
CREATE TABLE meat_poultry_egg_inspect_backup AS
|
||||||
|
SELECT *,
|
||||||
|
'2018-02-07'::date AS reviewed_date
|
||||||
|
FROM meat_poultry_egg_inspect;
|
||||||
|
|
||||||
|
-- Listing 9-26: Swapping table names using ALTER TABLE
|
||||||
|
|
||||||
|
ALTER TABLE meat_poultry_egg_inspect RENAME TO meat_poultry_egg_inspect_temp;
|
||||||
|
ALTER TABLE meat_poultry_egg_inspect_backup RENAME TO meat_poultry_egg_inspect;
|
||||||
|
ALTER TABLE meat_poultry_egg_inspect_temp RENAME TO meat_poultry_egg_inspect_backup;
|
||||||
|
|
||||||
|
|
||||||
167
Chapter_11/Chapter_11.sql
Normal file
167
Chapter_11/Chapter_11.sql
Normal file
@ -0,0 +1,167 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 10 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 10-1: Create Census 2011-2015 ACS 5-Year stats table and import data
|
||||||
|
|
||||||
|
CREATE TABLE acs_2011_2015_stats (
|
||||||
|
geoid varchar(14) CONSTRAINT geoid_key PRIMARY KEY,
|
||||||
|
county varchar(50) NOT NULL,
|
||||||
|
st varchar(20) NOT NULL,
|
||||||
|
pct_travel_60_min numeric(5,3) NOT NULL,
|
||||||
|
pct_bachelors_higher numeric(5,3) NOT NULL,
|
||||||
|
pct_masters_higher numeric(5,3) NOT NULL,
|
||||||
|
median_hh_income integer,
|
||||||
|
CHECK (pct_masters_higher <= pct_bachelors_higher)
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY acs_2011_2015_stats
|
||||||
|
FROM 'C:\YourDirectory\acs_2011_2015_stats.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER ',');
|
||||||
|
|
||||||
|
SELECT * FROM acs_2011_2015_stats;
|
||||||
|
|
||||||
|
-- Listing 10-2: Using corr(Y, X) to measure the relationship between
|
||||||
|
-- education and income
|
||||||
|
|
||||||
|
SELECT corr(median_hh_income, pct_bachelors_higher)
|
||||||
|
AS bachelors_income_r
|
||||||
|
FROM acs_2011_2015_stats;
|
||||||
|
|
||||||
|
-- Listing 10-3: Using corr(Y, X) on additional variables
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
round(
|
||||||
|
corr(median_hh_income, pct_bachelors_higher)::numeric, 2
|
||||||
|
) AS bachelors_income_r,
|
||||||
|
round(
|
||||||
|
corr(pct_travel_60_min, median_hh_income)::numeric, 2
|
||||||
|
) AS income_travel_r,
|
||||||
|
round(
|
||||||
|
corr(pct_travel_60_min, pct_bachelors_higher)::numeric, 2
|
||||||
|
) AS bachelors_travel_r
|
||||||
|
FROM acs_2011_2015_stats;
|
||||||
|
|
||||||
|
-- Listing 10-4: Regression slope and intercept functions
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
round(
|
||||||
|
regr_slope(median_hh_income, pct_bachelors_higher)::numeric, 2
|
||||||
|
) AS slope,
|
||||||
|
round(
|
||||||
|
regr_intercept(median_hh_income, pct_bachelors_higher)::numeric, 2
|
||||||
|
) AS y_intercept
|
||||||
|
FROM acs_2011_2015_stats;
|
||||||
|
|
||||||
|
-- Listing 10-5: Calculating the coefficient of determination, or r-squared
|
||||||
|
|
||||||
|
SELECT round(
|
||||||
|
regr_r2(median_hh_income, pct_bachelors_higher)::numeric, 3
|
||||||
|
) AS r_squared
|
||||||
|
FROM acs_2011_2015_stats;
|
||||||
|
|
||||||
|
-- Bonus: Additional stats functions.
|
||||||
|
-- Variance
|
||||||
|
SELECT var_pop(median_hh_income)
|
||||||
|
FROM acs_2011_2015_stats;
|
||||||
|
|
||||||
|
-- Standard deviation of the entire population
|
||||||
|
SELECT stddev_pop(median_hh_income)
|
||||||
|
FROM acs_2011_2015_stats;
|
||||||
|
|
||||||
|
-- Covariance
|
||||||
|
SELECT covar_pop(median_hh_income, pct_bachelors_higher)
|
||||||
|
FROM acs_2011_2015_stats;
|
||||||
|
|
||||||
|
-- Listing 10-6: The rank() and dense_rank() window functions
|
||||||
|
|
||||||
|
CREATE TABLE widget_companies (
|
||||||
|
id bigserial,
|
||||||
|
company varchar(30) NOT NULL,
|
||||||
|
widget_output integer NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO widget_companies (company, widget_output)
|
||||||
|
VALUES
|
||||||
|
('Morse Widgets', 125000),
|
||||||
|
('Springfield Widget Masters', 143000),
|
||||||
|
('Best Widgets', 196000),
|
||||||
|
('Acme Inc.', 133000),
|
||||||
|
('District Widget Inc.', 201000),
|
||||||
|
('Clarke Amalgamated', 620000),
|
||||||
|
('Stavesacre Industries', 244000),
|
||||||
|
('Bowers Widget Emporium', 201000);
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
company,
|
||||||
|
widget_output,
|
||||||
|
rank() OVER (ORDER BY widget_output DESC),
|
||||||
|
dense_rank() OVER (ORDER BY widget_output DESC)
|
||||||
|
FROM widget_companies;
|
||||||
|
|
||||||
|
-- Listing 10-7: Applying rank() within groups using PARTITION BY
|
||||||
|
|
||||||
|
CREATE TABLE store_sales (
|
||||||
|
store varchar(30),
|
||||||
|
category varchar(30) NOT NULL,
|
||||||
|
unit_sales bigint NOT NULL,
|
||||||
|
CONSTRAINT store_category_key PRIMARY KEY (store, category)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO store_sales (store, category, unit_sales)
|
||||||
|
VALUES
|
||||||
|
('Broders', 'Cereal', 1104),
|
||||||
|
('Wallace', 'Ice Cream', 1863),
|
||||||
|
('Broders', 'Ice Cream', 2517),
|
||||||
|
('Cramers', 'Ice Cream', 2112),
|
||||||
|
('Broders', 'Beer', 641),
|
||||||
|
('Cramers', 'Cereal', 1003),
|
||||||
|
('Cramers', 'Beer', 640),
|
||||||
|
('Wallace', 'Cereal', 980),
|
||||||
|
('Wallace', 'Beer', 988);
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
category,
|
||||||
|
store,
|
||||||
|
unit_sales,
|
||||||
|
rank() OVER (PARTITION BY category ORDER BY unit_sales DESC)
|
||||||
|
FROM store_sales;
|
||||||
|
|
||||||
|
-- Listing 10-8: Create and fill a 2015 FBI crime data table
|
||||||
|
|
||||||
|
CREATE TABLE fbi_crime_data_2015 (
|
||||||
|
st varchar(20),
|
||||||
|
city varchar(50),
|
||||||
|
population integer,
|
||||||
|
violent_crime integer,
|
||||||
|
property_crime integer,
|
||||||
|
burglary integer,
|
||||||
|
larceny_theft integer,
|
||||||
|
motor_vehicle_theft integer,
|
||||||
|
CONSTRAINT st_city_key PRIMARY KEY (st, city)
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY fbi_crime_data_2015
|
||||||
|
FROM 'C:\YourDirectory\fbi_crime_data_2015.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER ',');
|
||||||
|
|
||||||
|
SELECT * FROM fbi_crime_data_2015
|
||||||
|
ORDER BY population DESC;
|
||||||
|
|
||||||
|
-- Listing 10-9: Find property crime rates per thousand in cities with 500,000
|
||||||
|
-- or more people
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
city,
|
||||||
|
st,
|
||||||
|
population,
|
||||||
|
property_crime,
|
||||||
|
round(
|
||||||
|
(property_crime::numeric / population) * 1000, 1
|
||||||
|
) AS pc_per_1000
|
||||||
|
FROM fbi_crime_data_2015
|
||||||
|
WHERE population >= 500000
|
||||||
|
ORDER BY (property_crime::numeric / population) DESC;
|
||||||
233
Chapter_12/Chapter_12.sql
Normal file
233
Chapter_12/Chapter_12.sql
Normal file
@ -0,0 +1,233 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 11 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 11-1: Extracting components of a timestamp value using date_part()
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
date_part('year', '2019-12-01 18:37:12 EST'::timestamptz) AS "year",
|
||||||
|
date_part('month', '2019-12-01 18:37:12 EST'::timestamptz) AS "month",
|
||||||
|
date_part('day', '2019-12-01 18:37:12 EST'::timestamptz) AS "day",
|
||||||
|
date_part('hour', '2019-12-01 18:37:12 EST'::timestamptz) AS "hour",
|
||||||
|
date_part('minute', '2019-12-01 18:37:12 EST'::timestamptz) AS "minute",
|
||||||
|
date_part('seconds', '2019-12-01 18:37:12 EST'::timestamptz) AS "seconds",
|
||||||
|
date_part('timezone_hour', '2019-12-01 18:37:12 EST'::timestamptz) AS "tz",
|
||||||
|
date_part('week', '2019-12-01 18:37:12 EST'::timestamptz) AS "week",
|
||||||
|
date_part('quarter', '2019-12-01 18:37:12 EST'::timestamptz) AS "quarter",
|
||||||
|
date_part('epoch', '2019-12-01 18:37:12 EST'::timestamptz) AS "epoch";
|
||||||
|
|
||||||
|
-- Bonus: Using the SQL-standard extract() for similar datetime parsing:
|
||||||
|
|
||||||
|
SELECT extract('year' from '2019-12-01 18:37:12 EST'::timestamptz) AS "year";
|
||||||
|
|
||||||
|
-- Listing 11-2: Three functions for making datetimes from components
|
||||||
|
|
||||||
|
-- make a date
|
||||||
|
SELECT make_date(2018, 2, 22);
|
||||||
|
-- make a time
|
||||||
|
SELECT make_time(18, 4, 30.3);
|
||||||
|
-- make a timestamp with time zone
|
||||||
|
SELECT make_timestamptz(2018, 2, 22, 18, 4, 30.3, 'Europe/Lisbon');
|
||||||
|
|
||||||
|
-- Bonus: Retrieving the current date and time
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
current_date,
|
||||||
|
current_time,
|
||||||
|
current_timestamp,
|
||||||
|
localtime,
|
||||||
|
localtimestamp,
|
||||||
|
now();
|
||||||
|
|
||||||
|
-- Listing 11-3: Comparing current_timestamp and clock_timestamp() during row insert
|
||||||
|
|
||||||
|
CREATE TABLE current_time_example (
|
||||||
|
time_id bigserial,
|
||||||
|
current_timestamp_col timestamp with time zone,
|
||||||
|
clock_timestamp_col timestamp with time zone
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO current_time_example (current_timestamp_col, clock_timestamp_col)
|
||||||
|
(SELECT current_timestamp,
|
||||||
|
clock_timestamp()
|
||||||
|
FROM generate_series(1,1000));
|
||||||
|
|
||||||
|
SELECT * FROM current_time_example;
|
||||||
|
|
||||||
|
-- Time Zones
|
||||||
|
|
||||||
|
-- Listing 11-4: Showing your PostgreSQL server's default time zone
|
||||||
|
|
||||||
|
SHOW timezone; -- Note: You can see all run-time defaults with SHOW ALL;
|
||||||
|
|
||||||
|
-- Listing 11-5: Showing time zone abbreviations and names
|
||||||
|
|
||||||
|
SELECT * FROM pg_timezone_abbrevs;
|
||||||
|
SELECT * FROM pg_timezone_names;
|
||||||
|
|
||||||
|
-- Filter to find one
|
||||||
|
SELECT * FROM pg_timezone_names
|
||||||
|
WHERE name LIKE 'Europe%';
|
||||||
|
|
||||||
|
-- Listing 11-6: Setting the time zone for a client session
|
||||||
|
|
||||||
|
SET timezone TO 'US/Pacific';
|
||||||
|
|
||||||
|
CREATE TABLE time_zone_test (
|
||||||
|
test_date timestamp with time zone
|
||||||
|
);
|
||||||
|
INSERT INTO time_zone_test VALUES ('2020-01-01 4:00');
|
||||||
|
|
||||||
|
SELECT test_date
|
||||||
|
FROM time_zone_test;
|
||||||
|
|
||||||
|
SET timezone TO 'US/Eastern';
|
||||||
|
|
||||||
|
SELECT test_date
|
||||||
|
FROM time_zone_test;
|
||||||
|
|
||||||
|
SELECT test_date AT TIME ZONE 'Asia/Seoul'
|
||||||
|
FROM time_zone_test;
|
||||||
|
|
||||||
|
|
||||||
|
-- Math with dates!
|
||||||
|
|
||||||
|
SELECT '9/30/1929'::date - '9/27/1929'::date;
|
||||||
|
SELECT '9/30/1929'::date + '5 years'::interval;
|
||||||
|
|
||||||
|
|
||||||
|
-- Taxi Rides
|
||||||
|
|
||||||
|
-- Listing 11-7: Creating a table and importing NYC yellow taxi data
|
||||||
|
|
||||||
|
CREATE TABLE nyc_yellow_taxi_trips_2016_06_01 (
|
||||||
|
trip_id bigserial PRIMARY KEY,
|
||||||
|
vendor_id varchar(1) NOT NULL,
|
||||||
|
tpep_pickup_datetime timestamp with time zone NOT NULL,
|
||||||
|
tpep_dropoff_datetime timestamp with time zone NOT NULL,
|
||||||
|
passenger_count integer NOT NULL,
|
||||||
|
trip_distance numeric(8,2) NOT NULL,
|
||||||
|
pickup_longitude numeric(18,15) NOT NULL,
|
||||||
|
pickup_latitude numeric(18,15) NOT NULL,
|
||||||
|
rate_code_id varchar(2) NOT NULL,
|
||||||
|
store_and_fwd_flag varchar(1) NOT NULL,
|
||||||
|
dropoff_longitude numeric(18,15) NOT NULL,
|
||||||
|
dropoff_latitude numeric(18,15) NOT NULL,
|
||||||
|
payment_type varchar(1) NOT NULL,
|
||||||
|
fare_amount numeric(9,2) NOT NULL,
|
||||||
|
extra numeric(9,2) NOT NULL,
|
||||||
|
mta_tax numeric(5,2) NOT NULL,
|
||||||
|
tip_amount numeric(9,2) NOT NULL,
|
||||||
|
tolls_amount numeric(9,2) NOT NULL,
|
||||||
|
improvement_surcharge numeric(9,2) NOT NULL,
|
||||||
|
total_amount numeric(9,2) NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY nyc_yellow_taxi_trips_2016_06_01 (
|
||||||
|
vendor_id,
|
||||||
|
tpep_pickup_datetime,
|
||||||
|
tpep_dropoff_datetime,
|
||||||
|
passenger_count,
|
||||||
|
trip_distance,
|
||||||
|
pickup_longitude,
|
||||||
|
pickup_latitude,
|
||||||
|
rate_code_id,
|
||||||
|
store_and_fwd_flag,
|
||||||
|
dropoff_longitude,
|
||||||
|
dropoff_latitude,
|
||||||
|
payment_type,
|
||||||
|
fare_amount,
|
||||||
|
extra,
|
||||||
|
mta_tax,
|
||||||
|
tip_amount,
|
||||||
|
tolls_amount,
|
||||||
|
improvement_surcharge,
|
||||||
|
total_amount
|
||||||
|
)
|
||||||
|
FROM 'C:\YourDirectory\yellow_tripdata_2016_06_01.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER ',');
|
||||||
|
|
||||||
|
CREATE INDEX tpep_pickup_idx
|
||||||
|
ON nyc_yellow_taxi_trips_2016_06_01 (tpep_pickup_datetime);
|
||||||
|
|
||||||
|
SELECT count(*) FROM nyc_yellow_taxi_trips_2016_06_01;
|
||||||
|
|
||||||
|
-- Listing 11-8: Counting taxi trips by hour
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
date_part('hour', tpep_pickup_datetime) AS trip_hour,
|
||||||
|
count(*)
|
||||||
|
FROM nyc_yellow_taxi_trips_2016_06_01
|
||||||
|
GROUP BY trip_hour
|
||||||
|
ORDER BY trip_hour;
|
||||||
|
|
||||||
|
-- Listing 11-9: Exporting taxi pickups per hour to a CSV file
|
||||||
|
|
||||||
|
COPY
|
||||||
|
(SELECT
|
||||||
|
date_part('hour', tpep_pickup_datetime) AS trip_hour,
|
||||||
|
count(*)
|
||||||
|
FROM nyc_yellow_taxi_trips_2016_06_01
|
||||||
|
GROUP BY trip_hour
|
||||||
|
ORDER BY trip_hour
|
||||||
|
)
|
||||||
|
TO 'C:\YourDirectory\hourly_pickups_2016_06_01.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER, DELIMITER ',');
|
||||||
|
|
||||||
|
-- Listing 11-10: Calculating median trip time by hour
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
date_part('hour', tpep_pickup_datetime) AS trip_hour,
|
||||||
|
percentile_cont(.5)
|
||||||
|
WITHIN GROUP (ORDER BY
|
||||||
|
tpep_dropoff_datetime - tpep_pickup_datetime) AS median_trip
|
||||||
|
FROM nyc_yellow_taxi_trips_2016_06_01
|
||||||
|
GROUP BY trip_hour
|
||||||
|
ORDER BY trip_hour;
|
||||||
|
|
||||||
|
-- Listing 11-11: Creating a table to hold train trip data
|
||||||
|
|
||||||
|
SET timezone TO 'US/Central';
|
||||||
|
|
||||||
|
CREATE TABLE train_rides (
|
||||||
|
trip_id bigserial PRIMARY KEY,
|
||||||
|
segment varchar(50) NOT NULL,
|
||||||
|
departure timestamp with time zone NOT NULL,
|
||||||
|
arrival timestamp with time zone NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO train_rides (segment, departure, arrival)
|
||||||
|
VALUES
|
||||||
|
('Chicago to New York', '2017-11-13 21:30 CST', '2017-11-14 18:23 EST'),
|
||||||
|
('New York to New Orleans', '2017-11-15 14:15 EST', '2017-11-16 19:32 CST'),
|
||||||
|
('New Orleans to Los Angeles', '2017-11-17 13:45 CST', '2017-11-18 9:00 PST'),
|
||||||
|
('Los Angeles to San Francisco', '2017-11-19 10:10 PST', '2017-11-19 21:24 PST'),
|
||||||
|
('San Francisco to Denver', '2017-11-20 9:10 PST', '2017-11-21 18:38 MST'),
|
||||||
|
('Denver to Chicago', '2017-11-22 19:10 MST', '2017-11-23 14:50 CST');
|
||||||
|
|
||||||
|
SELECT * FROM train_rides;
|
||||||
|
|
||||||
|
-- Listing 11-12: Calculating the length of each trip segment
|
||||||
|
|
||||||
|
SELECT segment,
|
||||||
|
to_char(departure, 'YYYY-MM-DD HH12:MI a.m. TZ') AS departure,
|
||||||
|
arrival - departure AS segment_time
|
||||||
|
FROM train_rides;
|
||||||
|
|
||||||
|
-- Listing 11-13: Calculating cumulative intervals using OVER
|
||||||
|
|
||||||
|
SELECT segment,
|
||||||
|
arrival - departure AS segment_time,
|
||||||
|
sum(arrival - departure) OVER (ORDER BY trip_id) AS cume_time
|
||||||
|
FROM train_rides;
|
||||||
|
|
||||||
|
-- Listing 11-14: Better formatting for cumulative trip time
|
||||||
|
|
||||||
|
SELECT segment,
|
||||||
|
arrival - departure AS segment_time,
|
||||||
|
sum(date_part('epoch', (arrival - departure)))
|
||||||
|
OVER (ORDER BY trip_id) * interval '1 second' AS cume_time
|
||||||
|
FROM train_rides;
|
||||||
310
Chapter_13/Chapter_13.sql
Normal file
310
Chapter_13/Chapter_13.sql
Normal file
@ -0,0 +1,310 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 12 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 12-1: Using a subquery in a WHERE clause
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
state_us_abbreviation,
|
||||||
|
p0010001
|
||||||
|
FROM us_counties_2010
|
||||||
|
WHERE p0010001 >= (
|
||||||
|
SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001)
|
||||||
|
FROM us_counties_2010
|
||||||
|
)
|
||||||
|
ORDER BY p0010001 DESC;
|
||||||
|
|
||||||
|
-- Listing 12-2: Using a subquery in a WHERE clause for DELETE
|
||||||
|
|
||||||
|
CREATE TABLE us_counties_2010_top10 AS
|
||||||
|
SELECT * FROM us_counties_2010;
|
||||||
|
|
||||||
|
DELETE FROM us_counties_2010_top10
|
||||||
|
WHERE p0010001 < (
|
||||||
|
SELECT percentile_cont(.9) WITHIN GROUP (ORDER BY p0010001)
|
||||||
|
FROM us_counties_2010_top10
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT count(*) FROM us_counties_2010_top10;
|
||||||
|
|
||||||
|
-- Listing 12-3: Subquery as a derived table in a FROM clause
|
||||||
|
|
||||||
|
SELECT round(calcs.average, 0) as average,
|
||||||
|
calcs.median,
|
||||||
|
round(calcs.average - calcs.median, 0) AS median_average_diff
|
||||||
|
FROM (
|
||||||
|
SELECT avg(p0010001) AS average,
|
||||||
|
percentile_cont(.5)
|
||||||
|
WITHIN GROUP (ORDER BY p0010001)::numeric(10,1) AS median
|
||||||
|
FROM us_counties_2010
|
||||||
|
)
|
||||||
|
AS calcs;
|
||||||
|
|
||||||
|
-- Listing 12-4: Joining two derived tables
|
||||||
|
|
||||||
|
SELECT census.state_us_abbreviation AS st,
|
||||||
|
census.st_population,
|
||||||
|
plants.plant_count,
|
||||||
|
round((plants.plant_count/census.st_population::numeric(10,1)) * 1000000, 1)
|
||||||
|
AS plants_per_million
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
SELECT st,
|
||||||
|
count(*) AS plant_count
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
GROUP BY st
|
||||||
|
)
|
||||||
|
AS plants
|
||||||
|
JOIN
|
||||||
|
(
|
||||||
|
SELECT state_us_abbreviation,
|
||||||
|
sum(p0010001) AS st_population
|
||||||
|
FROM us_counties_2010
|
||||||
|
GROUP BY state_us_abbreviation
|
||||||
|
)
|
||||||
|
AS census
|
||||||
|
ON plants.st = census.state_us_abbreviation
|
||||||
|
ORDER BY plants_per_million DESC;
|
||||||
|
|
||||||
|
-- Listing 12-5: Adding a subquery to a column list
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
state_us_abbreviation AS st,
|
||||||
|
p0010001 AS total_pop,
|
||||||
|
(SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
|
||||||
|
FROM us_counties_2010) AS us_median
|
||||||
|
FROM us_counties_2010;
|
||||||
|
|
||||||
|
-- Listing 12-6: Using a subquery expression in a calculation
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
state_us_abbreviation AS st,
|
||||||
|
p0010001 AS total_pop,
|
||||||
|
(SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
|
||||||
|
FROM us_counties_2010) AS us_median,
|
||||||
|
p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
|
||||||
|
FROM us_counties_2010) AS diff_from_median
|
||||||
|
FROM us_counties_2010
|
||||||
|
WHERE (p0010001 - (SELECT percentile_cont(.5) WITHIN GROUP (ORDER BY p0010001)
|
||||||
|
FROM us_counties_2010))
|
||||||
|
BETWEEN -1000 AND 1000;
|
||||||
|
|
||||||
|
|
||||||
|
-- BONUS: Subquery expressions
|
||||||
|
-- If you'd like to try the IN, EXISTS, and NOT EXISTS expressions on pages 199-200,
|
||||||
|
-- here's the code to create a retirees table. The queries below are similar
|
||||||
|
-- to the hypothetical examples on pages 199 and 200. You will need the
|
||||||
|
-- employees table you created in Chapter 6.
|
||||||
|
|
||||||
|
-- Create table and insert data
|
||||||
|
CREATE TABLE retirees (
|
||||||
|
id int,
|
||||||
|
first_name varchar(50),
|
||||||
|
last_name varchar(50)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO retirees
|
||||||
|
VALUES (2, 'Lee', 'Smith'),
|
||||||
|
(4, 'Janet', 'King');
|
||||||
|
|
||||||
|
-- Generating values for the IN operator
|
||||||
|
SELECT first_name, last_name
|
||||||
|
FROM employees
|
||||||
|
WHERE emp_id IN (
|
||||||
|
SELECT id
|
||||||
|
FROM retirees);
|
||||||
|
|
||||||
|
-- Checking whether values exist (returns all rows from employees
|
||||||
|
-- if the expression evaluates as true)
|
||||||
|
SELECT first_name, last_name
|
||||||
|
FROM employees
|
||||||
|
WHERE EXISTS (
|
||||||
|
SELECT id
|
||||||
|
FROM retirees);
|
||||||
|
|
||||||
|
-- Using a correlated subquery to find matching values from employees
|
||||||
|
-- in retirees.
|
||||||
|
SELECT first_name, last_name
|
||||||
|
FROM employees
|
||||||
|
WHERE EXISTS (
|
||||||
|
SELECT id
|
||||||
|
FROM retirees
|
||||||
|
WHERE id = employees.emp_id);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
-- Listing 12-7: Using a simple CTE to find large counties
|
||||||
|
|
||||||
|
WITH
|
||||||
|
large_counties (geo_name, st, p0010001)
|
||||||
|
AS
|
||||||
|
(
|
||||||
|
SELECT geo_name, state_us_abbreviation, p0010001
|
||||||
|
FROM us_counties_2010
|
||||||
|
WHERE p0010001 >= 100000
|
||||||
|
)
|
||||||
|
SELECT st, count(*)
|
||||||
|
FROM large_counties
|
||||||
|
GROUP BY st
|
||||||
|
ORDER BY count(*) DESC;
|
||||||
|
|
||||||
|
-- Bonus: You can also write this query as:
|
||||||
|
SELECT state_us_abbreviation, count(*)
|
||||||
|
FROM us_counties_2010
|
||||||
|
WHERE p0010001 >= 100000
|
||||||
|
GROUP BY state_us_abbreviation
|
||||||
|
ORDER BY count(*) DESC;
|
||||||
|
|
||||||
|
-- Listing 12-8: Using CTEs in a table join
|
||||||
|
|
||||||
|
WITH
|
||||||
|
counties (st, population) AS
|
||||||
|
(SELECT state_us_abbreviation, sum(population_count_100_percent)
|
||||||
|
FROM us_counties_2010
|
||||||
|
GROUP BY state_us_abbreviation),
|
||||||
|
|
||||||
|
plants (st, plants) AS
|
||||||
|
(SELECT st, count(*) AS plants
|
||||||
|
FROM meat_poultry_egg_inspect
|
||||||
|
GROUP BY st)
|
||||||
|
|
||||||
|
SELECT counties.st,
|
||||||
|
population,
|
||||||
|
plants,
|
||||||
|
round((plants/population::numeric(10,1))*1000000, 1) AS per_million
|
||||||
|
FROM counties JOIN plants
|
||||||
|
ON counties.st = plants.st
|
||||||
|
ORDER BY per_million DESC;
|
||||||
|
|
||||||
|
-- Listing 12-9: Using CTEs to minimize redundant code
|
||||||
|
|
||||||
|
WITH us_median AS
|
||||||
|
(SELECT percentile_cont(.5)
|
||||||
|
WITHIN GROUP (ORDER BY p0010001) AS us_median_pop
|
||||||
|
FROM us_counties_2010)
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
state_us_abbreviation AS st,
|
||||||
|
p0010001 AS total_pop,
|
||||||
|
us_median_pop,
|
||||||
|
p0010001 - us_median_pop AS diff_from_median
|
||||||
|
FROM us_counties_2010 CROSS JOIN us_median
|
||||||
|
WHERE (p0010001 - us_median_pop)
|
||||||
|
BETWEEN -1000 AND 1000;
|
||||||
|
|
||||||
|
|
||||||
|
-- Cross tabulations
|
||||||
|
-- Install the crosstab() function via the tablefunc module
|
||||||
|
|
||||||
|
CREATE EXTENSION tablefunc;
|
||||||
|
|
||||||
|
-- Listing 12-10: Creating and filling the ice_cream_survey table
|
||||||
|
|
||||||
|
CREATE TABLE ice_cream_survey (
|
||||||
|
response_id integer PRIMARY KEY,
|
||||||
|
office varchar(20),
|
||||||
|
flavor varchar(20)
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY ice_cream_survey
|
||||||
|
FROM 'C:\YourDirectory\ice_cream_survey.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
-- Listing 12-11: Generating the ice cream survey crosstab
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM crosstab('SELECT office,
|
||||||
|
flavor,
|
||||||
|
count(*)
|
||||||
|
FROM ice_cream_survey
|
||||||
|
GROUP BY office, flavor
|
||||||
|
ORDER BY office',
|
||||||
|
|
||||||
|
'SELECT flavor
|
||||||
|
FROM ice_cream_survey
|
||||||
|
GROUP BY flavor
|
||||||
|
ORDER BY flavor')
|
||||||
|
|
||||||
|
AS (office varchar(20),
|
||||||
|
chocolate bigint,
|
||||||
|
strawberry bigint,
|
||||||
|
vanilla bigint);
|
||||||
|
|
||||||
|
-- Listing 12-12: Creating and filling a temperature_readings table
|
||||||
|
|
||||||
|
CREATE TABLE temperature_readings (
|
||||||
|
reading_id bigserial PRIMARY KEY,
|
||||||
|
station_name varchar(50),
|
||||||
|
observation_date date,
|
||||||
|
max_temp integer,
|
||||||
|
min_temp integer
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY temperature_readings
|
||||||
|
(station_name, observation_date, max_temp, min_temp)
|
||||||
|
FROM 'C:\YourDirectory\temperature_readings.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
-- Listing 12-13: Generating the temperature readings crosstab
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM crosstab('SELECT
|
||||||
|
station_name,
|
||||||
|
date_part(''month'', observation_date),
|
||||||
|
percentile_cont(.5)
|
||||||
|
WITHIN GROUP (ORDER BY max_temp)
|
||||||
|
FROM temperature_readings
|
||||||
|
GROUP BY station_name,
|
||||||
|
date_part(''month'', observation_date)
|
||||||
|
ORDER BY station_name',
|
||||||
|
|
||||||
|
'SELECT month
|
||||||
|
FROM generate_series(1,12) month')
|
||||||
|
|
||||||
|
AS (station varchar(50),
|
||||||
|
jan numeric(3,0),
|
||||||
|
feb numeric(3,0),
|
||||||
|
mar numeric(3,0),
|
||||||
|
apr numeric(3,0),
|
||||||
|
may numeric(3,0),
|
||||||
|
jun numeric(3,0),
|
||||||
|
jul numeric(3,0),
|
||||||
|
aug numeric(3,0),
|
||||||
|
sep numeric(3,0),
|
||||||
|
oct numeric(3,0),
|
||||||
|
nov numeric(3,0),
|
||||||
|
dec numeric(3,0)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 12-14: Re-classifying temperature data with CASE
|
||||||
|
|
||||||
|
SELECT max_temp,
|
||||||
|
CASE WHEN max_temp >= 90 THEN 'Hot'
|
||||||
|
WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
|
||||||
|
WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
|
||||||
|
WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
|
||||||
|
WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
|
||||||
|
ELSE 'Inhumane'
|
||||||
|
END AS temperature_group
|
||||||
|
FROM temperature_readings;
|
||||||
|
|
||||||
|
-- Listing 12-15: Using CASE in a Common Table Expression
|
||||||
|
|
||||||
|
WITH temps_collapsed (station_name, max_temperature_group) AS
|
||||||
|
(SELECT station_name,
|
||||||
|
CASE WHEN max_temp >= 90 THEN 'Hot'
|
||||||
|
WHEN max_temp BETWEEN 70 AND 89 THEN 'Warm'
|
||||||
|
WHEN max_temp BETWEEN 50 AND 69 THEN 'Pleasant'
|
||||||
|
WHEN max_temp BETWEEN 33 AND 49 THEN 'Cold'
|
||||||
|
WHEN max_temp BETWEEN 20 AND 32 THEN 'Freezing'
|
||||||
|
ELSE 'Inhumane'
|
||||||
|
END
|
||||||
|
FROM temperature_readings)
|
||||||
|
|
||||||
|
SELECT station_name, max_temperature_group, count(*)
|
||||||
|
FROM temps_collapsed
|
||||||
|
GROUP BY station_name, max_temperature_group
|
||||||
|
ORDER BY station_name, count(*) DESC;
|
||||||
363
Chapter_14/Chapter_14.sql
Normal file
363
Chapter_14/Chapter_14.sql
Normal file
@ -0,0 +1,363 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 13 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Commonly used string functions
|
||||||
|
-- Full list at https://www.postgresql.org/docs/current/static/functions-string.html
|
||||||
|
|
||||||
|
-- Case formatting
|
||||||
|
SELECT upper('Neal7');
|
||||||
|
SELECT lower('Randy');
|
||||||
|
SELECT initcap('at the end of the day');
|
||||||
|
-- Note initcap's imperfect for acronyms
|
||||||
|
SELECT initcap('Practical SQL');
|
||||||
|
|
||||||
|
-- Character Information
|
||||||
|
SELECT char_length(' Pat ');
|
||||||
|
SELECT length(' Pat ');
|
||||||
|
SELECT position(', ' in 'Tan, Bella');
|
||||||
|
|
||||||
|
-- Removing characters
|
||||||
|
SELECT trim('s' from 'socks');
|
||||||
|
SELECT trim(trailing 's' from 'socks');
|
||||||
|
SELECT trim(' Pat ');
|
||||||
|
SELECT char_length(trim(' Pat ')); -- note the length change
|
||||||
|
SELECT ltrim('socks', 's');
|
||||||
|
SELECT rtrim('socks', 's');
|
||||||
|
|
||||||
|
-- Extracting and replacing characters
|
||||||
|
SELECT left('703-555-1212', 3);
|
||||||
|
SELECT right('703-555-1212', 8);
|
||||||
|
SELECT replace('bat', 'b', 'c');
|
||||||
|
|
||||||
|
|
||||||
|
-- Table 13-2: Regular Expression Matching Examples
|
||||||
|
|
||||||
|
-- Any character one or more times
|
||||||
|
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '.+');
|
||||||
|
-- One or two digits followed by a space and p.m.
|
||||||
|
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\d{1,2} (?:a.m.|p.m.)');
|
||||||
|
-- One or more word characters at the start
|
||||||
|
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '^\w+');
|
||||||
|
-- One or more word characters followed by any character at the end.
|
||||||
|
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\w+.$');
|
||||||
|
-- The words May or June
|
||||||
|
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from 'May|June');
|
||||||
|
-- Four digits
|
||||||
|
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from '\d{4}');
|
||||||
|
-- May followed by a space, digit, comma, space, and four digits.
|
||||||
|
SELECT substring('The game starts at 7 p.m. on May 2, 2019.' from 'May \d, \d{4}');
|
||||||
|
|
||||||
|
|
||||||
|
-- Turning Text to Data with Regular Expression Functions
|
||||||
|
|
||||||
|
-- Listing 13-2: Creating and loading the crime_reports table
|
||||||
|
-- Data from https://sheriff.loudoun.gov/dailycrime
|
||||||
|
|
||||||
|
CREATE TABLE crime_reports (
|
||||||
|
crime_id bigserial PRIMARY KEY,
|
||||||
|
date_1 timestamp with time zone,
|
||||||
|
date_2 timestamp with time zone,
|
||||||
|
street varchar(250),
|
||||||
|
city varchar(100),
|
||||||
|
crime_type varchar(100),
|
||||||
|
description text,
|
||||||
|
case_number varchar(50),
|
||||||
|
original_text text NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY crime_reports (original_text)
|
||||||
|
FROM 'C:\YourDirectory\crime_reports.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER OFF, QUOTE '"');
|
||||||
|
|
||||||
|
SELECT original_text FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-3: Using regexp_match() to find the first date
|
||||||
|
SELECT crime_id,
|
||||||
|
regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}')
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-4: Using the regexp_matches() function with the 'g' flag
|
||||||
|
SELECT crime_id,
|
||||||
|
regexp_matches(original_text, '\d{1,2}\/\d{1,2}\/\d{2}', 'g')
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-5: Using regexp_match() to find the second date
|
||||||
|
-- Note that the result includes an unwanted hyphen
|
||||||
|
SELECT crime_id,
|
||||||
|
regexp_match(original_text, '-\d{1,2}\/\d{1,2}\/\d{1,2}')
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-6: Using a capture group to return only the date
|
||||||
|
-- Eliminates the hyphen
|
||||||
|
SELECT crime_id,
|
||||||
|
regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})')
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-7: Matching case number, date, crime type, and city
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number,
|
||||||
|
regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1,
|
||||||
|
regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type,
|
||||||
|
regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n')
|
||||||
|
AS city
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Bonus: Get all parsed elements at once
|
||||||
|
|
||||||
|
SELECT crime_id,
|
||||||
|
regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}') AS date_1,
|
||||||
|
CASE WHEN EXISTS (SELECT regexp_matches(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})'))
|
||||||
|
THEN regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})')
|
||||||
|
ELSE NULL
|
||||||
|
END AS date_2,
|
||||||
|
regexp_match(original_text, '\/\d{2}\n(\d{4})') AS hour_1,
|
||||||
|
CASE WHEN EXISTS (SELECT regexp_matches(original_text, '\/\d{2}\n\d{4}-(\d{4})'))
|
||||||
|
THEN regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})')
|
||||||
|
ELSE NULL
|
||||||
|
END AS hour_2,
|
||||||
|
regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))') AS street,
|
||||||
|
regexp_match(original_text, '(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n') AS city,
|
||||||
|
regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):') AS crime_type,
|
||||||
|
regexp_match(original_text, ':\s(.+)(?:C0|SO)') AS description,
|
||||||
|
regexp_match(original_text, '(?:C0|SO)[0-9]+') AS case_number
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-8: Retrieving a value from within an array
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
crime_id,
|
||||||
|
(regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1]
|
||||||
|
AS case_number
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-9: Updating the crime_reports date_1 column
|
||||||
|
|
||||||
|
UPDATE crime_reports
|
||||||
|
SET date_1 =
|
||||||
|
(
|
||||||
|
(regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
|
||||||
|
|| ' ' ||
|
||||||
|
(regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1]
|
||||||
|
||' US/Eastern'
|
||||||
|
)::timestamptz;
|
||||||
|
|
||||||
|
SELECT crime_id,
|
||||||
|
date_1,
|
||||||
|
original_text
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-10: Updating all crime_reports columns
|
||||||
|
|
||||||
|
UPDATE crime_reports
|
||||||
|
SET date_1 =
|
||||||
|
(
|
||||||
|
(regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
|
||||||
|
|| ' ' ||
|
||||||
|
(regexp_match(original_text, '\/\d{2}\n(\d{4})'))[1]
|
||||||
|
||' US/Eastern'
|
||||||
|
)::timestamptz,
|
||||||
|
|
||||||
|
date_2 =
|
||||||
|
CASE
|
||||||
|
-- if there is no second date but there is a second hour
|
||||||
|
WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NULL)
|
||||||
|
AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL)
|
||||||
|
THEN
|
||||||
|
((regexp_match(original_text, '\d{1,2}\/\d{1,2}\/\d{2}'))[1]
|
||||||
|
|| ' ' ||
|
||||||
|
(regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1]
|
||||||
|
||' US/Eastern'
|
||||||
|
)::timestamptz
|
||||||
|
|
||||||
|
-- if there is both a second date and second hour
|
||||||
|
WHEN (SELECT regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})') IS NOT NULL)
|
||||||
|
AND (SELECT regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})') IS NOT NULL)
|
||||||
|
THEN
|
||||||
|
((regexp_match(original_text, '-(\d{1,2}\/\d{1,2}\/\d{1,2})'))[1]
|
||||||
|
|| ' ' ||
|
||||||
|
(regexp_match(original_text, '\/\d{2}\n\d{4}-(\d{4})'))[1]
|
||||||
|
||' US/Eastern'
|
||||||
|
)::timestamptz
|
||||||
|
-- if neither of those conditions exist, provide a NULL
|
||||||
|
ELSE NULL
|
||||||
|
END,
|
||||||
|
street = (regexp_match(original_text, 'hrs.\n(\d+ .+(?:Sq.|Plz.|Dr.|Ter.|Rd.))'))[1],
|
||||||
|
city = (regexp_match(original_text,
|
||||||
|
'(?:Sq.|Plz.|Dr.|Ter.|Rd.)\n(\w+ \w+|\w+)\n'))[1],
|
||||||
|
crime_type = (regexp_match(original_text, '\n(?:\w+ \w+|\w+)\n(.*):'))[1],
|
||||||
|
description = (regexp_match(original_text, ':\s(.+)(?:C0|SO)'))[1],
|
||||||
|
case_number = (regexp_match(original_text, '(?:C0|SO)[0-9]+'))[1];
|
||||||
|
|
||||||
|
-- Listing 13-11: Viewing selected crime data
|
||||||
|
|
||||||
|
SELECT date_1,
|
||||||
|
street,
|
||||||
|
city,
|
||||||
|
crime_type
|
||||||
|
FROM crime_reports;
|
||||||
|
|
||||||
|
-- Listing 13-12: Using regular expressions in a WHERE clause
|
||||||
|
|
||||||
|
SELECT geo_name
|
||||||
|
FROM us_counties_2010
|
||||||
|
WHERE geo_name ~* '(.+lade.+|.+lare.+)'
|
||||||
|
ORDER BY geo_name;
|
||||||
|
|
||||||
|
SELECT geo_name
|
||||||
|
FROM us_counties_2010
|
||||||
|
WHERE geo_name ~* '.+ash.+' AND geo_name !~ 'Wash.+'
|
||||||
|
ORDER BY geo_name;
|
||||||
|
|
||||||
|
|
||||||
|
-- Listing 13-13: Regular expression functions to replace and split
|
||||||
|
|
||||||
|
SELECT regexp_replace('05/12/2018', '\d{4}', '2017');
|
||||||
|
|
||||||
|
SELECT regexp_split_to_table('Four,score,and,seven,years,ago', ',');
|
||||||
|
|
||||||
|
SELECT regexp_split_to_array('Phil Mike Tony Steve', ' ');
|
||||||
|
|
||||||
|
-- Listing 13-14: Finding an array length
|
||||||
|
|
||||||
|
SELECT array_length(regexp_split_to_array('Phil Mike Tony Steve', ' '), 1);
|
||||||
|
|
||||||
|
|
||||||
|
-- FULL TEXT SEARCH
|
||||||
|
|
||||||
|
-- Full-text search operators:
|
||||||
|
-- & (AND)
|
||||||
|
-- | (OR)
|
||||||
|
-- ! (NOT)
|
||||||
|
|
||||||
|
-- Listing 13-15: Converting text to tsvector data
|
||||||
|
|
||||||
|
SELECT to_tsvector('I am walking across the sitting room to sit with you.');
|
||||||
|
|
||||||
|
-- Listing 13-16: Converting search terms to tsquery data
|
||||||
|
|
||||||
|
SELECT to_tsquery('walking & sitting');
|
||||||
|
|
||||||
|
-- Listing 13-17: Querying a tsvector type with a tsquery
|
||||||
|
|
||||||
|
SELECT to_tsvector('I am walking across the sitting room') @@ to_tsquery('walking & sitting');
|
||||||
|
|
||||||
|
SELECT to_tsvector('I am walking across the sitting room') @@ to_tsquery('walking & running');
|
||||||
|
|
||||||
|
-- Listing 13-18: Creating and filling the president_speeches table
|
||||||
|
|
||||||
|
-- Sources:
|
||||||
|
-- https://archive.org/details/State-of-the-Union-Addresses-1945-2006
|
||||||
|
-- http://www.presidency.ucsb.edu/ws/index.php
|
||||||
|
-- https://www.eisenhower.archives.gov/all_about_ike/speeches.html
|
||||||
|
|
||||||
|
CREATE TABLE president_speeches (
|
||||||
|
sotu_id serial PRIMARY KEY,
|
||||||
|
president varchar(100) NOT NULL,
|
||||||
|
title varchar(250) NOT NULL,
|
||||||
|
speech_date date NOT NULL,
|
||||||
|
speech_text text NOT NULL,
|
||||||
|
search_speech_text tsvector
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY president_speeches (president, title, speech_date, speech_text)
|
||||||
|
FROM 'C:\YourDirectory\sotu-1946-1977.csv'
|
||||||
|
WITH (FORMAT CSV, DELIMITER '|', HEADER OFF, QUOTE '@');
|
||||||
|
|
||||||
|
SELECT * FROM president_speeches;
|
||||||
|
|
||||||
|
-- Listing 13-19: Converting speeches to tsvector in the search_speech_text column
|
||||||
|
|
||||||
|
UPDATE president_speeches
|
||||||
|
SET search_speech_text = to_tsvector('english', speech_text);
|
||||||
|
|
||||||
|
-- Listing 13-20: Creating a GIN index for text search
|
||||||
|
|
||||||
|
CREATE INDEX search_idx ON president_speeches USING gin(search_speech_text);
|
||||||
|
|
||||||
|
-- Listing 13-21: Finding speeches containing the word "Vietnam"
|
||||||
|
|
||||||
|
SELECT president, speech_date
|
||||||
|
FROM president_speeches
|
||||||
|
WHERE search_speech_text @@ to_tsquery('Vietnam')
|
||||||
|
ORDER BY speech_date;
|
||||||
|
|
||||||
|
-- Listing 13-22: Displaying search results with ts_headline()
|
||||||
|
|
||||||
|
SELECT president,
|
||||||
|
speech_date,
|
||||||
|
ts_headline(speech_text, to_tsquery('Vietnam'),
|
||||||
|
'StartSel = <,
|
||||||
|
StopSel = >,
|
||||||
|
MinWords=5,
|
||||||
|
MaxWords=7,
|
||||||
|
MaxFragments=1')
|
||||||
|
FROM president_speeches
|
||||||
|
WHERE search_speech_text @@ to_tsquery('Vietnam');
|
||||||
|
|
||||||
|
-- Listing 13-23: Finding speeches with the word "transportation" but not "roads"
|
||||||
|
|
||||||
|
SELECT president,
|
||||||
|
speech_date,
|
||||||
|
ts_headline(speech_text, to_tsquery('transportation & !roads'),
|
||||||
|
'StartSel = <,
|
||||||
|
StopSel = >,
|
||||||
|
MinWords=5,
|
||||||
|
MaxWords=7,
|
||||||
|
MaxFragments=1')
|
||||||
|
FROM president_speeches
|
||||||
|
WHERE search_speech_text @@ to_tsquery('transportation & !roads');
|
||||||
|
|
||||||
|
-- Listing 13-24: Find speeches where "defense" follows "military"
|
||||||
|
|
||||||
|
SELECT president,
|
||||||
|
speech_date,
|
||||||
|
ts_headline(speech_text, to_tsquery('military <-> defense'),
|
||||||
|
'StartSel = <,
|
||||||
|
StopSel = >,
|
||||||
|
MinWords=5,
|
||||||
|
MaxWords=7,
|
||||||
|
MaxFragments=1')
|
||||||
|
FROM president_speeches
|
||||||
|
WHERE search_speech_text @@ to_tsquery('military <-> defense');
|
||||||
|
|
||||||
|
-- Bonus: Example with a distance of 2:
|
||||||
|
SELECT president,
|
||||||
|
speech_date,
|
||||||
|
ts_headline(speech_text, to_tsquery('military <2> defense'),
|
||||||
|
'StartSel = <,
|
||||||
|
StopSel = >,
|
||||||
|
MinWords=5,
|
||||||
|
MaxWords=7,
|
||||||
|
MaxFragments=2')
|
||||||
|
FROM president_speeches
|
||||||
|
WHERE search_speech_text @@ to_tsquery('military <2> defense');
|
||||||
|
|
||||||
|
-- Listing 13-25: Scoring relevance with ts_rank()
|
||||||
|
|
||||||
|
SELECT president,
|
||||||
|
speech_date,
|
||||||
|
ts_rank(search_speech_text,
|
||||||
|
to_tsquery('war & security & threat & enemy')) AS score
|
||||||
|
FROM president_speeches
|
||||||
|
WHERE search_speech_text @@ to_tsquery('war & security & threat & enemy')
|
||||||
|
ORDER BY score DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- Listing 13-26: Normalizing ts_rank() by speech length
|
||||||
|
|
||||||
|
SELECT president,
|
||||||
|
speech_date,
|
||||||
|
ts_rank(search_speech_text,
|
||||||
|
to_tsquery('war & security & threat & enemy'), 2)::numeric
|
||||||
|
AS score
|
||||||
|
FROM president_speeches
|
||||||
|
WHERE search_speech_text @@ to_tsquery('war & security & threat & enemy')
|
||||||
|
ORDER BY score DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
|
||||||
252
Chapter_15/Chapter_15.sql
Normal file
252
Chapter_15/Chapter_15.sql
Normal file
@ -0,0 +1,252 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 14 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- Listing 14-1: Creating a gis_analysis database
|
||||||
|
|
||||||
|
CREATE DATABASE gis_analysis;
|
||||||
|
-- Note: Switch to this new database before continuing the examples
|
||||||
|
|
||||||
|
-- Listing 14-2: Loading the PostGIS extension
|
||||||
|
|
||||||
|
CREATE EXTENSION postgis;
|
||||||
|
|
||||||
|
SELECT postgis_full_version(); -- shows PostGIS version
|
||||||
|
|
||||||
|
-- Listing 14-3: Retrieving the well-known text for SRID 4326
|
||||||
|
|
||||||
|
SELECT srtext
|
||||||
|
FROM spatial_ref_sys
|
||||||
|
WHERE srid = 4326;
|
||||||
|
|
||||||
|
-- Listing 14-4: Using ST_GeomFromText() to create spatial objects
|
||||||
|
|
||||||
|
SELECT ST_GeomFromText('POINT(-74.9233606 42.699992)', 4326);
|
||||||
|
|
||||||
|
SELECT ST_GeomFromText('LINESTRING(-74.9 42.7, -75.1 42.7)', 4326);
|
||||||
|
|
||||||
|
SELECT ST_GeomFromText('POLYGON((-74.9 42.7, -75.1 42.7,
|
||||||
|
-75.1 42.6, -74.9 42.7))', 4326);
|
||||||
|
|
||||||
|
SELECT ST_GeomFromText('MULTIPOINT (-74.9 42.7, -75.1 42.7)', 4326);
|
||||||
|
|
||||||
|
SELECT ST_GeomFromText('MULTILINESTRING((-76.27 43.1, -76.06 43.08),
|
||||||
|
(-76.2 43.3, -76.2 43.4,
|
||||||
|
-76.4 43.1))', 4326);
|
||||||
|
|
||||||
|
SELECT ST_GeomFromText('MULTIPOLYGON((
|
||||||
|
(-74.92 42.7, -75.06 42.71,
|
||||||
|
-75.07 42.64, -74.92 42.7),
|
||||||
|
(-75.0 42.66, -75.0 42.64,
|
||||||
|
-74.98 42.64, -74.98 42.66,
|
||||||
|
-75.0 42.66)))', 4326);
|
||||||
|
|
||||||
|
-- Listing 14-5: Using ST_GeogFromText() to create spatial objects
|
||||||
|
|
||||||
|
SELECT
|
||||||
|
ST_GeogFromText('SRID=4326;MULTIPOINT(-74.9 42.7, -75.1 42.7, -74.924 42.6)');
|
||||||
|
|
||||||
|
-- Listing 14-6: Functions specific to making points
|
||||||
|
|
||||||
|
SELECT ST_PointFromText('POINT(-74.9233606 42.699992)', 4326);
|
||||||
|
|
||||||
|
SELECT ST_MakePoint(-74.9233606, 42.699992);
|
||||||
|
SELECT ST_SetSRID(ST_MakePoint(-74.9233606, 42.699992), 4326);
|
||||||
|
|
||||||
|
-- Listing 14-7: Functions specific to making LineStrings
|
||||||
|
|
||||||
|
SELECT ST_LineFromText('LINESTRING(-105.90 35.67,-105.91 35.67)', 4326);
|
||||||
|
SELECT ST_MakeLine(ST_MakePoint(-74.92, 42.69), ST_MakePoint(-74.12, 42.45));
|
||||||
|
|
||||||
|
-- Listing 14-8: Functions specific to making Polygons
|
||||||
|
|
||||||
|
SELECT ST_PolygonFromText('POLYGON((-74.9 42.7, -75.1 42.7,
|
||||||
|
-75.1 42.6, -74.9 42.7))', 4326);
|
||||||
|
|
||||||
|
SELECT ST_MakePolygon(
|
||||||
|
ST_GeomFromText('LINESTRING(-74.92 42.7, -75.06 42.71,
|
||||||
|
-75.07 42.64, -74.92 42.7)', 4326));
|
||||||
|
|
||||||
|
SELECT ST_MPolyFromText('MULTIPOLYGON((
|
||||||
|
(-74.92 42.7, -75.06 42.71,
|
||||||
|
-75.07 42.64, -74.92 42.7),
|
||||||
|
(-75.0 42.66, -75.0 42.64,
|
||||||
|
-74.98 42.64, -74.98 42.66,
|
||||||
|
-75.0 42.66)
|
||||||
|
))', 4326);
|
||||||
|
|
||||||
|
|
||||||
|
-- ANALYZING FARMERS MARKETS DATA
|
||||||
|
-- https://catalog.data.gov/dataset/farmers-markets-geographic-data
|
||||||
|
-- https://www.ams.usda.gov/local-food-directories/farmersmarkets
|
||||||
|
|
||||||
|
|
||||||
|
-- Listing 14-9: Create and load the farmers_markets table
|
||||||
|
|
||||||
|
CREATE TABLE farmers_markets (
|
||||||
|
fmid bigint PRIMARY KEY,
|
||||||
|
market_name varchar(100) NOT NULL,
|
||||||
|
street varchar(180),
|
||||||
|
city varchar(60),
|
||||||
|
county varchar(25),
|
||||||
|
st varchar(20) NOT NULL,
|
||||||
|
zip varchar(10),
|
||||||
|
longitude numeric(10,7),
|
||||||
|
latitude numeric(10,7),
|
||||||
|
organic varchar(1) NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
COPY farmers_markets
|
||||||
|
FROM 'C:\YourDirectory\farmers_markets.csv'
|
||||||
|
WITH (FORMAT CSV, HEADER);
|
||||||
|
|
||||||
|
SELECT count(*) FROM farmers_markets; -- should return 8,681 rows
|
||||||
|
|
||||||
|
-- Listing 14-10: Creating and indexing a geography column
|
||||||
|
-- There's also a function: https://postgis.net/docs/AddGeometryColumn.html
|
||||||
|
|
||||||
|
-- Add column
|
||||||
|
ALTER TABLE farmers_markets ADD COLUMN geog_point geography(POINT,4326);
|
||||||
|
|
||||||
|
-- Now fill that column with the lat/long
|
||||||
|
UPDATE farmers_markets
|
||||||
|
SET geog_point = ST_SetSRID(
|
||||||
|
ST_MakePoint(longitude,latitude),4326
|
||||||
|
)::geography;
|
||||||
|
|
||||||
|
-- Add a GiST index
|
||||||
|
CREATE INDEX market_pts_idx ON farmers_markets USING GIST (geog_point);
|
||||||
|
|
||||||
|
-- View the geography column
|
||||||
|
SELECT longitude,
|
||||||
|
latitude,
|
||||||
|
geog_point,
|
||||||
|
ST_AsText(geog_point)
|
||||||
|
FROM farmers_markets
|
||||||
|
WHERE longitude IS NOT NULL
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- Listing 14-11: Using ST_DWithin() to locate farmers' markets within 10 kilometers of a point
|
||||||
|
|
||||||
|
SELECT market_name,
|
||||||
|
city,
|
||||||
|
st
|
||||||
|
FROM farmers_markets
|
||||||
|
WHERE ST_DWithin(geog_point,
|
||||||
|
ST_GeogFromText('POINT(-93.6204386 41.5853202)'),
|
||||||
|
10000)
|
||||||
|
ORDER BY market_name;
|
||||||
|
|
||||||
|
-- Listing 14-12: Using ST_Distance() to calculate the miles between Yankee Stadium
|
||||||
|
-- and Citi Field (Mets)
|
||||||
|
-- 1609.344 meters/mile
|
||||||
|
|
||||||
|
SELECT ST_Distance(
|
||||||
|
ST_GeogFromText('POINT(-73.9283685 40.8296466)'),
|
||||||
|
ST_GeogFromText('POINT(-73.8480153 40.7570917)')
|
||||||
|
) / 1609.344 AS mets_to_yanks;
|
||||||
|
|
||||||
|
-- Listing 14-13: Using ST_Distance() for each row in farmers_markets
|
||||||
|
|
||||||
|
SELECT market_name,
|
||||||
|
city,
|
||||||
|
round(
|
||||||
|
(ST_Distance(geog_point,
|
||||||
|
ST_GeogFromText('POINT(-93.6204386 41.5853202)')
|
||||||
|
) / 1609.344)::numeric(8,5), 2
|
||||||
|
) AS miles_from_dt
|
||||||
|
FROM farmers_markets
|
||||||
|
WHERE ST_DWithin(geog_point,
|
||||||
|
ST_GeogFromText('POINT(-93.6204386 41.5853202)'),
|
||||||
|
10000)
|
||||||
|
ORDER BY miles_from_dt ASC;
|
||||||
|
|
||||||
|
|
||||||
|
-- WORKING WITH SHAPEFILES
|
||||||
|
|
||||||
|
-- Resources:
|
||||||
|
-- TIGER/Line® Shapefiles and TIGER/Line® Files
|
||||||
|
-- https://www.census.gov/geo/maps-data/data/tiger-line.html
|
||||||
|
-- Cartographic Boundary Shapefiles - Counties
|
||||||
|
-- https://www.census.gov/geo/maps-data/data/cbf/cbf_counties.html
|
||||||
|
|
||||||
|
-- Listing 14-14: Checking the geom column's well-known text representation
|
||||||
|
|
||||||
|
SELECT ST_AsText(geom)
|
||||||
|
FROM us_counties_2010_shp
|
||||||
|
LIMIT 1;
|
||||||
|
|
||||||
|
-- Listing 14-15: Find the largest counties by area using ST_Area()
|
||||||
|
|
||||||
|
SELECT name10,
|
||||||
|
statefp10 AS st,
|
||||||
|
round(
|
||||||
|
( ST_Area(geom::geography) / 2589988.110336 )::numeric, 2
|
||||||
|
) AS square_miles
|
||||||
|
FROM us_counties_2010_shp
|
||||||
|
ORDER BY square_miles DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- Listing 14-16: Using ST_Within() to find the county belonging to a pair of coordinates
|
||||||
|
|
||||||
|
SELECT name10,
|
||||||
|
statefp10
|
||||||
|
FROM us_counties_2010_shp
|
||||||
|
WHERE ST_Within('SRID=4269;POINT(-118.3419063 34.0977076)'::geometry, geom);
|
||||||
|
|
||||||
|
-- SPATIAL JOINS
|
||||||
|
-- SANTA FE WATER AND ROAD DATA
|
||||||
|
-- http://www.santafenm.gov/santa_fe_river
|
||||||
|
|
||||||
|
-- Census 2016 Tiger/Line roads, water
|
||||||
|
-- https://www.census.gov/geo/maps-data/data/tiger-line.html
|
||||||
|
-- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Roads
|
||||||
|
-- https://www.census.gov/cgi-bin/geo/shapefiles/index.php?year=2016&layergroup=Water
|
||||||
|
|
||||||
|
-- RTTYP - Route Type Code Description
|
||||||
|
-- https://www.census.gov/geo/reference/rttyp.html
|
||||||
|
-- C County
|
||||||
|
-- I Interstate
|
||||||
|
-- M Common Name
|
||||||
|
-- O Other
|
||||||
|
-- S State recognized
|
||||||
|
-- U U.S.
|
||||||
|
|
||||||
|
-- MTFCC MAF/TIGER feature class code
|
||||||
|
-- https://www.census.gov/geo/reference/mtfcc.html
|
||||||
|
-- Here, H3010: A natural flowing waterway
|
||||||
|
|
||||||
|
-- Listing 14-17: Using ST_GeometryType() to determine geometry
|
||||||
|
|
||||||
|
SELECT ST_GeometryType(geom)
|
||||||
|
FROM santafe_linearwater_2016
|
||||||
|
LIMIT 1;
|
||||||
|
|
||||||
|
SELECT ST_GeometryType(geom)
|
||||||
|
FROM santafe_roads_2016
|
||||||
|
LIMIT 1;
|
||||||
|
|
||||||
|
-- Listing 14-18: Spatial join with ST_Intersects() to find roads crossing the Santa Fe river
|
||||||
|
|
||||||
|
SELECT water.fullname AS waterway,
|
||||||
|
roads.rttyp,
|
||||||
|
roads.fullname AS road
|
||||||
|
FROM santafe_linearwater_2016 water JOIN santafe_roads_2016 roads
|
||||||
|
ON ST_Intersects(water.geom, roads.geom)
|
||||||
|
WHERE water.fullname = 'Santa Fe Riv'
|
||||||
|
ORDER BY roads.fullname;
|
||||||
|
|
||||||
|
-- Listing 14-19: Using ST_Intersection() to show where roads cross the river
|
||||||
|
|
||||||
|
SELECT water.fullname AS waterway,
|
||||||
|
roads.rttyp,
|
||||||
|
roads.fullname AS road,
|
||||||
|
ST_AsText(ST_Intersection(water.geom, roads.geom))
|
||||||
|
FROM santafe_linearwater_2016 water JOIN santafe_roads_2016 roads
|
||||||
|
ON ST_Intersects(water.geom, roads.geom)
|
||||||
|
WHERE water.fullname = 'Santa Fe Riv'
|
||||||
|
ORDER BY roads.fullname
|
||||||
|
LIMIT 5;
|
||||||
321
Chapter_17/Chapter_17.sql
Normal file
321
Chapter_17/Chapter_17.sql
Normal file
@ -0,0 +1,321 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 15 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- VIEWS
|
||||||
|
|
||||||
|
-- Listing 15-1: Creating a view that displays Nevada 2010 counties
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW nevada_counties_pop_2010 AS
|
||||||
|
SELECT geo_name,
|
||||||
|
state_fips,
|
||||||
|
county_fips,
|
||||||
|
p0010001 AS pop_2010
|
||||||
|
FROM us_counties_2010
|
||||||
|
WHERE state_us_abbreviation = 'NV'
|
||||||
|
ORDER BY county_fips;
|
||||||
|
|
||||||
|
-- Listing 15-2: Querying the nevada_counties_pop_2010 view
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM nevada_counties_pop_2010
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- Listing 15-3: Creating a view showing population change for US counties
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW county_pop_change_2010_2000 AS
|
||||||
|
SELECT c2010.geo_name,
|
||||||
|
c2010.state_us_abbreviation AS st,
|
||||||
|
c2010.state_fips,
|
||||||
|
c2010.county_fips,
|
||||||
|
c2010.p0010001 AS pop_2010,
|
||||||
|
c2000.p0010001 AS pop_2000,
|
||||||
|
round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
|
||||||
|
/ c2000.p0010001 * 100, 1 ) AS pct_change_2010_2000
|
||||||
|
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
|
||||||
|
ON c2010.state_fips = c2000.state_fips
|
||||||
|
AND c2010.county_fips = c2000.county_fips
|
||||||
|
ORDER BY c2010.state_fips, c2010.county_fips;
|
||||||
|
|
||||||
|
-- Listing 15-4: Selecting columns from the county_pop_change_2010_2000 view
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
st,
|
||||||
|
pop_2010,
|
||||||
|
pct_change_2010_2000
|
||||||
|
FROM county_pop_change_2010_2000
|
||||||
|
WHERE st = 'NV'
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- Listing 15-5: Creating a view on the employees table
|
||||||
|
|
||||||
|
CREATE OR REPLACE VIEW employees_tax_dept AS
|
||||||
|
SELECT emp_id,
|
||||||
|
first_name,
|
||||||
|
last_name,
|
||||||
|
dept_id
|
||||||
|
FROM employees
|
||||||
|
WHERE dept_id = 1
|
||||||
|
ORDER BY emp_id
|
||||||
|
WITH LOCAL CHECK OPTION;
|
||||||
|
|
||||||
|
SELECT * FROM employees_tax_dept;
|
||||||
|
|
||||||
|
-- Listing 15-6: Successful and rejected inserts via the employees_tax_dept view
|
||||||
|
|
||||||
|
INSERT INTO employees_tax_dept (first_name, last_name, dept_id)
|
||||||
|
VALUES ('Suzanne', 'Legere', 1);
|
||||||
|
|
||||||
|
INSERT INTO employees_tax_dept (first_name, last_name, dept_id)
|
||||||
|
VALUES ('Jamil', 'White', 2);
|
||||||
|
|
||||||
|
-- optional:
|
||||||
|
SELECT * FROM employees_tax_dept;
|
||||||
|
|
||||||
|
SELECT * FROM employees;
|
||||||
|
|
||||||
|
-- Listing 15-7: Updating a row via the employees_tax_dept view
|
||||||
|
|
||||||
|
UPDATE employees_tax_dept
|
||||||
|
SET last_name = 'Le Gere'
|
||||||
|
WHERE emp_id = 5;
|
||||||
|
|
||||||
|
SELECT * FROM employees_tax_dept;
|
||||||
|
|
||||||
|
-- Bonus: This will fail because the salary column is not in the view
|
||||||
|
UPDATE employees_tax_dept
|
||||||
|
SET salary = 100000
|
||||||
|
WHERE emp_id = 5;
|
||||||
|
|
||||||
|
-- Listing 15-8: Deleting a row via the employees_tax_dept view
|
||||||
|
|
||||||
|
DELETE FROM employees_tax_dept
|
||||||
|
WHERE emp_id = 5;
|
||||||
|
|
||||||
|
|
||||||
|
-- FUNCTIONS
|
||||||
|
-- https://www.postgresql.org/docs/current/static/plpgsql.html
|
||||||
|
|
||||||
|
-- Listing 15-9: Creating a percent_change function
|
||||||
|
-- To delete this function: DROP FUNCTION percent_change(numeric,numeric,integer);
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION
|
||||||
|
percent_change(new_value numeric,
|
||||||
|
old_value numeric,
|
||||||
|
decimal_places integer DEFAULT 1)
|
||||||
|
RETURNS numeric AS
|
||||||
|
'SELECT round(
|
||||||
|
((new_value - old_value) / old_value) * 100, decimal_places
|
||||||
|
);'
|
||||||
|
LANGUAGE SQL
|
||||||
|
IMMUTABLE
|
||||||
|
RETURNS NULL ON NULL INPUT;
|
||||||
|
|
||||||
|
-- Listing 15-10: Testing the percent_change() function
|
||||||
|
|
||||||
|
SELECT percent_change(110, 108, 2);
|
||||||
|
|
||||||
|
-- Listing 15-11: Testing percent_change() on Census data
|
||||||
|
|
||||||
|
SELECT c2010.geo_name,
|
||||||
|
c2010.state_us_abbreviation AS st,
|
||||||
|
c2010.p0010001 AS pop_2010,
|
||||||
|
percent_change(c2010.p0010001, c2000.p0010001) AS pct_chg_func,
|
||||||
|
round( (CAST(c2010.p0010001 AS numeric(8,1)) - c2000.p0010001)
|
||||||
|
/ c2000.p0010001 * 100, 1 ) AS pct_chg_formula
|
||||||
|
FROM us_counties_2010 c2010 INNER JOIN us_counties_2000 c2000
|
||||||
|
ON c2010.state_fips = c2000.state_fips
|
||||||
|
AND c2010.county_fips = c2000.county_fips
|
||||||
|
ORDER BY pct_chg_func DESC
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
-- Listing 15-12: Adding a column to the teachers table and seeing the data
|
||||||
|
|
||||||
|
ALTER TABLE teachers ADD COLUMN personal_days integer;
|
||||||
|
|
||||||
|
SELECT first_name,
|
||||||
|
last_name,
|
||||||
|
hire_date,
|
||||||
|
personal_days
|
||||||
|
FROM teachers;
|
||||||
|
|
||||||
|
-- Listing 15-13: Creating an update_personal_days() function
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION update_personal_days()
|
||||||
|
RETURNS void AS $$
|
||||||
|
BEGIN
|
||||||
|
UPDATE teachers
|
||||||
|
SET personal_days =
|
||||||
|
CASE WHEN (now() - hire_date) BETWEEN '5 years'::interval
|
||||||
|
AND '10 years'::interval THEN 4
|
||||||
|
WHEN (now() - hire_date) > '10 years'::interval THEN 5
|
||||||
|
ELSE 3
|
||||||
|
END;
|
||||||
|
RAISE NOTICE 'personal_days updated!';
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- To run the function:
|
||||||
|
SELECT update_personal_days();
|
||||||
|
|
||||||
|
-- Listing 15-14: Enabling the PL/Python procedural language
|
||||||
|
|
||||||
|
CREATE EXTENSION plpythonu;
|
||||||
|
|
||||||
|
-- Listing 15-15: Using PL/Python to create the trim_county() function
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION trim_county(input_string text)
|
||||||
|
RETURNS text AS $$
|
||||||
|
import re
|
||||||
|
cleaned = re.sub(r' County', '', input_string)
|
||||||
|
return cleaned
|
||||||
|
$$ LANGUAGE plpythonu;
|
||||||
|
|
||||||
|
-- Listing 15-16: Testing the trim_county() function
|
||||||
|
|
||||||
|
SELECT geo_name,
|
||||||
|
trim_county(geo_name)
|
||||||
|
FROM us_counties_2010
|
||||||
|
ORDER BY state_fips, county_fips
|
||||||
|
LIMIT 5;
|
||||||
|
|
||||||
|
|
||||||
|
-- TRIGGERS
|
||||||
|
|
||||||
|
-- Listing 15-17: Creating the grades and grades_history tables
|
||||||
|
|
||||||
|
CREATE TABLE grades (
|
||||||
|
student_id bigint,
|
||||||
|
course_id bigint,
|
||||||
|
course varchar(30) NOT NULL,
|
||||||
|
grade varchar(5) NOT NULL,
|
||||||
|
PRIMARY KEY (student_id, course_id)
|
||||||
|
);
|
||||||
|
|
||||||
|
INSERT INTO grades
|
||||||
|
VALUES
|
||||||
|
(1, 1, 'Biology 2', 'F'),
|
||||||
|
(1, 2, 'English 11B', 'D'),
|
||||||
|
(1, 3, 'World History 11B', 'C'),
|
||||||
|
(1, 4, 'Trig 2', 'B');
|
||||||
|
|
||||||
|
CREATE TABLE grades_history (
|
||||||
|
student_id bigint NOT NULL,
|
||||||
|
course_id bigint NOT NULL,
|
||||||
|
change_time timestamp with time zone NOT NULL,
|
||||||
|
course varchar(30) NOT NULL,
|
||||||
|
old_grade varchar(5) NOT NULL,
|
||||||
|
new_grade varchar(5) NOT NULL,
|
||||||
|
PRIMARY KEY (student_id, course_id, change_time)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 15-18: Creating the record_if_grade_changed() function
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION record_if_grade_changed()
|
||||||
|
RETURNS trigger AS
|
||||||
|
$$
|
||||||
|
BEGIN
|
||||||
|
IF NEW.grade <> OLD.grade THEN
|
||||||
|
INSERT INTO grades_history (
|
||||||
|
student_id,
|
||||||
|
course_id,
|
||||||
|
change_time,
|
||||||
|
course,
|
||||||
|
old_grade,
|
||||||
|
new_grade)
|
||||||
|
VALUES
|
||||||
|
(OLD.student_id,
|
||||||
|
OLD.course_id,
|
||||||
|
now(),
|
||||||
|
OLD.course,
|
||||||
|
OLD.grade,
|
||||||
|
NEW.grade);
|
||||||
|
END IF;
|
||||||
|
RETURN NEW;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Listing 15-19: Creating the grades_update trigger
|
||||||
|
|
||||||
|
CREATE TRIGGER grades_update
|
||||||
|
AFTER UPDATE
|
||||||
|
ON grades
|
||||||
|
FOR EACH ROW
|
||||||
|
EXECUTE PROCEDURE record_if_grade_changed();
|
||||||
|
|
||||||
|
-- Listing 15-20: Testing the grades_update trigger
|
||||||
|
|
||||||
|
-- Initially, there should be 0 records in the history
|
||||||
|
SELECT * FROM grades_history;
|
||||||
|
|
||||||
|
-- Check the grades
|
||||||
|
SELECT * FROM grades;
|
||||||
|
|
||||||
|
-- Update a grade
|
||||||
|
UPDATE grades
|
||||||
|
SET grade = 'C'
|
||||||
|
WHERE student_id = 1 AND course_id = 1;
|
||||||
|
|
||||||
|
-- Now check the history
|
||||||
|
SELECT student_id,
|
||||||
|
change_time,
|
||||||
|
course,
|
||||||
|
old_grade,
|
||||||
|
new_grade
|
||||||
|
FROM grades_history;
|
||||||
|
|
||||||
|
-- Listing 15-21: Creating a temperature_test table
|
||||||
|
|
||||||
|
CREATE TABLE temperature_test (
|
||||||
|
station_name varchar(50),
|
||||||
|
observation_date date,
|
||||||
|
max_temp integer,
|
||||||
|
min_temp integer,
|
||||||
|
max_temp_group varchar(40),
|
||||||
|
PRIMARY KEY (station_name, observation_date)
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 15-22: Creating the classify_max_temp() function
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION classify_max_temp()
|
||||||
|
RETURNS trigger AS
|
||||||
|
$$
|
||||||
|
BEGIN
|
||||||
|
CASE
|
||||||
|
WHEN NEW.max_temp >= 90 THEN
|
||||||
|
NEW.max_temp_group := 'Hot';
|
||||||
|
WHEN NEW.max_temp BETWEEN 70 AND 89 THEN
|
||||||
|
NEW.max_temp_group := 'Warm';
|
||||||
|
WHEN NEW.max_temp BETWEEN 50 AND 69 THEN
|
||||||
|
NEW.max_temp_group := 'Pleasant';
|
||||||
|
WHEN NEW.max_temp BETWEEN 33 AND 49 THEN
|
||||||
|
NEW.max_temp_group := 'Cold';
|
||||||
|
WHEN NEW.max_temp BETWEEN 20 AND 32 THEN
|
||||||
|
NEW.max_temp_group := 'Freezing';
|
||||||
|
ELSE NEW.max_temp_group := 'Inhumane';
|
||||||
|
END CASE;
|
||||||
|
RETURN NEW;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
-- Listing 15-23: Creating the temperature_insert trigger
|
||||||
|
|
||||||
|
CREATE TRIGGER temperature_insert
|
||||||
|
BEFORE INSERT
|
||||||
|
ON temperature_test
|
||||||
|
FOR EACH ROW
|
||||||
|
EXECUTE PROCEDURE classify_max_temp();
|
||||||
|
|
||||||
|
-- Listing 15-24: Inserting rows to test the temperature_update trigger
|
||||||
|
|
||||||
|
INSERT INTO temperature_test (station_name, observation_date, max_temp, min_temp)
|
||||||
|
VALUES
|
||||||
|
('North Station', '1/19/2019', 10, -3),
|
||||||
|
('North Station', '3/20/2019', 28, 19),
|
||||||
|
('North Station', '5/2/2019', 65, 42),
|
||||||
|
('North Station', '8/9/2019', 93, 74);
|
||||||
|
|
||||||
|
SELECT * FROM temperature_test;
|
||||||
104
Chapter_19/Chapter_19.sql
Normal file
104
Chapter_19/Chapter_19.sql
Normal file
@ -0,0 +1,104 @@
|
|||||||
|
--------------------------------------------------------------
|
||||||
|
-- Practical SQL: A Beginner's Guide to Storytelling with Data
|
||||||
|
-- by Anthony DeBarros
|
||||||
|
|
||||||
|
-- Chapter 17 Code Examples
|
||||||
|
--------------------------------------------------------------
|
||||||
|
|
||||||
|
-- VACUUM
|
||||||
|
|
||||||
|
-- Listing 17-1: Creating a table to test vacuuming
|
||||||
|
|
||||||
|
CREATE TABLE vacuum_test (
|
||||||
|
integer_column integer
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 17-2: Determining the size of vacuum_test
|
||||||
|
|
||||||
|
SELECT pg_size_pretty(
|
||||||
|
pg_total_relation_size('vacuum_test')
|
||||||
|
);
|
||||||
|
|
||||||
|
-- optional: Determine database size
|
||||||
|
SELECT pg_size_pretty(
|
||||||
|
pg_database_size('analysis')
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 17-3: Inserting 500,000 rows into vacuum_test
|
||||||
|
|
||||||
|
INSERT INTO vacuum_test
|
||||||
|
SELECT * FROM generate_series(1,500000);
|
||||||
|
|
||||||
|
-- Test its size again
|
||||||
|
SELECT pg_size_pretty(
|
||||||
|
pg_table_size('vacuum_test')
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 17-4: Updating all rows in vacuum_test
|
||||||
|
|
||||||
|
UPDATE vacuum_test
|
||||||
|
SET integer_column = integer_column + 1;
|
||||||
|
|
||||||
|
-- Test its size again (35 MB)
|
||||||
|
SELECT pg_size_pretty(
|
||||||
|
pg_table_size('vacuum_test')
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Listing 17-5: Viewing autovacuum statistics for vacuum_test
|
||||||
|
|
||||||
|
SELECT relname,
|
||||||
|
last_vacuum,
|
||||||
|
last_autovacuum,
|
||||||
|
vacuum_count,
|
||||||
|
autovacuum_count
|
||||||
|
FROM pg_stat_all_tables
|
||||||
|
WHERE relname = 'vacuum_test';
|
||||||
|
|
||||||
|
-- To see all columns available
|
||||||
|
SELECT *
|
||||||
|
FROM pg_stat_all_tables
|
||||||
|
WHERE relname = 'vacuum_test';
|
||||||
|
|
||||||
|
-- Listing 17-6: Running VACUUM manually
|
||||||
|
|
||||||
|
VACUUM vacuum_test;
|
||||||
|
|
||||||
|
VACUUM; -- vacuums the whole database
|
||||||
|
|
||||||
|
VACUUM VERBOSE; -- provides messages
|
||||||
|
|
||||||
|
-- Listing 17-7: Using VACUUM FULL to reclaim disk space
|
||||||
|
|
||||||
|
VACUUM FULL vacuum_test;
|
||||||
|
|
||||||
|
-- Test its size again
|
||||||
|
SELECT pg_size_pretty(
|
||||||
|
pg_table_size('vacuum_test')
|
||||||
|
);
|
||||||
|
|
||||||
|
-- SETTINGS
|
||||||
|
|
||||||
|
-- Listing 17-8: Showing the location of postgresql.conf
|
||||||
|
|
||||||
|
SHOW config_file;
|
||||||
|
|
||||||
|
-- Listing 17-10: Show the location of the data directory
|
||||||
|
|
||||||
|
SHOW data_directory;
|
||||||
|
|
||||||
|
-- reload settings
|
||||||
|
-- Mac and Linux: pg_ctl reload -D '/path/to/data/directory/'
|
||||||
|
-- Windows: pg_ctl reload -D "C:\path\to\data\directory\"
|
||||||
|
|
||||||
|
|
||||||
|
-- BACKUP AND RESTORE
|
||||||
|
|
||||||
|
-- Listing 17-11: Backing up the analysis database with pg_dump
|
||||||
|
pg_dump -d analysis -U [user_name] -Fc > analysis_backup.sql
|
||||||
|
|
||||||
|
-- Back up just a table
|
||||||
|
pg_dump -t 'train_rides' -d analysis -U [user_name] -Fc > train_backup.sql
|
||||||
|
|
||||||
|
-- Listing 17-12: Restoring the analysis database with pg_restore
|
||||||
|
|
||||||
|
pg_restore -C -d postgres -U postgres analysis_backup_custom.sql
|
||||||
Loading…
x
Reference in New Issue
Block a user