Updated answer to Try It Yourself Ch. 14, problem 2

2024-07-15 07:23:41 -04:00 · 2024-07-15 07:23:41 -04:00 · 712cec0d31
commit 712cec0d31
parent b35f9dff17
1 changed files with 12 additions and 11 deletions
--- a/Try_It_Yourself/Try_It_Yourself.sql
+++ b/Try_It_Yourself/Try_It_Yourself.sql
@ -899,26 +899,27 @@ SELECT (regexp_match('Williams, Sr.', '.*, (.*)'))[1];
 -- regexp_split_to_table() in a subquery to create a table of words to count.
 -- Bonus: remove commas and periods at the end of each word.

-- Answer:
 -- This query uses a Common Table Expression to first separate each word
-- in the text into a separate row in a table named word_list. Then the SELECT
-- statement counts the words, which are cleaned up with two operations. First,
-- several nested replace functions remove commas, periods, and colons. Second,
-- all words are converted to lowercase so that when we count we group words
-- that may appear with various cases (e.g., "Military" and "military").
+-- in the text into a separate row in a table named word_list. As part of the
+-- operation, we use nested replace functions to remove commas, periods, and
+-- colons. Then the SELECT statement counts the words, which we convert
+-- to lowercase so that when we count we group words that may appear with
+-- various cases (e.g., "Military" and "military").
+
+-- Thanks to reader @DavidSwagger for pointing out the replace function
+-- needed to happen in the WITH statement.

 WITH
    word_list (word)
 AS
    (
-        SELECT regexp_split_to_table(speech_text, '\s') AS word
+        SELECT regexp_split_to_table(
+               replace(replace(replace(speech_text, ',', ''), '.', ''), ':', ''),
+               '\s') AS word
        FROM president_speeches
        WHERE speech_date = '1946-01-21'
    )
-
-SELECT lower(
-               replace(replace(replace(word, ',', ''), '.', ''), ':', '')
-             ) AS cleaned_word,
+SELECT lower(word) AS cleaned_word,
       count(*)
 FROM word_list
 WHERE length(word) >= 5