#lang racket
(require csc151)
(require rackunit)
(require racket/undefined)

;; CSC 151.02 (Fall 2020, Term 2)
;; Lab: Hash Tables (Side B)
;;   https://rebelsky.cs.grinnell.edu/Courses/CSC151/2020Fa2/labs/hash-tables
;; Authors: YOUR NAMES HERE
;; Date: THE DATE HERE
;; Acknowledgements:
;;   ACKNOWLEDGEMENTS HERE

; +---------------+--------------------------------------------------
; | Provided code |
; +---------------+

;;; (common-form str) -> string?
;;;   str : string?
;;; Convert `str` to a common form for use as, say, a key in
;;; a hash table.
;;;   If provided with a non-string, just returns the value.
(define common-form
  (lambda (str)
    (if (string? str)
        (string-downcase (list->string (filter (lambda (ch)
                                                 (or (char-numeric? ch)
                                                     (char-alphabetic? ch)))
                                               (string->list str))))
        str)))

;;; (new-make-hash pairs) -> hash?
;;;   pairs : list-of pair?
;;; Create a hash table that uses the common form of the keys in pairs.
(define new-make-hash
  (lambda (pairs)
    (make-hash (map (lambda (pair)
                      (cons (common-form (car pair))
                            (cdr pair)))
                    pairs))))

;;; (new-hash-set! hash key value) -> void?
;;;   hash : hash?
;;;   key : any?
;;;   value : any?
;;; Sets a value in a hash using the common form of string keys.
(define new-hash-set!
  (lambda (hash key value)
    (hash-set! hash (common-form key) value)))

;;; (new-hash-ref! hash key) -> any?
;;;   hash : hash?
;;;   key : any?
;;; Look up a value using the common form of key.
(define new-hash-ref
  (lambda (hash key)
    (hash-ref hash (common-form key))))

;;; (new-hash-has-key? hash key) -> boolean?
;;;   hash : hash
;;;   key : any?
;;; Determine if hash has the common form of key.
(define new-hash-has-key?
  (lambda (hash key)
    (hash-has-key? hash (common-form key))))

;;; (new-directory) -> hash?
;;; Creates a directory of students usable by the following procedures.
;;; (DrRacket has a `make-directory` procedure with a different meaning,
;;; so we call this `new-directory` instead.
(define new-directory
  make-hash)

;;; (directory-add-entry! dir name phone username address) -> void?
;;;   dir : hash?
;;;   name : string?
;;;   phone : string?
;;;   address : string?
;;; Add an entry to our global student directory.
(define directory-add-entry!
  (lambda (dir name phone username address)
    (new-hash-set! dir name (vector phone username address))))

;;; (directory-lookup-phone dir name) -> string?
;;;   name : string?
;;; Look up the phone number associated with a student in our global
;;; student directory.
(define directory-lookup-phone
  (lambda (dir name)
    (and (new-hash-has-key? dir name)
         (vector-ref (new-hash-ref dir name) 0))))

;;; (directory-lookup-username dir name) -> string?
;;;   name : string?
;;; Look up the username associated with a student in our global
;;; student directory.
(define directory-lookup-username
  (lambda (dir name)
    (and (new-hash-has-key? dir name)
         (vector-ref (new-hash-ref dir name) 1))))

;;; (directory-lookup-address dir name) -> string?
;;;   name : string?
;;; Look up the address associated with a student in our global 
;;; student directory.
(define directory-lookup-address
  (lambda (dir name)
    (and (new-hash-has-key? dir name)
         (vector-ref (new-hash-ref dir name) 2))))

;;; (new-table) -> hash?
;;; Creates a table of student contact info usable by the following
;;; procedures.
(define new-table
  make-hash)

;;; (table-add-entry! table name phone username address) -> void?
;;;   table : hash?
;;;   name : string?
;;;   phone : string?
;;;   address : string?
;;; Add an entry to our global student table.
(define table-add-entry!
  (lambda (table name phone username address)
    (new-hash-set! table (string-append name "000" "phone") phone)
    (new-hash-set! table (string-append name "000" "username") username)
    (new-hash-set! table (string-append name "000" "address") address)))

;;; (table-lookup table name attribute) -> string?
;;;   table : hash?
;;;   name : string?
;;;   attribute : string?
;;; Look up the attribute (column name) for a particular name.
;;; Returns the attribute, if found, or #f if not found.
(define table-lookup
  (lambda (table name attribute)
    (let ([key (string-append name "000" attribute)])
      (and (new-hash-has-key? table key)
           (new-hash-ref table key)))))

;;; (table-lookup-phone table name) -> string?
;;;   name : string?
;;; Look up the phone number associated with a student in table.
(define table-lookup-phone
  (section table-lookup <> <> "phone"))

;;; (table-lookup-username dir name) -> string?
;;;   name : string?
;;; Look up the username associated with a student in our global
;;; student table.
(define table-lookup-username
  (section table-lookup <> <> "username"))

;;; (table-lookup-address dir name) -> string?
;;;   name : string?
;;; Look up the address associated with a student in our global
;;; student table.
(define table-lookup-address
  (section table-lookup <> <> "address"))

#| A |#

; +---------------------------+--------------------------------------
; | Exercise 1: A basic table |
; +---------------------------+

; +-------------------------------------+----------------------------
; | Exercise 2: An immutable hash table |
; +-------------------------------------+

; +---------------------------------------------------------+--------
; | Exercise 3: More experiments with immutable hash tables |
; +---------------------------------------------------------+

#| B |#

; +---------------------------------------------+--------------------
; | Exercise 4: A return to mutable hash tables |
; +---------------------------------------------+

#|
Let's try repeating some of this experiments with a mutable hash
table.  We'll be using our list of characters and protagonists.

    Protagonist          Sidekick
    -----------          --------
    Peabody              Sherman 
    Yogi                 Booboo 
    Secret Squirrel      Morocco Mole 
    Tennessee Tuxedo     Chumley 
    Quick Draw McGraw    Baba Looey 
    Dick Dastardly       Muttley 
    Bullwinkle           Rocky 
    Bart Simpson         Milhouse Van Houten 
    Asterix              Obelix 
    Strong Bad           The Cheat 
|#

#|
a. Create a *mutable* hash table, `sidekick-protagonists` that
associates sidekicks with their protagonists (rather than vice
versa).  Here are a few lines to get you started.
|#

(define sidekick-protagonists (make-hash))
(hash-set! sidekick-protagonists "Sherman" "Peabody")
(hash-set! sidekick-protagonists "Booboo" "Yogi")

#|
b. What do you expect as the final result of the second of the
following two expressions?  (You should assume that both expressions
are evaluated and that `sidekick-protagonists` is defined as in the
previous exercise.)

    > (hash-set! sidekick-protagonists "Shaggy" "Scooby Doo")
    > (hash-ref sidekick-protagonists "Shaggy")

<TODO: Enter your answer where>

Check your answer experimentally.

<TODO: Enter your experiment from the interactions pane>
|#

#|
c. What do you expect as the result of the following expression?

   > (hash-ref (hash-set! sidekick-protagonists "Shaggy" "Scooby Doo")
               "Shaggy")

<TODO: Enter your answer where>

Check your answer experimentally.

<TODO: Enter your experiment from the interactions pane>
|#

#|
d. What do you expect as the final result of the second of the
following two expressions?  

    > (hash-remove! sidekick-protagonists "The Cheat")
    > (hash-ref sidekick-protagonists "The Cheat")

<TODO: Enter your answer where>

Check your answer experimentally.

<TODO: Enter your experiment from the interactions pane>
|#

#|
e. What do you expect as the result of the following expression?

    > (hash-ref (hash-remove! sidekick-protagonists "The Cheat"))

<TODO: Enter your answer where>

Check your answer experimentally.

<TODO: Enter your experiment from the interactions pane>
|#

; +-----------------------------------------------+------------------
; | Exercise 5: Mutable vs. immutable hash tables |
; +-----------------------------------------------+

#|
You've now experimented a bit with both mutable and immutable hash tables.  
Spend a few minutes discussing with your partner what you see as the
relative benefits of mutable and immutable hash tables.

<TODO: Enter youro answer here>
|#

; +----------------------------+-------------------------------------
; | Exercise 6: Counting words |
; +----------------------------+

#|
You've seen that the `tally-value` lets us count the number of times
a particular value appears in a list.  What if we want to count each
different word in the list?  We could go through the list once, tallying
each word separately.  But that seems inefficient.

Here's a better solution: We can use a hash table to keep track of
the count of each word.

Let's assume that `word-counts` is a hash table whose keys are strings
and whose values are numbers.
|#

#|
a. Write a procedure `(add-word! counts word)` with the following
behavior.

* If `word` appears in `counts`, grab the count associated with
  `word`, add 1, and store the new value back in `counts`.
* If `word` does not appear in `counts`, create a new entry
  in word counts whose key is `word` and whose value is 1.

For example,

    > (define word-counts (make-hash))
    > word-counts
    '#hash()
    > (add-word! word-counts "example")
    > (add-word! word-counts "snow")
    > word-counts
    '#hash(("example" . 1) ("snow" . 1))
    > (add-word! word-counts "example")
    > word-counts
    '#hash(("example" . 2) ("snow" . 1))
|#

;;; (add-word! counts word) -> void?
;;;   counts : hash of string-number?
;;;   word : string?
;;; Add a word to a hash table.
(define add-word! 
  (lambda (counts word)
    undefined))

#|
b. What if we want to add lots of words.  That seems to be a task
for `map`, doesn't it?  Give it a try.  Use `map` to add the words
in the following list to `word-counts`.

   (list "cat" "and" "hat" "and" "rat")

<TODO: Enter you code and the result>
|#

#|
c. Here's what we got.

    > (map (section add-word! word-counts <>) (list "cat" "and" "hat" "and" "rat"))
    '(#<void> #<void> #<void> #<void> #<void>)
    > word-counts
    '#hash(("and" . 2) ("cat" . 1) ("example" . 2) ("hat" . 1) ("rat" . 1) ("snow" . 1))

It seems to have worked, but we've also ended up with a list of these
strange `#<void>` values.  Why?  Because `add-word!`, like `hash-set!`,
returns nothing.  For situations like this, in which our primary goal
is to change an underlying structure, Racket provides a procedure
called `map`.  

_Determine experimentally what happens when we use `for-each` rather
than `map`._

<TODO: Enter your code and the result>
|#

#|
d. It seems worthwhile to work with a list that is slightly longer
than our five-word list but shorter than the much longer lists that
we get from a full novel.  In your definitions pane, add a definition
for `eyre-a-words`, which gives all of the words in Jane Eyre with
the lowercase letter "w".  Jane Eyre can be found at
   http://www.gutenberg.org/files/1260/1260-0.txt
|#

(define eyre-a-words
  undefined)

#|
e. Reset `word-counts` to an empty hash table and then use `for-each`
and `add-word!` to add all the words in `eyre-w-words`.  That hash
table should then be short enough to view in the interactions pane.
Can you easily tell which is the most frequent word?

<TODO: What is the most frequent word?  How do you know.>
|#

#| A |#

; +--------------------------+---------------------------------------
; | Exercise 7: Strange code |
; +--------------------------+

#| AB |#

; +---------------------------+--------------------------------------
; | For those with extra time |
; +---------------------------+

#|
If you find that you have extra time, you might consider one or more
of the following exercises.
|#

; +------------------------------------+-----------------------------
; | Extra 1: Counting words, revisited |
; +------------------------------------+

#|
Write a procedure, `(count-words fname)`, that returns a hash table
of the word frequencies in the file named by `fname`.  As you've
likely figured out from the previous exercises, your procedure
should probably,

* Create a new hash table (most likely, with a `let`).
* Read all of the words in the file (most likely, with `file->words`).
* Use the hash table to count the words in the file (using `for-each`
  and `add-word!`).
* Return the hash table.
|#

; +----------------------------+-------------------------------------
; | Extra 2: Most common words |
; +----------------------------+

#|
Write a procedure `(most-common-words fname n)`, that reads all of
the words in the file and returns the `n` most common words in
the file.  You will likely want to create a hash table for all
of the word counts (see extra 1), turn that into a list of
word/count lists, sort that list, and take the first `n` elements.
|#

