#
# See RandText.html
# in http://www.physics.cornell.edu/~myers/teaching/ComputationalMethods/ComputerExercises/
#
import random
def read_file_into_word_list(filename):
    """Read a text file and return its contents as a list of words.

    The whole file is read as one string and split on runs of whitespace
    (spaces, tabs, newlines), so punctuation stays attached to the word it
    touches.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list of the whitespace-separated words, in file order.  An empty
        or whitespace-only file yields an empty list.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # The context manager closes the file even if read() raises, instead of
    # leaving the handle open until garbage collection.
    with open(filename, 'r') as input_file:
        return input_file.read().split()
def make_prefix_dictionary(filename):
    """Build the word-pair prefix dictionary for the text in filename.

    Every run of three consecutive words (w0, w1, w2) in the text adds w2
    to the list stored under the key (w0, w1).  A follower that occurs
    several times after the same pair appears several times in that list.

    Args:
        filename: Path of the text file, read with
            read_file_into_word_list.

    Returns:
        A dict mapping each 2-tuple of adjacent words to the list of words
        that follow that pair, in order of appearance.  The dict is empty
        when the text has fewer than three words.
    """
    words = read_file_into_word_list(filename)
    prefix = {}
    # zip stops at the shortest slice, so this visits exactly the triples
    # starting at indices 0 .. len(words) - 3.
    for first, second, follower in zip(words, words[1:], words[2:]):
        # setdefault creates the empty list the first time a pair is seen.
        prefix.setdefault((first, second), []).append(follower)
    return prefix
def make_random_text(filename, num_words=100):
    """Generate up to num_words of random text modeled on filename.

    A starting pair of adjacent words is picked at random from the prefix
    dictionary built by make_prefix_dictionary.  Each following word is then
    picked at random from the words that followed the current pair in the
    source text, and the pair slides forward by one word.

    Args:
        filename: Path of the source text file.
        num_words: Maximum number of words to generate (default 100).

    Returns:
        The generated words joined by single spaces.  The result is shorter
        than num_words if the walk reaches a pair with no recorded follower
        (for example, the last two words of the source).  It is the empty
        string when num_words is not positive or the source has fewer than
        three words.
    """
    prefix = make_prefix_dictionary(filename)
    # With an empty dictionary there is no starting pair to choose, and
    # random.choice raises IndexError on an empty sequence.
    if not prefix or num_words <= 0:
        return ''

    current_pair = random.choice(list(prefix))
    chosen = list(current_pair)
    for _ in range(num_words - 2):
        # The last two words in the document may not have a suffix.
        if current_pair not in prefix:
            break
        suffix = random.choice(prefix[current_pair])
        chosen.append(suffix)
        current_pair = (current_pair[1], suffix)

    # The starting pair already holds two words; trim so that num_words == 1
    # returns a single word.
    return ' '.join(chosen[:num_words])
def demo():
    """Print 200 words of randomized text for each of three sample texts.

    The sample files are addressed by relative paths under ../RandText/.
    For each one, a heading line, the generated text, and a blank
    separator are printed.
    """
    sources = {
        'The Declaration of Independence': '../RandText/declaration.txt',
        'The Wizard of Oz': '../RandText/oz.txt',
        'The Book of Psalms': '../RandText/psalms.txt',
    }
    for title in sources:
        heading = "%s (randomized)\n" % title
        print(heading)
        print(make_random_text(sources[title], 200))
        print("\n")
# Run the demonstration only when this file is executed as a script, not
# when it is imported as a module.
if __name__ == "__main__":
    demo()
# Copyright (C) Cornell University
# All rights reserved.
# Apache License, Version 2.0
# In[ ]: