# Build (context, next-word) pairs: for each sentence the first n-1 words are
# the model input and the last word is the prediction target (a causal
# language-model setup). NOTE(review): assumes `sentences` and `word_dict`
# are defined earlier in the file — confirm against the full script.
for sen in sentences:
    words = sen.split()  # space tokenizer
    # indices of words 1..n-1 form the input context
    input_ids = [word_dict[n] for n in words[:-1]]
    # index of word n is the target; we usually call this a 'causal language model'
    target = word_dict[words[-1]]
if __name__ == '__main__':
    # Hyperparameters (names follow Bengio et al., "A Neural Probabilistic
    # Language Model").
    n_step = 2    # number of steps, n-1 in paper (context window size)
    n_hidden = 2  # number of hidden units, h in paper
    m = 2         # embedding size, m in paper
# Toy corpus: three short sentences used to build the vocabulary.
sentences = ["i like dog", "i love coffee", "i hate milk"]

# Build the vocabulary: deduplicate all whitespace-separated tokens, then
# assign each word a stable integer id and keep the reverse mapping for
# decoding predictions back into words.
word_list = list(set(" ".join(sentences).split()))
word_dict = {w: i for i, w in enumerate(word_list)}    # word -> index
number_dict = {i: w for i, w in enumerate(word_list)}  # index -> word
n_class = len(word_dict)  # number of Vocabulary (output classes)