1 year ago
#380785
user14811578
Word2vec TypeError: 'collections.defaultdict' object is not callable
I am trying to build a word2vec model using skip-gram with negative sampling. However, when I try to build the vocabulary dictionary, I encounter the error shown in the title. This is actually a sentiment analysis project, but I plan to use word2vec for feature extraction before the sentiment analysis. Can anyone help me solve this problem? Thank you in advance.
def vocab(self):
    """Build the vocabulary table for the corpus held in ``self.sentences``.

    ``self.sentences`` is iterated three levels deep: corpus -> sentence
    wrapper -> token list -> token, e.g.::

        [[['here', 'are', 'two', 'reasons', ...]], [[]], ...]

    Every token for which ``str.isdigit()`` is true is counted under the
    shared ``'int'`` key instead of its own text. A literal ``'int'`` token
    is counted in that same bucket, because the key already exists.

    Entries whose count is below ``self.min_count`` are dropped, and their
    occurrences are subtracted from the token total used for frequencies.

    Returns:
        A ``defaultdict(dict)`` mapping each kept word to a dict with:

        * ``'word_count'`` -- number of occurrences,
        * ``'word_freq'``  -- ``word_count`` divided by the number of tokens
          remaining after pruning (``0.0`` when no tokens remain),
        * ``'word_index'`` -- rank by descending count, starting at 0; ties
          keep first-seen order, with ``'int'`` ahead of any word.

    NOTE(review): the reported "'collections.defaultdict' object is not
    callable" error is presumably raised at the call site, e.g. when the
    result is stored as ``self.vocab`` and thereby shadows this method on
    the instance -- confirm against the caller.
    """
    entries = defaultdict(dict)
    # Seed the digit bucket first so it always exists and wins ties.
    entries['int']['word_count'] = 0
    total_tokens = 0

    for sent_tokens in self.sentences:
        for tokens in sent_tokens:
            total_tokens += len(tokens)
            for token in tokens:
                key = 'int' if token.isdigit() else token
                if key in entries:
                    entries[key]['word_count'] += 1
                else:
                    entries[key]['word_count'] = 1

    # Collect first, delete afterwards: a dict must not change size while
    # it is being iterated.
    rare_words = [
        word for word, info in entries.items()
        if info['word_count'] < self.min_count
    ]
    for word in rare_words:
        total_tokens -= entries[word]['word_count']
        del entries[word]

    # sorted() is stable, so equal counts keep their insertion order.
    ranked = sorted(
        entries,
        key=lambda word: entries[word]['word_count'],
        reverse=True,
    )
    for idx, word in enumerate(ranked):
        info = entries[word]
        # total_tokens is 0 only when every surviving count is 0 (empty
        # corpus with min_count <= 0); avoid dividing by zero in that case.
        info['word_freq'] = (
            info['word_count'] / total_tokens if total_tokens else 0.0
        )
        info['word_index'] = idx
    return entries
python
data-science
word2vec
0 Answers
Your Answer