1 year ago

#380785

test-img

user14811578

Word2vec TypeError: 'collections.defaultdict' object is not callable

I am trying to build a word2vec model using skip-gram and negative sampling. However, when I try to build the vocab dictionary, I encounter the error given in the title. This is actually a sentiment analysis project, but I plan to use word2vec for feature extraction before the sentiment analysis. Can anyone help me solve this problem? Thank you in advance.

def vocab(self):
    """Build the vocabulary table for the corpus held in ``self.sentences``.

    ``self.sentences`` is walked three levels deep: an outer list whose
    items are lists of token lists, e.g.
    ``[[['here', 'are', 'two', 'reasons']], [[]], ...]``.

    Every token made up only of digits is counted under the shared
    placeholder key ``'int'``. Note that a literal token ``"int"`` is
    therefore merged into the same entry as the numbers.

    Entries whose count is below ``self.min_count`` are dropped, and their
    occurrences are subtracted from the total used to compute frequencies.

    Returns:
        A ``defaultdict(dict)`` mapping each kept word to a dict with:

        * ``'word_count'`` -- number of occurrences,
        * ``'word_freq'``  -- ``word_count`` divided by the number of kept
          tokens (``0.0`` when no tokens are kept),
        * ``'word_index'`` -- rank by descending count, ties keeping their
          first-seen order.

        Because the result is a ``defaultdict``, indexing it with an
        unknown word inserts an empty entry; use ``in`` or ``.get`` to
        probe for membership.
    """
    entries = defaultdict(dict)
    # The placeholder exists from the start, so it is the first key even
    # when the corpus contains no numeric tokens.
    entries['int']['word_count'] = 0
    total_tokens = 0

    for sent_tokens in self.sentences:
        for tokens in sent_tokens:
            total_tokens += len(tokens)
            for token in tokens:
                key = 'int' if token.isdigit() else token
                entry = entries[key]  # defaultdict creates missing entries
                entry['word_count'] = entry.get('word_count', 0) + 1

    # Collect the rare words first: the dict must not change size while it
    # is being iterated.
    min_count = self.min_count
    rare_words = [
        word for word, entry in entries.items()
        if entry['word_count'] < min_count
    ]
    for word in rare_words:
        total_tokens -= entries.pop(word)['word_count']

    # sorted() is stable, so equal counts keep their insertion order.
    ranked_words = sorted(
        entries,
        key=lambda word: entries[word]['word_count'],
        reverse=True,
    )
    for idx, word in enumerate(ranked_words):
        entry = entries[word]
        # total_tokens is 0 only when every remaining count is 0 (just the
        # untouched 'int' placeholder); avoid dividing by zero in that case.
        entry['word_freq'] = (
            entry['word_count'] / total_tokens if total_tokens else 0.0
        )
        entry['word_index'] = idx
    return entries

python

data-science

word2vec

0 Answers

Your Answer

Accepted video resources