1 year ago

#345103

test-img

trojaxat

String / Byte difference between Jupyter and VS Code with Python


from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
import requests
import json
import os
import re

# Script: download one lyrics page, reduce its text to plain ASCII words and
# render those words as a word cloud.

# Get page
# A browser-like User-Agent is sent with the request.
# NOTE(review): presumably the site rejects the default python-requests
# agent -- confirm before removing this header.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}

link = "https://www.azlyrics.com/lyrics/redhotchilipeppers/nottheone.html"
# The timeout keeps the script from hanging forever on a stalled connection.
page = requests.get(link, headers=headers, timeout=30)
# Fail loudly on a 4xx/5xx response instead of parsing an error page as lyrics.
page.raise_for_status()

# Lyrics
soup = BeautifulSoup(page.content, 'html.parser')
# Work on the flattened text of the whole page: keep what lies between the
# first "Lyrics" marker and the first "Submit Corrections" marker.
lyrics = soup.text.replace("\n", " ")
lyrics = lyrics.replace("|", " ")
lyrics = lyrics.split("Submit Corrections", 1)[0]
_, marker, lyrics = lyrics.partition("Lyrics")
if not marker:
    # Without the marker the page is not a lyrics page (e.g. a block or
    # captcha page), so stop with a clear message rather than an IndexError.
    raise ValueError(
        "Could not find the 'Lyrics' marker in the page text of " + link)

# Title
# File name derived from the last URL segment; it is not used further below.
song_title = link.rsplit('/', 1)[-1].replace(".html", "")
song_title = song_title + '.json'

# Strip every non-letter character from each space-separated token.
regex = re.compile('[^a-zA-Z]')
cleaned_data = lyrics.strip().split(" ")
cleaned_data_lyric = [regex.sub('', word) for word in cleaned_data]
string = ' '.join(cleaned_data_lyric)
# Drop the site's navigation text; this must run before whitespace is
# collapsed because the literal depends on the exact run of spaces.
string = string.replace(
    "AZLyricscom                          A B C D E F G H I J K L M N O P Q R S T U V W X Y Z           Search                                         ", "")
string = re.sub(' +', ' ', string)

# Wordcloud
wordcloud = WordCloud(
    background_color="white",
    max_words=200,
    stopwords=set(STOPWORDS)).generate(string)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()

When I run this code in VS Code using the conda base environment, it raises an error at the WordCloud line, where the variable `string` is not considered to be a string. However, the same code does not raise the error in a Jupyter notebook.

Both are Python 3. Any ideas?

python

string

byte

word-cloud

0 Answers

Your Answer

Accepted video resources