1 year ago
#345103
trojaxat
String / Byte difference between Jupyter and VS Code with Python
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
import requests
import json
import os
import re
# Script: download a lyrics page, reduce its text to plain alphabetic words,
# and display a word cloud of those words.

# Get page
# NOTE(review): these look like query parameters for a different service
# (db/term/rettype); they are still sent to the lyrics URL as a query string,
# exactly as before -- confirm whether they are actually needed.
param_dict = {'db': 'pubmed', 'term': 'escherichia', 'rettype': 'uilist'}
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
link = "https://www.azlyrics.com/lyrics/redhotchilipeppers/nottheone.html"
# A timeout keeps the script from hanging forever on an unresponsive server.
page = requests.get(link, params=param_dict, headers=headers, timeout=30)
# Fail here with a clear HTTP error rather than further down with an
# IndexError when the expected markers are missing from an error page.
page.raise_for_status()

# Lyrics
soup = BeautifulSoup(page.content, 'html.parser')
# The text of the whole page is used and then trimmed between the "Lyrics"
# and "Submit Corrections" markers. The earlier per-<div> lookup was removed:
# its result was overwritten immediately and its hard-coded indexes could
# raise IndexError without contributing anything.
lyrics = soup.text.replace("\n", " ")
lyrics = lyrics.replace("|", " ")
lyrics = lyrics.split("Submit Corrections", 1)[0]
# Raises IndexError if the page text contains no "Lyrics" marker.
lyrics = lyrics.split("Lyrics", 1)[1]

# Title
# Last path component of the URL with ".html" swapped for ".json".
song_title = link.rsplit('/', 1)[-1].replace(".html", "")
song_title = song_title + '.json'

# Strip every non-letter character from each space-separated token.
regex = re.compile(r'[^a-zA-Z]')
cleaned_data = lyrics.strip().split(" ")
cleaned_data_lyric = [regex.sub('', word) for word in cleaned_data]
string = ' '.join(cleaned_data_lyric)
# Drop the site's A-Z navigation header as it appears after cleaning.
string = string.replace(
    "AZLyricscom A B C D E F G H I J K L M N O P Q R S T U V W X Y Z Search ", "")
# Tokens that were emptied by the cleaning leave runs of spaces; collapse them.
string = re.sub(r' +', ' ', string)

# Wordcloud
wordcloud = WordCloud(
    background_color="white",
    max_words=200,
    stopwords=set(STOPWORDS)).generate(string)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis("off")
plt.show()
When I run this code in VS Code within the conda base environment, it raises an error at the wordcloud line, where the variable `string` is not considered to be a string; however, the same code does not raise the error in a Jupyter notebook.
Both are Python 3. Any ideas?
python
string
byte
word-cloud
0 Answers
Your Answer