6. Análisis de sentimiento
El análisis de sentimiento es una técnica de procesamiento de lenguaje natural (NLP) que permite determinar la actitud de un autor respecto a un tema o la polaridad de una opinión.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sentiment_analysis_spanish import sentiment_analysis
# Load the header-less tweet file as a single 'tweet' column and keep a
# reproducible 10% sample (fixed random_state).
tweets = pd.read_csv(
    'https://raw.githubusercontent.com/garnachod/TwitterSentimentDataset/master/tweets_clean.txt',
    header=None,
    sep='\t',
)
tweets = tweets.sample(frac=0.1, random_state=42)
tweets.columns = ['tweet']


def _count_tokens_with_prefix(text, prefix):
    """Return how many whitespace-separated tokens of *text* start with *prefix*."""
    return sum(token.startswith(prefix) for token in text.split())


# Feature engineering: count hashtags, mentions and links per tweet.
# This must run BEFORE the markers are stripped below.
tweets['hashtags'] = tweets['tweet'].apply(_count_tokens_with_prefix, args=('#',))
tweets['mentions'] = tweets['tweet'].apply(_count_tokens_with_prefix, args=('@',))
tweets['links'] = tweets['tweet'].apply(_count_tokens_with_prefix, args=('http',))

# Strip the '#' and '@' markers (literal replacement, so the result does not
# depend on the pandas-version default for `regex`) and drop URLs.
tweets['tweet'] = tweets['tweet'].str.replace('#', '', regex=False)
tweets['tweet'] = tweets['tweet'].str.replace('@', '', regex=False)
# The dot after "www" is escaped so that only real "www." URLs are removed,
# not arbitrary words that merely begin with "www".
tweets['tweet'] = tweets['tweet'].str.replace(r'http\S+|www\.\S+', '', regex=True)
tweets
 | tweet | hashtags | mentions | links |
---|---|---|---|---|
13099 | El LG_ES G3 S ya a la venta por 299 euros - | 0 | 1 | 2 |
32607 | No habrá ningún desahucio más en Madrid sin al... | 0 | 0 | 2 |
69261 | El partido socialista derogará la reforma labo... | 1 | 1 | 0 |
59548 | .mdcospedal inicia la campaña electoral del PP... | 0 | 1 | 1 |
21659 | BuenosDíasUAM!!! "La clave de la educación no ... | 3 | 0 | 0 |
... | ... | ... | ... | ... |
3344 | Nada más lindo q estar en casa :) | 0 | 0 | 0 |
64788 | || Yo no me entero de que ha pasado. :D | 0 | 0 | 0 |
69124 | Napolitanas con fritas para el bajon :P | 0 | 0 | 0 |
34177 | Buenas noches. 💜 | 0 | 0 | 1 |
2680 | A las 12:30 conferencia de marianorajoy en el ... | 1 | 3 | 1 |
7019 rows × 4 columns
# Instantiate the Spanish sentiment scorer once, then score every tweet and
# store the result in a new 'sentiment' column.
sentiment = sentiment_analysis.SentimentAnalysisSpanish()
tweets['sentiment'] = tweets['tweet'].map(sentiment.sentiment)
/home/alejo/.local/lib/python3.10/site-packages/sklearn/base.py:348: InconsistentVersionWarning: Trying to unpickle estimator CountVectorizer from version 0.23.2 when using version 1.3.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
warnings.warn(
/home/alejo/.local/lib/python3.10/site-packages/sklearn/base.py:348: InconsistentVersionWarning: Trying to unpickle estimator MultinomialNB from version 0.23.2 when using version 1.3.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
warnings.warn(
# Sanity check with a clearly positive sentence; the score shown below is close to 1.
sentiment.sentiment("Me encanta el fútbol")
0.9094362772997505
# Sanity check with a clearly negative sentence; the score shown below is close to 0.
sentiment.sentiment("El producto, un desastre")
2.623559185538157e-05