6. Análisis de sentimiento

El análisis de sentimiento es una técnica de procesamiento de lenguaje natural (NLP) que permite determinar la actitud de un autor respecto a un tema o la polaridad de una opinión.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re

from sentiment_analysis_spanish import sentiment_analysis
# Load the raw corpus: the file has no header row and is read as tab-separated.
tweets = pd.read_csv(
    'https://raw.githubusercontent.com/garnachod/TwitterSentimentDataset/master/tweets_clean.txt',
    sep='\t',
    header=None,
)
# Name the only column before sampling so every later step can refer to it.
tweets.columns = ['tweet']
# Work on a reproducible 10% random sample (fixed seed) of the rows.
tweets = tweets.sample(random_state=42, frac=0.1)

# Feature engineering: count hashtags (#), mentions (@) and links per tweet.
# Counts are taken over whitespace-separated tokens, before the cleaning below
# removes the markers they rely on.
def _count_tokens_starting_with(text, prefix):
    """Return how many whitespace-separated tokens of ``text`` start with ``prefix``."""
    return sum(token.startswith(prefix) for token in text.split())


tweets['hashtags'] = tweets['tweet'].apply(lambda text: _count_tokens_starting_with(text, '#'))
tweets['mentions'] = tweets['tweet'].apply(lambda text: _count_tokens_starting_with(text, '@'))
tweets['links'] = tweets['tweet'].apply(lambda text: _count_tokens_starting_with(text, 'http'))

# Remove the '#' and '@' markers but keep the word that follows them.
# regex=False makes the literal replacement explicit instead of relying on the
# pandas-version-dependent default of the `regex` argument.
tweets['tweet'] = tweets['tweet'].str.replace('#', '', regex=False)
tweets['tweet'] = tweets['tweet'].str.replace('@', '', regex=False)

# Remove links. The dot after "www" is escaped and anchored with \b so that
# ordinary words that merely contain "www" (e.g. "awwwesome") are not deleted.
_URL_PATTERN = re.compile(r'http\S+|\bwww\.\S+')
tweets['tweet'] = tweets['tweet'].apply(lambda text: _URL_PATTERN.sub('', text))

tweets
tweet hashtags mentions links
13099 El LG_ES G3 S ya a la venta por 299 euros - 0 1 2
32607 No habrá ningún desahucio más en Madrid sin al... 0 0 2
69261 El partido socialista derogará la reforma labo... 1 1 0
59548 .mdcospedal inicia la campaña electoral del PP... 0 1 1
21659 BuenosDíasUAM!!! "La clave de la educación no ... 3 0 0
... ... ... ... ...
3344 Nada más lindo q estar en casa :) 0 0 0
64788 || Yo no me entero de que ha pasado. :D 0 0 0
69124 Napolitanas con fritas para el bajon :P 0 0 0
34177 Buenas noches. 💜 0 0 1
2680 A las 12:30 conferencia de marianorajoy en el ... 1 3 1

7019 rows × 4 columns

# Instantiate the Spanish sentiment analyser once and reuse it for every tweet.
sentiment = sentiment_analysis.SentimentAnalysisSpanish()
# Score each cleaned tweet; the bound method is passed directly, no wrapper needed.
tweets['sentiment'] = tweets['tweet'].apply(sentiment.sentiment)
/home/alejo/.local/lib/python3.10/site-packages/sklearn/base.py:348: InconsistentVersionWarning: Trying to unpickle estimator CountVectorizer from version 0.23.2 when using version 1.3.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  warnings.warn(
/home/alejo/.local/lib/python3.10/site-packages/sklearn/base.py:348: InconsistentVersionWarning: Trying to unpickle estimator MultinomialNB from version 0.23.2 when using version 1.3.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
  warnings.warn(
# Sanity check with a clearly positive sentence; the score shown below is close to 1.
sentiment.sentiment("Me encanta el fútbol")
0.9094362772997505
# Sanity check with a clearly negative sentence; the score shown below is close to 0.
sentiment.sentiment("El producto, un desastre")
2.623559185538157e-05