« Analyse de sentiments avec R » : différence entre les versions

Version du 1 décembre 2014 à 01:58

Introduction

Analyse de sentiments avec sentiment

Installation

# Package installation and test
# devtools is installed first; the "sentiment" package is then installed
# from the GitHub repository andrie/sentiment and loaded.

install.packages("devtools")
library("devtools")
# install_github() expects the repository as "user/repo". With current
# devtools/remotes a second positional argument is taken as `ref`
# (branch/tag/commit), not as the GitHub user name.
install_github("andrie/sentiment")
library("sentiment")

On peut tester un peu le paquet et regarder les dictionnaires qui sont fournis avec.

# Run sentiment() on two example sentences (one negative, one positive wording)
sentiment(c("There is a terrible mistake in this work", "This is wonderful!"))
# Two dictionaries for English (built-in); typing the name prints the object
afinn96
afinn111
# Simple test: same call with a third, colloquial sentence added
sentiment(c("There is a terrible mistake in this work", "This is wonderful!", "this is bloody brilliant"))

Analyse de papiers de position sur EduTech Wiki Anglais

# ---- set working directory
# The paths below are machine-specific; adapt the active setwd() call to your
# own environment. Relative paths used later in this script (for example
# "./wiki_pospap_source") are resolved against this directory.
# Linux
# setwd ("~/schneide/methodo/R")
# Windows
# setwd("s:/methodo/R")
setwd("c:/dks/methodo/R")
# Print the working directory to confirm the change
getwd()

library("sentiment")
library(tm)
library(XML)
library(tm.plugin.webmining)

# Get list of position papers from EduTechpospap
# The MediaWiki API query asks for the members of Category:Position_paper
# (pages only, at most 500 entries) in XML format.
cat_pospap <- "http://edutechwiki.unige.ch/mediawiki/api.php?action=query&list=categorymembers&cmtitle=Category:Position_paper&cmlimit=500&cmtype=page&format=xml"
XML_list <- xmlTreeParse(cat_pospap, useInternalNodes = TRUE)
XML_list
# Select every <cm> node of the parsed answer
XML2_list <- xpathSApply(XML_list, "//cm")
# vapply() always returns a character vector (character(0) when no node was
# found), whereas sapply() changes its return type with the input.
# NA_character_ stands in for a node that lacks the attribute.
title_list <- vapply(
  XML2_list,
  function(el) xmlGetAttr(el, "title", default = NA_character_),
  character(1)
)
id_list <- vapply(
  XML2_list,
  function(el) xmlGetAttr(el, "pageid", default = NA_character_),
  character(1)
)
# Show the first title and page id as a quick check
title_list[[1]]
id_list[[1]]

# --- Identify the URLs for each page (article)
# Start and end of the URL. Note the "pageid" parameter: each request asks the
# API to parse the article that has the given page id.
url_en_start <- "http://edutechwiki.unige.ch/mediawiki/api.php?action=parse&pageid="
url_en_end <- "&format=xml"

# Build all URLs in one vectorised call. sprintf() gives a zero-length result
# when id_list is empty; a loop over 1:length(id_list) would instead run over
# c(1, 0) and create a bogus "...pageid=NA&format=xml" entry.
article_ids_list <- sprintf(
  "%s%s%s",
  url_en_start, as.character(id_list), url_en_end
)
# This is the list of articles
article_ids_list

# Reader used when building the corpus from the API answers.
# The spec maps two document parts to XPath expressions:
#   content - the node matched by "//text"
#   heading - the "title" attribute of the <parse> element
# Documents are created from an empty PlainTextDocument() template.
readMWXML <- readXML(
  spec = list(
    content = list("node", "//text"),
    heading = list("attribute", "//parse/@title")
  ),
  doc = PlainTextDocument()
)

# ----- download the page contents
# Every URL in article_ids_list is used as one source of the corpus; the
# source is declared as UTF-8 and read with the readMWXML reader.
pospap.source <- VCorpus(
  URISource(article_ids_list, encoding = "UTF-8"),
  readerControl = list(reader = readMWXML, language = "en")
)
# Show the document names of the new corpus
names(pospap.source)

# Change the "id" metadata (titles instead of unreadable URLs)
# seq_len(length(...)) uses the corpus' own length() and yields an empty
# sequence for an empty corpus. seq.int() applied to the corpus object does
# not go through that length() method, so it is not guaranteed to enumerate
# the documents.
for (j in seq_len(length(pospap.source))) {
  # [[ extracts exactly one title, whether title_list is a vector or a list
  meta(pospap.source[[j]], "id") <- title_list[[j]]
}

# Show the document names again after the change
names(pospap.source)

# Add an html tag around everything - this is good voodoo
pospap.source <- tm_map(pospap.source, encloseHTML)
# Write the HTML fragments to files (not needed, but allows inspection)
writeCorpus(
  pospap.source,
  path = "./wiki_pospap_source"
)

# ------------------------------- Clean text into bags of words

# Step 1: lower-case the text, then apply extractHTMLStrip
# (presumably removes the HTML markup - confirm in tm.plugin.webmining).
pospap.cl1 <- tm_map(pospap.source, content_transformer(tolower))
pospap.cl2 <- tm_map(pospap.cl1, content_transformer(extractHTMLStrip))

# Step 2: drop punctuation but keep dashes inside words.
pospap.cl2 <- tm_map(
  pospap.cl2,
  removePunctuation,
  preserve_intra_word_dashes = TRUE
)

# Step 3: kill_chars replaces every match of `pattern` by one space.
# The outer parentheses print the new object when run interactively.
# NOTE(review): removePunctuation has already run at this point, so the
# ASCII quote and bracket patterns below may no longer find anything -
# confirm that this order is intended.
(kill_chars <- content_transformer(function(x, pattern) gsub(pattern, " ", x)))
# curly quotes = \u2019
pospap.cl2 <- tm_map(pospap.cl2, kill_chars, "\u2019")
pospap.cl2 <- tm_map(pospap.cl2, kill_chars, "'")
pospap.cl2 <- tm_map(pospap.cl2, kill_chars, "\\[modifier\\]")
pospap.cl2 <- tm_map(pospap.cl2, kill_chars, "[«»”“\"]")

# Step 4: remove English stop words, stem, and collapse repeated whitespace.
pospap.essence <- tm_map(pospap.cl2, removeWords, stopwords("english"))
pospap.roots <- tm_map(pospap.essence, stemDocument, language = "english")
pospap.roots <- tm_map(pospap.roots, stripWhitespace)

# test
# Show the first cleaned document, then the class of the cleaned corpus.
pospap.roots[[1]]
# Previously class(pospap.racines): no object of that name is created in the
# visible script, so the call failed; the stemmed corpus is pospap.roots.
class(pospap.roots)

# Print one line per document: its heading followed by the result of
# sentiment() on the cleaned document content.
# seq_len(length(...)) uses the corpus' own length() and is empty for an
# empty corpus; seq.int() on the corpus object does not go through that
# length() method and is not guaranteed to enumerate the documents.
for (docN in seq_len(length(pospap.roots))) {
  print(paste(
    pospap.roots[[docN]]$meta$heading,
    " = ",
    sentiment(pospap.roots[[docN]]$content)
  ))
}

@@ Ligne 11 : / Ligne 11 : @@
 install_github("sentiment", "andrie")
 library("sentiment")
-<source>
+</source>
 On peut tester un peu et regarder les dictionnaires qui viennent avec
@@ Ligne 21 : / Ligne 21 : @@
 # Simple test
 sentiment(c("There is a terrible mistake in this work", "This is wonderful!", "this is bloody brilliant"))
-<source>
+</source>
 == Analyse de papiers de position sur EduTech Wiki Anglais ==
@@ Ligne 112 : / Ligne 112 : @@
 }
 </source>
-sentiment(as.character(pospap.roots[[1]]))

« Analyse de sentiments avec R » : différence entre les versions

Version du 1 décembre 2014 à 01:58

Sommaire

Introduction

Analyse de sentiments avec sentiment

Installation

Analyse de papiers de position sur EduTech Wiki Anglais

Menu de navigation

« Analyse de sentiments avec R » : différence entre les versions

Version du 1 décembre 2014 à 01:58

Introduction

Analyse de sentiments avec sentiment

Installation

Analyse de papiers de position sur EduTech Wiki Anglais

Menu de navigation

Rechercher

« Analyse de sentiments avec R » : différence entre les versions