# Tutoriel koRpus/test script
#
# De EduTech Wiki
# Aller à : navigation, rechercher

library("koRpus")

# Example setup -- adapt the paths below to your own machine!

# Alternative absolute path (Linux layout), kept for reference:
# fichier_exemple = "/home/schneide/schneide/methodo/iramuteq/w_txt/Alice.txt"

# Switch to the project folder, then show the resulting working directory.
setwd("c:/dks/methodo")
getwd()

# Sample text file, given relative to the working directory set above.
fichier_exemple <- "./iramuteq/w_txt/Alice.txt"

# Plain tokenization with koRpus' own tokenizer, reading the sample file.
# The `detect` flags switch on paragraph (parag) and headline (hline) detection.
# NOTE(review): this call uses lang = "en" whereas the treetag() call in this
# script uses lang = "fr" -- confirm which language the sample text is in.
tagged.text.obj <- tokenize(
  fichier_exemple,
  lang = "en",
  detect = c(parag = TRUE, hline = TRUE)
)

# Show the token table of the tokenized object.
taggedText(tagged.text.obj)

# With TreeTagger: tag the sample file through an external TreeTagger
# installation. treetagger = "manual" means the installation is described
# right here through TT.options (install folder plus language preset).
tagged.text.objS4 <- treetag(
  fichier_exemple,
  treetagger = "manual",
  lang = "fr",
  TT.options = list(path = "C:/soft/TreeTagger", preset = "fr-utf8")
)

# Same call for a Linux TreeTagger installation (disabled). Enable it to
# create tagged.text2.objS4.
# tagged.text2.objS4 <- treetag(fichier_exemple,
#                            treetagger="manual",
#                             lang="fr",
#                             TT.options= list (path="/home/schneide/tree-tagger/", preset="fr-utf8"))

# Automated Readability Index alone, on the TreeTagger-tagged text.
ARI(tagged.text.objS4)

# A whole batch of readability indices in one call.
# hyphen = NULL: no hyphenation object is supplied by this script.
readability(
  tagged.text.objS4,
  hyphen = NULL,
  index = c(
    "ARI", "Bormuth", "Coleman", "Coleman.Liau",
    "Dale.Chall", "Danielson.Bryan", "Dickes.Steiwer", "DRP",
    "ELF", "Farr.Jenkins.Paterson", "Flesch", "Flesch.Kincaid",
    "FOG", "FORCAST", "Fucks", "Harris.Jacobson",
    "Linsear.Write", "LIX", "nWS", "RIX",
    "SMOG", "Spache", "Strain", "Traenkle.Bailer",
    "TRI", "Wheeler.Smith"
  )
)

# Register the TreeTagger command and language in the koRpus environment, so
# the calls below can refer to them with tagger = "kRp.env".
# NOTE(review): lang is "fr-utf8" here but "fr" in the treetag() call of this
# script -- confirm which value the installed koRpus version expects.
set.kRp.env(
  TT.cmd = "/home/schneide/tree-tagger/cmd/tree-tagger-french",
  lang = "fr-utf8"
)

# ARI for three text files, each tagged with the environment settings above.
ARI("/mnt/schneide/methodo/iramuteq/w_txt/Alice.txt", tagger = "kRp.env")
ARI("/mnt/schneide/methodo/iramuteq/w_txt/Activate.txt", tagger = "kRp.env")
ARI(
  "/mnt/schneide/methodo/iramuteq/w_txt/Mon-entretien-d-embauche.txt",
  tagger = "kRp.env"
)

# Lexical diversity measures on the tagged text objects.
#
# `tagged.text2.objS4` is only created by the commented-out (Linux) treetag()
# call earlier in this script. Every use of it is therefore guarded with
# exists(), so the script no longer stops with
# "object 'tagged.text2.objS4' not found" while that call stays disabled.

# MTLD (Measure of Textual Lexical Diversity).
MTLD(tagged.text.objS4)
if (exists("tagged.text2.objS4")) {
  MTLD(tagged.text2.objS4)
}

# MTLD for a text stored in a file. The file is tokenized first so that its
# content is analysed.
# NOTE(review): per the koRpus lex.div documentation, a plain character
# argument is taken as the raw text itself, so passing the path directly would
# analyse the path string -- confirm for the installed koRpus version.
# Assumes the text is French, as the file name suggests.
MTLD(
  tokenize(
    "/mnt/schneide/methodo/iramuteq/w_txt/Mon-entretien-d-embauche.txt",
    lang = "fr"
  )
)

# lex.div() with its default settings.
lex.div(tagged.text.objS4)

# R.ld() on each available tagged object.
R.ld(tagged.text.objS4)
if (exists("tagged.text2.objS4")) {
  R.ld(tagged.text2.objS4)
}

# Plot the second tagged object, when it has been created.
if (exists("tagged.text2.objS4")) {
  plot(tagged.text2.objS4)
}