-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathBTM_restar_data.R
More file actions
41 lines (34 loc) · 1.39 KB
/
BTM_restar_data.R
File metadata and controls
41 lines (34 loc) · 1.39 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
library(data.table)
library(udpipe)
## Load the interview responses and annotate them with udpipe
## (tokenisation, lemmas, part-of-speech tags).
# na.strings = NULL: no string is declared as a missing-value marker on read.
data <- read.csv(
  "/Users/lilifang/KCL/KCL_Angus/RE-STAR-Angus-Susie/All interviewees responses_withoutNONE_updatedV17_with_BTM_b.csv",
  header = TRUE,
  sep = ",",
  na.strings = NULL
)
# Disabled debugging aid: restrict to a small sample for a quick trial run.
# data <- data[0:1000, ]
# Keep only the document id and the response text.
data <- data[, c("ids", "new_response")]
dim(data)
# Cap the corpus at its first 13435 rows. head() never reads past the end of
# the table, whereas data[1:13435, ] pads a shorter table with all-NA rows
# that would then be handed to the annotator as (empty) documents.
data <- head(data, 13435)
# Disabled debugging aid: peek at the first rows.
# head(data, n = 5)
# Build the doc_id / text data.frame that is passed to udpipe().
anno <- data.frame(
  doc_id = data$ids,
  text = data$new_response,
  stringsAsFactors = FALSE
)
# "english" selects the annotation model by language name; trace = 500 asks
# for progress output (presumably every 500 documents - confirm in udpipe docs).
anno <- udpipe(anno, "english", trace = 500)
# Per-document co-occurrences (biterms) of lemmas within a skipgram of 3.
# A lemma is only considered when it is longer than two characters and is not
# in the English stopword list.
# NOTE(review): stopwords() is not defined in this file and none of the
# library() calls visible above this point is known to export it - confirm
# which package (e.g. stopwords or tm) is expected to be attached.
biterms <- as.data.table(anno)[, cooccurrence(
  x = lemma,
  relevant = !lemma %in% stopwords("en") & nchar(lemma) > 2,
  skipgram = 3
), by = "doc_id"]
library(BTM)
# Fixed seed, presumably so that the model fit below is reproducible.
set.seed(1234)
# Training tokens: one row per (doc_id, lemma), restricted with the same
# filter that is used for the biterms (lemma longer than two characters and
# not an English stopword). subset() drops rows where the filter is NA.
# NOTE(review): stopwords() must be provided by a package attached elsewhere.
traindata <- subset(
  anno,
  nchar(lemma) > 2 & !lemma %in% stopwords("en"),
  select = c("doc_id", "lemma")
)
# Fit a biterm topic model with k = 10 topics over 500 iterations, using the
# precomputed biterms; background = TRUE and trace = 100 are passed through
# to BTM() unchanged.
model <- BTM(
  traindata,
  biterms = biterms,
  k = 10,
  iter = 500,
  background = TRUE,
  trace = 100
)
library(textplot)
library(ggraph)
# Plot the fitted topic model with top_n = 20 and custom topic labels.
# NOTE(review): 11 labels ("Topic 0" .. "Topic 10") are supplied while the
# model is fitted with k = 10 - confirm the plot method accepts this and that
# the labels line up with the intended topics.
plot(
  model,
  top_n = 20,
  title = "BTM model",
  subtitle = "ASD, ADHD, ASD.ADHD",
  labels = c(
    "Topic 0", "Topic 1", "Topic 2", "Topic 3", "Topic 4", "Topic 5",
    "Topic 6", "Topic 7", "Topic 8", "Topic 9", "Topic 10"
  )
)
library(tidyverse)
# Remove rows of the input table that contain missing values and show the
# resulting dimensions. This runs after the model has been fitted, so it only
# affects `data` itself, not the annotation or the model.
data <- data %>% drop_na()
dim(data)
# Extract the terms of each topic (top_n = 20) and display them.
topicterms <- terms(model, top_n = 20)
topicterms