Skip to content

Commit 45de081

Browse files
committed
dp : texts_connected implemented
1 parent b12cd5c commit 45de081

File tree

5 files changed

+38
-22
lines changed

5 files changed

+38
-22
lines changed

R/diffrproject.R

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,26 @@ diffrproject <-
4545
#### methods =============================================================
4646
# add text
4747
text_add = function( rtext, name = NULL ){
48+
4849
# input check
4950
stopifnot("rtext" %in% class(rtext))
51+
52+
# connect the new text with the most recently added text (if any)
53+
if( length(self$texts)>0 ){
54+
next_item <- length(self$texts_connected)+1
55+
last_item <- length(self$texts)
56+
self$texts_connected[[next_item]] <-
57+
list(
58+
self$texts[[last_item]],
59+
rtext
60+
)
61+
}
62+
5063
# working variable creation
5164
names <- names(self$texts)
5265
ids <- vapply(self$texts, `[[`, "", "id")
5366
id <- rtext$id
67+
5468
# doing-duty-to-do
5569
if( is.null(name) ){
5670
next_num <- max(c(as.numeric(text_extract(names, "\\d+")),0))+1
@@ -62,6 +76,7 @@ diffrproject <-
6276
rtext$id <- text_collapse(id, "_", i)
6377
i <- i+1
6478
}
79+
6580
# return self for piping
6681
return(invisible(self))
6782
},

R/rtext.R

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -252,13 +252,18 @@ rtext <-
252252
self$encoding <- "UTF-8"
253253

254254
#### Tokenizer
255+
# assign tokenizer
255256
self$tokenizer <- tokenizer
256257
if( !is.null(tokenize_by) ){
257258
self$tokenizer <-
258259
function(x){
259260
text_tokenize(x, regex = tokenize_by, non_token = TRUE)
260261
}
261262
}
263+
# check if tokenizer is valid
264+
stopifnot( "data.frame" %in% class(self$tokenizer("")) )
265+
stopifnot( dim2(self$tokenizer(""))==4 )
266+
262267

263268
#### Tokenize
264269
private$tokenize()

R/text_tools.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,7 @@ text_extract_all <-
334334
#' function for collapsing text vectors
335335
#' @param x object to be collapsed
336336
#' @param sep separator between text parts
337+
#' @param ... additional arguments passed on to specific methods (e.g. to paste() within text_collapse.default())
337338
#' @export
338339
text_collapse <- function (x, ..., sep) {
339340
UseMethod("text_collapse")

dev.R

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -10,28 +10,21 @@ text_files <- list.files(text_path, pattern = "txt", full.names = TRUE)
1010

1111

1212
dp <- diffrproject$new()
13-
14-
for( i in seq_along(text_files)){
15-
tmp <-
16-
rtext$new(
17-
text_file = text_files[i],
13+
dp$text_add(
14+
rtext = rtext$new(
15+
text_file = text_files[1],
1816
encoding = "latin1",
1917
tokenize_by = "\n"
20-
)
21-
dp$text_add(tmp, name=basename(text_files[i]))
22-
}
23-
24-
25-
dp$text_data()
26-
27-
28-
29-
names(dp$texts)
30-
31-
32-
devtools::install_github("petermeissner/diffr")
33-
34-
diffr::diffr(
35-
text1 = dp$texts[[1]]$text_get(),
36-
text2 = dp$texts[[2]]$text_get()
18+
),
19+
name = basename(text_files[1])
20+
)
21+
22+
dp$text_add(
23+
rtext = rtext$new(
24+
text_file = text_files[2],
25+
encoding = "latin1",
26+
tokenize_by = "\n"
27+
),
28+
name = basename(text_files[2])
3729
)
30+

man/text_collapse.Rd

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)