From 6300d7fe5910d3354a84b39151eba139dde98dcf Mon Sep 17 00:00:00 2001 From: Jim Hamski Date: Tue, 27 Feb 2018 10:32:54 -0500 Subject: [PATCH 1/3] Add print for diagnosing the special character issue. --- bool_detect.R | 1 + 1 file changed, 1 insertion(+) diff --git a/bool_detect.R b/bool_detect.R index 27cc605..59e9db7 100644 --- a/bool_detect.R +++ b/bool_detect.R @@ -43,6 +43,7 @@ bool_detect = function(x, b, ignore_case = TRUE, in_word = TRUE, full_word = FAL i = nchar(b) while(i > 0){ item = stri_extract_last(b, regex = '\\b[^\\s\\(\\)\\&\\|]+') # whole word not inc logicals + print(item) if(!is.na(item) & item != ''){ posn = stri_locate_last(b, fixed = item)[1,] # position of last search term orig_item = str_replace_all(item, sep, ' ') From a4a5dbd2f60392dafc158d2175f465bd7ba3d063 Mon Sep 17 00:00:00 2001 From: Jim Hamski Date: Tue, 27 Feb 2018 10:35:21 -0500 Subject: [PATCH 2/3] Print all str_detect calls for diagnosis. --- bool_detect.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bool_detect.R b/bool_detect.R index 59e9db7..500d575 100644 --- a/bool_detect.R +++ b/bool_detect.R @@ -54,7 +54,7 @@ bool_detect = function(x, b, ignore_case = TRUE, in_word = TRUE, full_word = FAL } subs = subs %>% bind_rows() # to data.frame - + print(subs) # convert terms into own str_detect calls for(i in 1:nrow(subs)){ b_head = substr(b0, 1, subs$start[i]-1) From ca642591d74477000494fcd0215f1e8f48da64fe Mon Sep 17 00:00:00 2001 From: Jim Hamski Date: Tue, 27 Feb 2018 10:42:09 -0500 Subject: [PATCH 3/3] Add commented out line 46, which removes word boundary anchor and adds regex for stripping out negation "-". 
--- bool_detect.R | 1 + 1 file changed, 1 insertion(+) diff --git a/bool_detect.R b/bool_detect.R index 500d575..77888b4 100644 --- a/bool_detect.R +++ b/bool_detect.R @@ -43,6 +43,7 @@ bool_detect = function(x, b, ignore_case = TRUE, in_word = TRUE, full_word = FAL i = nchar(b) while(i > 0){ item = stri_extract_last(b, regex = '\\b[^\\s\\(\\)\\&\\|]+') # whole word not inc logicals + #item = stri_extract_last(b, regex = '[^\\s\\(\\)\\&\\|\\-]+') # Jim's modification print(item) if(!is.na(item) & item != ''){ posn = stri_locate_last(b, fixed = item)[1,] # position of last search term