6
I have a vector of strings
c("YSAHEEHHYDK", "HEHISSDYAGK", "TFAHTESHISK", "ISLGEHEGGGK",
"LSSGYDGTSYK", "FGTGTYAGGEK", "VGASTGYSGLK", "TASGVGGFSTK", "SYASDFGSSAK",
"LYSYYSSTESK")
For each string I would like to replace "Y", "S" or "T" with "pY", "pS" or "pT". But I don't want all the replacements to end up in the same final string; I want each individual replacement to generate a new string, e.g.
"YSAHEEHHYDK" turns into
c("pYSAHEEHHYDK",
"YpSAHEEHHYDK",
"YSAHEEHHpYDK")
4 Answers
Reset to default
4
Using the xx
input from the Note at the end (the question's data plus two edge-case strings), we use stringi functions. In particular, note that assigning to stri_sub
with an empty range (end = start - 1) inserts the p character instead of replacing anything. If an input string is empty, i.e. "", or does not contain any of Y, S or T, then NA is returned for that string.
library(stringi)
# Insert "p" immediately before every located position in one string.
#
# s:   a single character string.
# loc: one element of stri_locate_all(): a matrix with a "start" column
#      giving the match positions in `s`.
# Returns one copy of `s` per row of `loc`, each with "p" inserted in front
# of that row's start position.
add_p <- function(s, loc) {
  pos <- loc[, "start"]
  # A range that ends one character before it starts is empty, so the
  # assignment inserts "p" without overwriting anything.
  stri_sub(s, from = pos, to = pos - 1) <- "p"
  s
}

# Pair each input string with its own match matrix; the result is a list
# named after the input strings.
Map(
  add_p,
  xx,
  stri_locate_all(xx, regex = "[YST]")
)
giving
[1] NA
$ABC
[1] NA
$YSAHEEHHYDK
[1] "pYSAHEEHHYDK" "YpSAHEEHHYDK" "YSAHEEHHpYDK"
$HEHISSDYAGK
[1] "HEHIpSSDYAGK" "HEHISpSDYAGK" "HEHISSDpYAGK"
$TFAHTESHISK
[1] "pTFAHTESHISK" "TFAHpTESHISK" "TFAHTEpSHISK" "TFAHTESHIpSK"
# ...snip...
Note
This is the same as in the question, except that we have added the first two strings.
xx <- c("", "ABC", "YSAHEEHHYDK", "HEHISSDYAGK", "TFAHTESHISK", "ISLGEHEGGGK",
"LSSGYDGTSYK", "FGTGTYAGGEK", "VGASTGYSGLK", "TASGVGGFSTK", "SYASDFGSSAK",
"LYSYYSSTESK")
3
You could write a function in base R:
# Peptide sequences from the question.
strings <- c(
  "YSAHEEHHYDK", "HEHISSDYAGK", "TFAHTESHISK", "ISLGEHEGGGK",
  "LSSGYDGTSYK", "FGTGTYAGGEK", "VGASTGYSGLK", "TASGVGGFSTK",
  "SYASDFGSSAK", "LYSYYSSTESK"
)

# One integer vector of match positions (Y, S or T) per input string.
reg <- gregexpr("[YST]", strings)

# Repeat every string once per match, flatten the positions into a single
# regexpr-style vector (each match is one character long) and replace each
# matched residue with "p" followed by that residue.
`regmatches<-`(
  rep(strings, times = lengths(reg)),
  structure(unlist(reg), match.length = 1),
  value = paste0("p", unlist(regmatches(strings, reg)))
)
#> [1] "pYSAHEEHHYDK" "YpSAHEEHHYDK" "YSAHEEHHpYDK" "HEHIpSSDYAGK" "HEHISpSDYAGK"
#> [6] "HEHISSDpYAGK" "pTFAHTESHISK" "TFAHpTESHISK" "TFAHTEpSHISK" "TFAHTESHIpSK"
#> [11] "IpSLGEHEGGGK" "LpSSGYDGTSYK" "LSpSGYDGTSYK" "LSSGpYDGTSYK" "LSSGYDGpTSYK"
#> [16] "LSSGYDGTpSYK" "LSSGYDGTSpYK" "FGpTGTYAGGEK" "FGTGpTYAGGEK" "FGTGTpYAGGEK"
#> [21] "VGApSTGYSGLK" "VGASpTGYSGLK" "VGASTGpYSGLK" "VGASTGYpSGLK" "pTASGVGGFSTK"
#> [26] "TApSGVGGFSTK" "TASGVGGFpSTK" "TASGVGGFSpTK" "pSYASDFGSSAK" "SpYASDFGSSAK"
#> [31] "SYApSDFGSSAK" "SYASDFGpSSAK" "SYASDFGSpSAK" "LpYSYYSSTESK" "LYpSYYSSTESK"
#> [36] "LYSpYYSSTESK" "LYSYpYSSTESK" "LYSYYpSSTESK" "LYSYYSpSTESK" "LYSYYSSpTESK"
#> [41] "LYSYYSSTEpSK"
Created on 2023-02-14 with reprex v2.0.2
You can create a small function to help you out.
# Expand each string into one variant per Y/S/T occurrence, with that single
# occurrence prefixed by "p".
#
# x: character vector of sequences.
# Returns a flat character vector holding the variants of x[1], then the
# variants of x[2], and so on.
my_replace <- function(x) {
  hits <- gregexpr("[YST]", x)

  # One copy of each string per match found in it.
  expanded <- rep(x, times = lengths(hits))

  # Flattened positions act as a regexpr-style match vector (each match is a
  # single character); every copy gets exactly one residue replaced.
  regmatches(expanded, structure(unlist(hits), match.length = 1)) <-
    paste0("p", unlist(regmatches(x, hits)))

  expanded
}
3
Perhaps something like this with stringr and purrr.
str_locate_all()
returns a 2-column matrix with the start & end of each pattern location, and str_sub(string, start) <- "p"
conveniently accepts that same matrix as its start argument
. Subtracting 1 from the current end column (i.e. [1, 1]
becomes [1, 0]
) makes every range empty, so the assignment keeps all existing characters and inserts p
.
library(stringr)
library(purrr)
# Peptide sequences from the question.
str_ <- c(
  "YSAHEEHHYDK", "HEHISSDYAGK", "TFAHTESHISK", "ISLGEHEGGGK",
  "LSSGYDGTSYK", "FGTGTYAGGEK", "VGASTGYSGLK", "TASGVGGFSTK",
  "SYASDFGSSAK", "LYSYYSSTESK"
)

# Walk over each string together with its start/end match matrix; naming the
# input first makes the result a list named after the original strings.
map2(
  set_names(str_),
  str_locate_all(str_, "Y|S|T"),
  function(peptide, loc) {
    # Move each end to one before its start so the range is empty and the
    # assignment below inserts "p" instead of overwriting the residue.
    loc[, 2] <- loc[, 2] - 1
    str_sub(peptide, loc) <- "p"
    peptide
  }
)
Result as a named list:
#> $YSAHEEHHYDK
#> [1] "pYSAHEEHHYDK" "YpSAHEEHHYDK" "YSAHEEHHpYDK"
#>
#> $HEHISSDYAGK
#> [1] "HEHIpSSDYAGK" "HEHISpSDYAGK" "HEHISSDpYAGK"
#>
#> $TFAHTESHISK
#> [1] "pTFAHTESHISK" "TFAHpTESHISK" "TFAHTEpSHISK" "TFAHTESHIpSK"
#>
#> $ISLGEHEGGGK
#> [1] "IpSLGEHEGGGK"
#>
#> $LSSGYDGTSYK
#> [1] "LpSSGYDGTSYK" "LSpSGYDGTSYK" "LSSGpYDGTSYK" "LSSGYDGpTSYK" "LSSGYDGTpSYK"
#> [6] "LSSGYDGTSpYK"
#>
#> $FGTGTYAGGEK
#> [1] "FGpTGTYAGGEK" "FGTGpTYAGGEK" "FGTGTpYAGGEK"
#>
#> $VGASTGYSGLK
#> [1] "VGApSTGYSGLK" "VGASpTGYSGLK" "VGASTGpYSGLK" "VGASTGYpSGLK"
#>
#> $TASGVGGFSTK
#> [1] "pTASGVGGFSTK" "TApSGVGGFSTK" "TASGVGGFpSTK" "TASGVGGFSpTK"
#>
#> $SYASDFGSSAK
#> [1] "pSYASDFGSSAK" "SpYASDFGSSAK" "SYApSDFGSSAK" "SYASDFGpSSAK" "SYASDFGSpSAK"
#>
#> $LYSYYSSTESK
#> [1] "LpYSYYSSTESK" "LYpSYYSSTESK" "LYSpYYSSTESK" "LYSYpYSSTESK" "LYSYYpSSTESK"
#> [6] "LYSYYSpSTESK" "LYSYYSSpTESK" "LYSYYSSTEpSK"
Created on 2023-02-15 with reprex v2.0.2
2
-
Close, but still not quite correct: YSAHEEHHYDK shouldn't have "pT" in it (third string of
[1] "pYSAHEEHHYDK" "YpSAHEEHHYDK" "YSAHEEHHpTDK"
). This is a deceptively difficult question– jared_mamrot12 hours ago
-
@jared_mamrot, indeed and edited, though now it's just a mimic, and convoluted one, of G. Grothendieck's neat solution.
– margusl11 hours ago
0
A base variant using the method from @G.Grothendieck and @margusl might look like:
# Insert "p" in front of each matched position of a single string.
#
# s: a single character string.
# i: integer vector of 1-based match start positions within `s`, e.g. one
#    element of the list returned by gregexpr(). Entries that are -1
#    (gregexpr's "no match" marker) or NA are ignored.
# Returns a character vector with one copy of `s` per valid position, each
# with "p" inserted before that position; character(0) if nothing matched.
sIn <- function(s, i) {
  # Without this filter a no-match result (-1) would be used as a position
  # and produce a bogus paste0("p", s), e.g. "ABC" -> "pABC".
  i <- i[!is.na(i) & i > 0L]
  vapply(
    i,
    function(j) paste0(substr(s, 1L, j - 1L), "p", substring(s, j)),
    character(1)
  )
}
Map(sIn, s, gregexpr("[YST]", s))
#$YSAHEEHHYDK
#[1] "pYSAHEEHHYDK" "YpSAHEEHHYDK" "YSAHEEHHpYDK"
#
#$HEHISSDYAGK
#[1] "HEHIpSSDYAGK" "HEHISpSDYAGK" "HEHISSDpYAGK"
#
#$TFAHTESHISK
#[1] "pTFAHTESHISK" "TFAHpTESHISK" "TFAHTEpSHISK" "TFAHTESHIpSK"
#...
or
# Insert "p" at each position in `i` of the single string `s`, using the
# regmatches replacement machinery.
#
# s: a single character string.
# i: match positions for `s`, as returned by one element of gregexpr().
# Returns one copy of `s` per element of `i`, each with a single "p" inserted.
sIn2 <- function(s, i) {
  copies <- rep(s, length(i))
  # A match length of 0 turns every position into an empty match, so the
  # replacement inserts "p" there rather than overwriting a character.
  regmatches(copies, `attr<-`(i, "match.length", 0)) <- "p"
  copies
}
Map(sIn2, s, gregexpr("[YST]", s))
A simplification of the method from @onyambu could be:
# Match positions of Y, S or T for every string in `s`.
i <- gregexpr("[YST]", s)

# Repeat each string once per match and treat the flattened positions as
# zero-length matches, so that "p" is inserted in front of each residue.
`regmatches<-`(
  rep(s, times = lengths(i)),
  structure(unlist(i), match.length = 0),
  value = "p"
)
# [1] "pYSAHEEHHYDK" "YpSAHEEHHYDK" "YSAHEEHHpYDK" "HEHIpSSDYAGK" "HEHISpSDYAGK"
# [6] "HEHISSDpYAGK" "pTFAHTESHISK" "TFAHpTESHISK" "TFAHTEpSHISK" "TFAHTESHIpSK"
#[11] "IpSLGEHEGGGK" "LpSSGYDGTSYK" "LSpSGYDGTSYK" "LSSGpYDGTSYK" "LSSGYDGpTSYK"
#[16] "LSSGYDGTpSYK" "LSSGYDGTSpYK" "FGpTGTYAGGEK" "FGTGpTYAGGEK" "FGTGTpYAGGEK"
#[21] "VGApSTGYSGLK" "VGASpTGYSGLK" "VGASTGpYSGLK" "VGASTGYpSGLK" "pTASGVGGFSTK"
#[26] "TApSGVGGFSTK" "TASGVGGFpSTK" "TASGVGGFSpTK" "pSYASDFGSSAK" "SpYASDFGSSAK"
#[31] "SYApSDFGSSAK" "SYASDFGpSSAK" "SYASDFGSpSAK" "LpYSYYSSTESK" "LYpSYYSSTESK"
#[36] "LYSpYYSSTESK" "LYSYpYSSTESK" "LYSYYpSSTESK" "LYSYYSpSTESK" "LYSYYSSpTESK"
#[41] "LYSYYSSTEpSK"
Data
s <- c("YSAHEEHHYDK", "HEHISSDYAGK", "TFAHTESHISK", "ISLGEHEGGGK",
"LSSGYDGTSYK", "FGTGTYAGGEK", "VGASTGYSGLK", "TASGVGGFSTK",
"SYASDFGSSAK", "LYSYYSSTESK")
Not the answer you're looking for? Browse other questions tagged
or ask your own question.
or ask your own question.
|