@@ -122,13 +122,13 @@ pub struct EnzymeParameters {
122122 pub min_len : usize ,
123123 /// Inclusive
124124 pub max_len : usize ,
125- pub enyzme : Option < Enzyme > ,
125+ pub enzyme : Option < Enzyme > ,
126126}
127127
128128#[ derive( Clone ) ]
129129pub struct Enzyme {
130- // Skip cleaving if the site is followed matching this AA
131- pub skip_suffix : Option < char > ,
130+ // Skip cleaving if the site is followed by one of these AAs
131+ pub skip_suffix : [ bool ; 26 ] ,
132132 // Regex for matching cleavage sites
133133 regex : Regex ,
134134 // Cleave at c-terminal?
@@ -150,7 +150,7 @@ pub struct DigestSite {
150150impl Enzyme {
151151 pub fn new (
152152 cleave : & str ,
153- skip_suffix : Option < char > ,
153+ skip_suffix : & str ,
154154 c_terminal : bool ,
155155 semi_enzymatic : bool ,
156156 ) -> Option < Self > {
@@ -160,27 +160,31 @@ impl Enzyme {
160160 cleave
161161 ) ;
162162 assert ! (
163- skip_suffix
164- . map( |x| VALID_AA . contains( & ( x as u8 ) ) )
165- . unwrap_or( true ) ,
166- "Enzyme cleavage restriction is non-amino acid character: {}" ,
167- skip_suffix. unwrap( ) ,
163+ skip_suffix. chars( ) . all( |x| VALID_AA . contains( & ( x as u8 ) ) ) ,
164+ "Enzyme cleavage restriction contains non-amino acid characters: {}" ,
165+ skip_suffix,
168166 ) ;
169167
170168 // At this point, cleave can be three things: empty, "$", or a string of valid AA's
171169 match cleave {
172170 "" => None ,
173171 "$" => Some ( Enzyme {
174172 regex : Regex :: new ( "$" ) . unwrap ( ) ,
175- skip_suffix : None ,
173+ skip_suffix : [ false ; 26 ] ,
176174 // Allowing this to be set to false could cause unexpected behavior
177175 c_terminal : true ,
178176 // Do not allow strange behavior
179177 semi_enzymatic : false ,
180178 } ) ,
181179 _ => Some ( Enzyme {
182180 regex : Regex :: new ( & format ! ( "[{}]" , cleave. replace( '?' , "" ) ) ) . unwrap ( ) ,
183- skip_suffix,
181+ skip_suffix : {
182+ let mut arr = [ false ; 26 ] ;
183+ for b in skip_suffix. bytes ( ) {
184+ arr[ ( b - b'A' ) as usize ] = true ;
185+ }
186+ arr
187+ } ,
184188 c_terminal,
185189 semi_enzymatic,
186190 } ) ,
@@ -195,10 +199,8 @@ impl Enzyme {
195199 true => mat. end ( ) ,
196200 false => mat. start ( ) ,
197201 } ;
198- if let Some ( skip) = self . skip_suffix {
199- if right < sequence. len ( ) && sequence[ right..] . starts_with ( skip) {
200- continue ;
201- }
202+ if sequence. as_bytes ( ) . get ( right) . map_or ( false , |b| self . skip_suffix [ ( b - b'A' ) as usize ] ) {
203+ continue ;
202204 }
203205 sites. push ( DigestSite {
204206 site : left..right,
@@ -218,7 +220,7 @@ impl Enzyme {
218220
219221impl EnzymeParameters {
220222 pub fn cleavage_sites ( & self , sequence : & str ) -> Vec < DigestSite > {
221- match & self . enyzme {
223+ match & self . enzyme {
222224 Some ( enzyme) => enzyme. cleavage_sites ( sequence) ,
223225 None => {
224226 // Perform a non-specific digest
@@ -260,7 +262,7 @@ impl EnzymeParameters {
260262 }
261263
262264 fn is_semi_enzymatic ( & self ) -> bool {
263- match & self . enyzme {
265+ match & self . enzyme {
264266 Some ( enzyme) => enzyme. semi_enzymatic ,
265267 None => false ,
266268 }
@@ -297,7 +299,7 @@ impl EnzymeParameters {
297299 let mut sites = self . cleavage_sites ( sequence) ;
298300 // Allowing missed_cleavages with non-specific digest causes OOB panics
299301 // in the below indexing code
300- let missed_cleavages = match self . enyzme {
302+ let missed_cleavages = match self . enzyme {
301303 None => 0 ,
302304 _ => self . missed_cleavages ,
303305 } ;
@@ -420,7 +422,7 @@ mod test {
420422 min_len : 2 ,
421423 max_len : 50 ,
422424 missed_cleavages : 0 ,
423- enyzme : Enzyme :: new ( "KR" , Some ( 'P' ) , true , false ) ,
425+ enzyme : Enzyme :: new ( "KR" , "P" , true , false ) ,
424426 } ;
425427
426428 assert_eq ! (
@@ -453,7 +455,7 @@ mod test {
453455 min_len : 0 ,
454456 max_len : 50 ,
455457 missed_cleavages : 1 ,
456- enyzme : Enzyme :: new ( "KR" , Some ( 'P' ) , true , false ) ,
458+ enzyme : Enzyme :: new ( "KR" , "P" , true , false ) ,
457459 } ;
458460
459461 assert_eq ! (
@@ -490,7 +492,7 @@ mod test {
490492 min_len : 0 ,
491493 max_len : 50 ,
492494 missed_cleavages : 2 ,
493- enyzme : Enzyme :: new ( "KR" , Some ( 'P' ) , true , false ) ,
495+ enzyme : Enzyme :: new ( "KR" , "P" , true , false ) ,
494496 } ;
495497
496498 assert_eq ! (
@@ -518,7 +520,7 @@ mod test {
518520 min_len : 2 ,
519521 max_len : 50 ,
520522 missed_cleavages : 0 ,
521- enyzme : Enzyme :: new ( "KR" , None , true , false ) ,
523+ enzyme : Enzyme :: new ( "KR" , "" , true , false ) ,
522524 } ;
523525
524526 assert_eq ! (
@@ -539,7 +541,7 @@ mod test {
539541 min_len : 1 ,
540542 max_len : 50 ,
541543 missed_cleavages : 0 ,
542- enyzme : Enzyme :: new ( "D" , None , false , false ) ,
544+ enzyme : Enzyme :: new ( "D" , "" , false , false ) ,
543545 } ;
544546
545547 assert_eq ! (
@@ -568,7 +570,7 @@ mod test {
568570 min_len : 1 ,
569571 max_len : 50 ,
570572 missed_cleavages : 0 ,
571- enyzme : Enzyme :: new ( "FYWL" , None , true , false ) ,
573+ enzyme : Enzyme :: new ( "FYWL" , "" , true , false ) ,
572574 } ;
573575
574576 assert_eq ! (
@@ -594,7 +596,7 @@ mod test {
594596 min_len : 5 ,
595597 max_len : 5 ,
596598 missed_cleavages : 0 ,
597- enyzme : None ,
599+ enzyme : None ,
598600 } ;
599601
600602 assert_eq ! (
@@ -623,7 +625,7 @@ mod test {
623625 min_len : 5 ,
624626 max_len : 7 ,
625627 missed_cleavages : 0 ,
626- enyzme : Enzyme :: new ( "" , None , true , false ) ,
628+ enzyme : Enzyme :: new ( "" , "" , true , false ) ,
627629 } ;
628630
629631 assert_eq ! (
@@ -644,7 +646,7 @@ mod test {
644646 min_len : 0 ,
645647 max_len : usize:: MAX ,
646648 missed_cleavages : 0 ,
647- enyzme : Enzyme :: new ( "$" , None , true , false ) ,
649+ enzyme : Enzyme :: new ( "$" , "" , true , false ) ,
648650 } ;
649651
650652 assert_eq ! (
@@ -665,7 +667,7 @@ mod test {
665667 min_len : 2 ,
666668 max_len : usize:: MAX ,
667669 missed_cleavages : 0 ,
668- enyzme : Enzyme :: new ( "KR" , None , true , false ) ,
670+ enzyme : Enzyme :: new ( "KR" , "" , true , false ) ,
669671 } ;
670672
671673 assert_eq ! (
@@ -688,7 +690,7 @@ mod test {
688690 min_len : 2 ,
689691 max_len : 50 ,
690692 missed_cleavages : 0 ,
691- enyzme : Enzyme :: new ( "KR" , None , true , true ) ,
693+ enzyme : Enzyme :: new ( "KR" , "P" , true , true ) ,
692694 } ;
693695
694696 assert_eq ! (
@@ -738,7 +740,7 @@ mod test {
738740 min_len : 3 ,
739741 max_len : 50 ,
740742 missed_cleavages : 1 ,
741- enyzme : Enzyme :: new ( "KR" , None , true , true ) ,
743+ enzyme : Enzyme :: new ( "KR" , "P" , true , true ) ,
742744 } ;
743745
744746 for ( digest, expected) in tryp
@@ -794,7 +796,7 @@ mod test {
794796 min_len : 3 ,
795797 max_len : 50 ,
796798 missed_cleavages : 2 ,
797- enyzme : Enzyme :: new ( "KR" , None , true , true ) ,
799+ enzyme : Enzyme :: new ( "KR" , "" , true , true ) ,
798800 } ;
799801
800802 for digest in tryp. digest ( & sequence, Arc :: default ( ) ) {
0 commit comments