Created
March 4, 2019 03:07
-
-
Save spazm/a85f04ebbbf4f3defca4dd6d6bba5490 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use itertools::Itertools; | |
use std::collections::HashMap; | |
/// Split a string into consecutive substrings of `sub_size` characters each.
///
/// Splitting happens on `char` boundaries, so a multi-byte character is never
/// cut in half. The final element may be shorter than `sub_size` when the
/// number of characters in `source` is not a multiple of it.
///
/// Returns an empty vector when `source` is empty, and also when `sub_size`
/// is 0: a zero width cannot describe any chunk, so this is treated as
/// "no chunks" rather than a panic.
fn sub_strings(source: &str, sub_size: usize) -> Vec<String> {
    // `slice::chunks` panics on a zero chunk size, so reject it up front.
    if sub_size == 0 {
        return Vec::new();
    }

    // Collect the characters first so chunking counts `char`s, not bytes.
    let chars: Vec<char> = source.chars().collect();
    chars
        .chunks(sub_size)
        .map(|chunk| chunk.iter().collect())
        .collect()
}
pub struct CodonsInfo<'a> { | |
names: HashMap<&'a str, &'a str>, | |
} | |
impl<'a> CodonsInfo<'a> { | |
/// translate three character strings describing a protein sequence | |
/// to an Option string of the full name of the amino acid. | |
/// | |
/// The protein sequence should be in uppercase. | |
/// The amino acid name should be in lowercase. | |
/// | |
/// e.g. "AUG" => Some("methionine"). | |
/// | |
/// | Codon | Protein | | |
/// |---------------------|---------------| | |
/// | AUG | Methionine | | |
/// | UUU, UUC | Phenylalanine | | |
/// | UUA, UUG | Leucine | | |
/// | UCU, UCC, UCA, UCG | Serine | | |
/// | UAU, UAC | Tyrosine | | |
/// | UGU, UGC | Cysteine | | |
/// | UGG | Tryptophan | | |
/// | UAA, UAG, UGA | STOP | | |
/// | |
pub fn name_for(&self, codon: &str) -> Option<&'a str> { | |
self.names.get(codon).map(|&s| s) | |
} | |
/// Return a list of protein names that correspond to the RNA string or None if the RNA string is invalid | |
/// | |
/// iterate through rna string by codon (three characters at a time) and | |
/// check for valid amino names | |
/// stop if a STOP codon is reached. | |
pub fn of_rna(&self, rna: &str) -> Option<Vec<&'a str>> { | |
let mut v = vec![]; | |
for codon in sub_strings(rna, 3) { | |
match self.name_for(&codon) { | |
None => break, | |
Some("stop codon") => break, | |
Some(amino_name) => v.push(amino_name), | |
} | |
} | |
if v.is_empty() { | |
None | |
} else { | |
Some(v) | |
} | |
} | |
} | |
/// Consume a collection of pairs of (RNA protein sequence, Amino acid name) to | |
/// create a CodonsInfo object that can map from RNA protein sequence to Amino acid name | |
pub fn parse<'a>(pairs: Vec<(&'a str, &'a str)>) -> CodonsInfo<'a> { | |
let mut names = HashMap::new(); | |
for (rna, amino) in pairs { | |
names.insert(rna, amino); | |
} | |
CodonsInfo { names } | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment