Marcel Ehrhardt
Free University Berlin, de.NBI
Enrico Seiler
Max Planck Institute for Molecular Genetics
// Example: derive the complement, the reverse complement and the amino-acid
// translation of one DNA sequence by piping it through range adaptors.
// 5′‑ATGCCACGAGTTGAC‑3′
seqan3::dna4_vector sequence{"ATGCCACGAGTTGAC"_dna4};
// 5′‑TACGGTGCTCAACTG‑3′
auto complement = sequence | seqan3::views::complement;
// 3′‑TACGGTGCTCAACTG‑5′ = "GTCAACTCGTGGCAT"_dna4
auto reverse_complement = complement | std::views::reverse;
// Met-Pro-Arg-Val-Asp = "MPRVD"_aa27
// (the codons ATG|CCA|CGA|GTT|GAC of `sequence`)
auto amino_acid_sequence = sequence
| seqan3::views::translate_single;
// Nucleotide scoring scheme: +4 for a match, -5 for a mismatch.
auto scoring_scheme = seqan3::nucleotide_scoring_scheme{
seqan3::match_score{4},
seqan3::mismatch_score{-5}};
// Pairwise-alignment configuration, composed from single elements with `|`:
// global method, the scoring scheme defined above, affine gap costs
// (open score 0, extension score -1), parallel{5}, and output of both the
// score and the alignment.
auto config =
seqan3::align_cfg::method_global{} |
seqan3::align_cfg::scoring_scheme{scoring_scheme} |
seqan3::align_cfg::gap_cost_affine{
seqan3::align_cfg::open_score{0},
seqan3::align_cfg::extension_score{-1}
} |
seqan3::align_cfg::parallel{5} |
seqan3::align_cfg::output_score |
seqan3::align_cfg::output_alignment;
// Example: align two DNA sequences using `config` and inspect every result.
auto sequence1 = "ATGCCACGAGTTGAC"_dna4;
auto sequence2 = "ACGCCACGAGTGAC"_dna4;
// std::tie bundles references to both sequences into one tuple argument.
auto sequence_pair = std::tie(sequence1, sequence2);
auto alignments =
seqan3::align_pairwise(sequence_pair, config);
// seqan3::alignment_result is a class template; its bare name cannot be used
// to declare a reference, so let `auto` deduce the concrete result type.
// Only read accessors are called below, hence the const reference.
for (auto const & result : alignments)
{
// ATGCCACGAGTTGAC
// | ||||||||| |||
// ACGCCACGAGT-GAC
auto & [gapped_sequence1, gapped_sequence2]
= result.alignment();
// matches: 13, mismatches: 1, indel: 1
// score: 13 * 4 + 1 * -5 + 1 * -1 = 46
std::signed_integral auto score = result.score();
}
// Example: build an FM index over a genome, write it to disk and read it back.
seqan3::dna4_vector genome{
"ATCTGACGAAGGCTAGCTAGCTAAGGGA"_dna4};
seqan3::fm_index index{genome}; // build index
// Each archive lives in its own scope so that the stream and the archive are
// destroyed (and the file closed) at the closing brace, before the next step.
{ // storing the index
std::ofstream os{"my_index.index", std::ios::binary};
cereal::BinaryOutputArchive oarchive{os};
oarchive(index);
}
{ // loading the index
std::ifstream is{"my_index.index", std::ios::binary};
cereal::BinaryInputArchive iarchive{is};
iarchive(index);
}
// Example: search two queries in `index`, allowing one error in total.
// The configuration is composed with `|`: hit_all, max_error_total with an
// error_count of 1, parallel{8}, and output of query id, reference id and
// reference begin position.
auto config =
seqan3::search_cfg::hit_all{} |
seqan3::search_cfg::max_error_total{
seqan3::search_cfg::error_count{1}} |
seqan3::search_cfg::parallel{8} |
seqan3::search_cfg::output_query_id{} |
seqan3::search_cfg::output_reference_id{} |
seqan3::search_cfg::output_reference_begin_position{};
std::vector<seqan3::dna4_vector> queries{
"GCT"_dna4, "ACCC"_dna4};
auto results = seqan3::search(queries, index, config);
// seqan3::search_result is a class template; its bare name cannot be used to
// declare a reference, so let `auto` deduce the concrete result type.
// Only read accessors are called below, hence the const reference.
for (auto const & result : results)
{
// 0 - GCT
size_t query_id = result.query_id();
// ATCTGACGAAGGCTAGCTAGCTAAGGGA
// e.g. 11, 15, 19
size_t position = result.reference_begin_position();
}
.fa,.fq,.embl,.gbk
).sam,.bam
)
// Example: read a FASTQ file record by record and access id, sequence and
// base qualities of each record.
seqan3::sequence_file_input fin{"my.fastq"};
// seqan3::sequence_record is a class template; its bare name cannot be used to
// declare a reference, so let `auto` deduce the file's record type.
for (auto & record : fin)
{
// e.g. "sequence1"
std::string id = record.id();
// e.g. "ATCTGACGAAGGCTAGCTAGCTAAGGGA"_dna5
seqan3::dna5_vector sequence = record.sequence();
// e.g. "##!!####!!####!!####!!####!!"_phred42
// NOTE(review): assumes the file's quality alphabet is seqan3::phred42 —
// confirm against the SeqAn3 version in use.
std::vector<seqan3::phred42> qualities
= record.base_qualities();
}
// Example: copy every record from "my.fastq" to "my.fasta", one at a time.
seqan3::sequence_file_input fin{"my.fastq"};
seqan3::sequence_file_output fout{"my.fasta"};
// seqan3::sequence_record is a class template; its bare name cannot be used to
// declare a reference, so let `auto` deduce the file's record type.
for (auto & record : fin)
{
fout.push_back(record);
}
// Example: the same "my.fastq" -> "my.fasta" transfer written as a single
// assignment of the input file to the output file instead of an explicit loop.
seqan3::sequence_file_input fin{"my.fastq"};
seqan3::sequence_file_output fout{"my.fasta"};
fout = fin;
test
s of the last release
seqan3::phred94
seqan3::literals
namespace
// Bring the seqan3::literals namespace into scope, then use the `_dna4`
// character literal suffix.
using namespace seqan3::literals;
seqan3::dna4 adenine = 'A'_dna4;
// Example: print the id of every record in "my.fastq", one per line.
seqan3::sequence_file_input fin{"my.fastq"};
// seqan3::sequence_record is a class template; its bare name cannot be used to
// declare a reference, so let `auto` deduce the file's record type.
for (auto & record : fin)
std::cout << "id: " << record.id() << '\n';
doc/ APP documentation
include/ APP header files
lib/ external libraries (default: SeqAn3)
src/ APP source files
test/ APP tests
test/
api/ API tests
benchmark/ Benchmarks
cli/ CLI tests
coverage/ Code Coverage
data/ Test Data