Skip to content

Commit

Permalink
Merge branch 'master' of ssh://igit.odena.eu:5000/franklin/asgart
Browse files Browse the repository at this point in the history
  • Loading branch information
delehef committed Sep 24, 2019
2 parents ca63f68 + fe439e5 commit 7cc43ff
Show file tree
Hide file tree
Showing 18 changed files with 1,286 additions and 843 deletions.
40 changes: 32 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,18 +238,24 @@ plots, flat plots, genome plots and Circos plots.
- `--features FILE` add an additional track containing features to
plot alongside the duplications.

- `--restrict-fragments A B ...` only plots fragments whose names are given
- `--restrict-fragments A B ...` only plots fragments whose names
are given

- `--exclude-fragments A B ...` do not plot fragments whose names are given
- `--exclude-fragments A B ...` do not plot fragments whose names
are given

- `--filter-features DISTANCE` don't plot duplications that are
farther away than `DISTANCE` bp from the features in the track.

- `--colorize TYPE` set the method used to colorize the duplicons. Options are
`by-type` (different colors for direct and palindromic duplications); `by-position` (color
depends on the duplication position within the input file(s)); `by-fragment` (each
duplication is colorized according to its left-most duplicons); `none` (all are drawn in
medium grey).
- `--min-thickness` set the minimal graphical width of a duplicon
(default: 0.1)

- `--colorize TYPE` set the method used to colorize the duplicons.
Options are `by-type` (different colors for direct and palindromic
duplications); `by-position` (color depends on the duplication
position within the input file(s)); `by-fragment` (each
duplication is colorized according to its left-most duplicons);
`none` (all are drawn in medium grey).

### Features File Format

Expand Down Expand Up @@ -340,13 +346,31 @@ manually replaced.

_Please note that ASGART follows the [semver](https://semver.org/) versioning scheme, where an increase in the major version number reflects a non-backward-compatible update._

## v2.1.0
## v2.1.1

- Various minor refactoring & bug-fixes

## v2.1.1

- `asgart-concat` has been renamed to `asgart-cat`
- `asgart-cat` now offers filtering options
- `asgart-cat` now takes advantage of multi-core CPUs when possible
- `asgart-plot` now offers more filtering options
- `asgart-plot` now lets the user customize the minimal graphical
width of a duplicon with `--min-thickness`
- `asgart-plot` now offers several algorithms to set duplicon colors
- Various bug-fixes

## v2.1.1
- Fix manifest file

## v2.1.0

- Ensure that multiple fragments in a mFASTA file are processed separately
- Add a flag to specify the minimum width of a chord
- Add filtering options
- Add tooltips to chord graphs
- Fix output files naming scheme

## v2.0.2

Expand Down
207 changes: 129 additions & 78 deletions src/automaton.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
extern crate rayon;
extern crate indicatif;
extern crate rayon;

use std::cmp;
use std::fmt;

use super::divsufsort::*;
use super::structs::{RunSettings, ProtoSD, ProtoSDsFamily};
use super::searcher::Searcher;
use std::sync::atomic::{AtomicUsize, Ordering};
use super::structs::{ProtoSD, ProtoSDsFamily, RunSettings};
use rayon::prelude::*;

use std::sync::atomic::{AtomicUsize, Ordering};

#[derive(Clone)]
pub struct Segment {
Expand All @@ -20,11 +19,13 @@ pub struct Segment {

impl fmt::Debug for Segment {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f,
"S{{{} -> {}}} ({})",
self.start,
self.end,
self.end - self.start)
write!(
f,
"S{{{} -> {}}} ({})",
self.start,
self.end,
self.end - self.start
)
}
}
impl Segment {
Expand All @@ -33,15 +34,13 @@ impl Segment {
}
}

#[derive(Debug,Clone)]
#[derive(Debug, Clone)]
pub struct ProtoProtoSD {
bottom: usize,
top: usize,
matches: Vec<Segment>,
}



#[derive(Debug)]
struct Arm {
left: Segment,
Expand All @@ -53,111 +52,164 @@ struct Arm {
}

enum Operation {
ExtendArm {i: usize, l_end: usize, r_end: usize},
NewArm {i: usize, m_start: usize, m_end: usize},
ExtendArm {
i: usize,
l_end: usize,
r_end: usize,
},
NewArm {
i: usize,
m_start: usize,
m_end: usize,
},
}

#[allow(clippy::too_many_arguments)]
pub fn search_duplications(
id: usize,
needle: &[u8], needle_offset: usize,
strand: &[u8], sa: &[SAIdx],
needle: &[u8],
needle_offset: usize,
strand: &[u8],
sa: &[SAIdx],
searcher: &Searcher,
progress: &AtomicUsize,
settings: RunSettings
settings: RunSettings,
) -> Vec<ProtoSDsFamily> {
fn try_extend_arms(arms: &[Arm], m: &Segment, e: i64, i: usize, ps: usize) -> Operation {
for (j, a) in arms.iter().enumerate() {
if a.active && d_ss(&a.right, m) < cmp::max(e, (0.1*a.left.len() as f64) as i64) as i64 && m.end > a.right.end {
return Operation::ExtendArm {i: j, l_end: i + ps, r_end: m.end}
if a.active
&& d_ss(&a.right, m) < cmp::max(e, (0.1 * a.left.len() as f64) as i64) as i64
&& m.end > a.right.end
{
return Operation::ExtendArm {
i: j,
l_end: i + ps,
r_end: m.end,
};
}
}

Operation::NewArm {i, m_start: m.start, m_end: m.end}
Operation::NewArm {
i,
m_start: m.start,
m_end: m.end,
}
}

let mut arms : Vec<Arm> = Vec::new();
let mut arms: Vec<Arm> = Vec::new();
let mut i = 0;
let mut r = Vec::new();
let mut current_family_id = 1;
let step_size = settings.probe_size/2;
let step_size = settings.probe_size / 2;

if needle.len() < settings.min_duplication_length {
return Vec::new();
}

while i < needle.len() - settings.probe_size - step_size {
i += step_size;
progress.store(i, Ordering::Relaxed);

if needle[i] == b'N' { continue }
let matches: Vec<Segment> = searcher.search(strand, sa, &needle[i .. i + settings.probe_size])
if needle[i] == b'N' {
continue;
}
let matches: Vec<Segment> = searcher
.search(strand, sa, &needle[i..i + settings.probe_size])
.into_iter()
.filter(|m| m.start != i)
.filter(|m| if !settings.reverse { m.start > i + needle_offset }
else { m.start >= needle_offset + needle.len() - i })
.filter(|m| {
if !settings.reverse {
m.start > i + needle_offset
} else {
m.start >= needle_offset + needle.len() - i
}
})
.collect();
if matches.len() > settings.max_cardinality {continue}
if matches.len() > settings.max_cardinality {
continue;
}

// Reset dirty bits of arms
arms.iter_mut().for_each(|arm| arm.dirty = false);

let todo = matches
.par_iter()
.with_min_len(8)
.map(|m| try_extend_arms(&arms, m, i64::from(settings.max_gap_size), i, settings.probe_size) )
.map(|m| {
try_extend_arms(
&arms,
m,
i64::from(settings.max_gap_size),
i,
settings.probe_size,
)
})
.collect::<Vec<_>>();

todo.iter()
.for_each(|op| {
if let Operation::ExtendArm {i, l_end, r_end} = op {
arms[*i].left.end = *l_end;
arms[*i].right.end = *r_end;
arms[*i].dirty = true;
arms[*i].gap = 0;
}
});

todo.iter()
.for_each(|op| {
if let Operation::NewArm {i, m_start, m_end} = op {
arms.push(Arm{
left: Segment{start: *i, end: *i + settings.probe_size, tag: 0},
right: Segment{start: *m_start, end: *m_end, tag: 0},
family_id: format!("{}-{}", id, current_family_id),
active: true, dirty: false,
gap: 0
})
}
});
todo.iter().for_each(|op| {
if let Operation::ExtendArm { i, l_end, r_end } = op {
arms[*i].left.end = *l_end;
arms[*i].right.end = *r_end;
arms[*i].dirty = true;
arms[*i].gap = 0;
}
});

todo.iter().for_each(|op| {
if let Operation::NewArm { i, m_start, m_end } = op {
arms.push(Arm {
left: Segment {
start: *i,
end: *i + settings.probe_size,
tag: 0,
},
right: Segment {
start: *m_start,
end: *m_end,
tag: 0,
},
family_id: format!("{}-{}", id, current_family_id),
active: true,
dirty: false,
gap: 0,
})
}
});

// Update the gaps of non-dirty arms
arms.iter_mut()
.filter(|a| !a.dirty)
.for_each(|a|{
a.gap += step_size;
if a.gap as u32 >= settings.max_gap_size { a.active = false }
});
arms.iter_mut().filter(|a| !a.dirty).for_each(|a| {
a.gap += step_size;
if a.gap as u32 >= settings.max_gap_size {
a.active = false
}
});

if arms.len() > 200 {
arms.retain(|a| {
a.active || a.left.len() >= settings.min_duplication_length || a.right.len() >= settings.min_duplication_length
a.active
|| a.left.len() >= settings.min_duplication_length
|| a.right.len() >= settings.min_duplication_length
});
}

// Check if there are still extending arms
if !arms.is_empty() && arms.iter().all(|a| !a.active) {
let family: ProtoSDsFamily = arms.iter()
let family: ProtoSDsFamily = arms
.iter()
.filter(|a| a.right.len() >= settings.min_duplication_length)
.map(|a| {
ProtoSD {
left: a.left.start,
right: a.right.start,
left_length: a.left.len(),
right_length: a.right.len(),
identity: 0.,
reversed: false,
complemented: false,
}})
.map(|a| ProtoSD {
left: a.left.start,
right: a.right.start,
left_length: a.left.len(),
right_length: a.right.len(),
identity: 0.,
reversed: false,
complemented: false,
})
.collect();
if !family.is_empty() { r.push(family); }
if !family.is_empty() {
r.push(family);
}
arms.clear();

current_family_id += 1;
Expand All @@ -167,14 +219,13 @@ pub fn search_duplications(
r
}



fn d_ss(a: &Segment, m: &Segment) -> i64 {
if (m.start >= a.start && m.start <= a.end)
|| (m.end >= a.start && m.end <= a.end) {
0
} else {
cmp::min((a.start as i64 - m.end as i64).abs(),
(a.end as i64 - m.start as i64).abs())
}
if (m.start >= a.start && m.start <= a.end) || (m.end >= a.start && m.end <= a.end) {
0
} else {
cmp::min(
(a.start as i64 - m.end as i64).abs(),
(a.end as i64 - m.start as i64).abs(),
)
}
}
1 change: 0 additions & 1 deletion src/bin/asgart-cat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ use asgart::log::LevelFilter;
use asgart::exporters::Exporter;
use asgart::exporters;
use asgart::logger::Logger;
use asgart::rayon::prelude::*;
use asgart::errors::*;
use asgart::structs::*;

Expand Down
Loading

0 comments on commit 7cc43ff

Please sign in to comment.