-
Notifications
You must be signed in to change notification settings - Fork 74
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
time windows in statistics #2948
base: main
Are you sure you want to change the base?
Changes from 2 commits
aeda4dc
0a30696
1a988c0
18ffda0
c6f9562
7a3149f
71da7ad
2a44909
4460db9
bbec6a9
da5f205
6b3ab4f
f2d857b
b8f4ba5
c9f6c06
59ea266
96ac0ce
26a7f09
8a8c05b
0d48891
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -1233,8 +1233,7 @@ | |||||
} | ||||||
|
||||||
static int | ||||||
tsk_treeseq_check_time_windows(tsk_size_t num_windows, | ||||||
const double *windows) | ||||||
tsk_treeseq_check_time_windows(tsk_size_t num_windows, const double *windows) | ||||||
{ | ||||||
int ret = TSK_ERR_BAD_WINDOWS; | ||||||
tsk_size_t j; | ||||||
|
@@ -1245,10 +1244,11 @@ | |||||
} | ||||||
|
||||||
if (windows[0] < 0) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If the code currently assumes this is 0, we should check for `== 0` here. |
||||||
goto out; | ||||||
goto out; | ||||||
} | ||||||
if (windows[num_windows] > INFINITY) { | ||||||
goto out; | ||||||
|
||||||
if (windows[0] != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
||||||
for (j = 0; j < num_windows; j++) { | ||||||
|
@@ -1256,7 +1256,7 @@ | |||||
goto out; | ||||||
} | ||||||
} | ||||||
ret = 0; | ||||||
out: | ||||||
return ret; | ||||||
} | ||||||
|
@@ -3513,35 +3513,21 @@ | |||||
return ret; | ||||||
} | ||||||
|
||||||
#define MAX(a,b) ((a) > (b) ? (a) : (b)) | ||||||
#define MIN(a,b) ((a) < (b) ? (a) : (b)) | ||||||
|
||||||
/* int getValue_nDimensions( int * baseAddress, int * indexes, int nDimensions ) { */ | ||||||
/* int i; */ | ||||||
/* int offset = 0; */ | ||||||
/* for( i = 0; i < nDimensions; i++ ) { */ | ||||||
/* offset += pow(LEN,i) * indexes[nDimensions - (i + 1)]; */ | ||||||
/* } */ | ||||||
|
||||||
/* return *(baseAddress + offset); */ | ||||||
/* } */ | ||||||
|
||||||
static int TSK_WARN_UNUSED | ||||||
tsk_treeseq_update_branch_afs(const tsk_treeseq_t *self, tsk_id_t u, double right, | ||||||
double *restrict last_update, | ||||||
const double *restrict time, tsk_id_t *restrict parent, const double *time_windows, | ||||||
const double *counts, tsk_size_t num_sample_sets, | ||||||
tsk_size_t num_time_windows, tsk_size_t window_index, tsk_size_t time_window_index, | ||||||
const tsk_size_t *result_dims, tsk_flags_t options, double *result) | ||||||
double *restrict last_update, const double *restrict time, tsk_id_t *restrict parent, | ||||||
const double *time_windows, const double *counts, tsk_size_t num_sample_sets, | ||||||
tsk_size_t num_time_windows, tsk_size_t window_index, const tsk_size_t *result_dims, | ||||||
tsk_flags_t options, double *result) | ||||||
{ | ||||||
int ret = 0; | ||||||
tsk_size_t afs_size; | ||||||
tsk_size_t k; | ||||||
tsk_size_t time_window_index; | ||||||
double *afs; | ||||||
tsk_size_t *coordinate = tsk_malloc(num_sample_sets * sizeof(*coordinate)); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Gee, wouldn't it be better to malloc this outside this function, and pass it in? (I honestly don't know...) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. HA! IDK, I'll leave it there for now, but yeah maybe! |
||||||
bool polarised = !!(options & TSK_STAT_POLARISED); | ||||||
const double *count_row = GET_2D_ROW(counts, num_sample_sets + 1, u); | ||||||
/* double x = (right - last_update[u]) * branch_length[u]; */ | ||||||
double x = 0; | ||||||
double t_v = 0; | ||||||
double tw_branch_length = 0; | ||||||
|
@@ -3550,23 +3536,31 @@ | |||||
ret = TSK_ERR_NO_MEMORY; | ||||||
goto out; | ||||||
} | ||||||
if (parent[u] != -1){ | ||||||
t_v = time[parent[u]]; | ||||||
if (0 < all_samples && all_samples < self->num_samples) { | ||||||
for (time_window_index = 0; time_window_index < num_time_windows; time_window_index++){ | ||||||
afs_size = result_dims[num_sample_sets]; | ||||||
afs = result + afs_size * (window_index * num_time_windows + time_window_index); | ||||||
for (k = 0; k < num_sample_sets; k++) { | ||||||
coordinate[k] = (tsk_size_t) count_row[k]; | ||||||
} | ||||||
if (!polarised){ | ||||||
fold(coordinate, result_dims, num_sample_sets); | ||||||
} | ||||||
tw_branch_length = MIN(time_windows[time_window_index + 1], t_v) - MAX(time_windows[0], time[u]); | ||||||
x = (right - last_update[u]) * tw_branch_length; | ||||||
increment_nd_array_value(afs, num_sample_sets, result_dims, coordinate, x); | ||||||
} | ||||||
} | ||||||
if (parent[u] != TSK_NULL) { | ||||||
t_v = time[parent[u]]; | ||||||
if (0 < all_samples && all_samples < self->num_samples) { | ||||||
time_window_index = 0; | ||||||
while (time_window_index < num_time_windows | ||||||
&& time_windows[time_window_index] < t_v) { | ||||||
/* for (time_window_index = 0; time_window_index < num_time_windows; | ||||||
* time_window_index++){ */ | ||||||
afs_size = result_dims[num_sample_sets]; | ||||||
afs = result | ||||||
+ afs_size * (window_index * num_time_windows + time_window_index); | ||||||
for (k = 0; k < num_sample_sets; k++) { | ||||||
coordinate[k] = (tsk_size_t) count_row[k]; | ||||||
} | ||||||
if (!polarised) { | ||||||
fold(coordinate, result_dims, num_sample_sets); | ||||||
} | ||||||
tw_branch_length = TSK_MIN(time_windows[time_window_index + 1], t_v) | ||||||
- TSK_MAX(time_windows[0], time[u]); | ||||||
x = (right - last_update[u]) * tw_branch_length; | ||||||
increment_nd_array_value( | ||||||
afs, num_sample_sets, result_dims, coordinate, x); | ||||||
time_window_index++; | ||||||
} | ||||||
} | ||||||
} | ||||||
last_update[u] = right; | ||||||
out: | ||||||
|
@@ -3582,7 +3576,7 @@ | |||||
{ | ||||||
int ret = 0; | ||||||
tsk_id_t u, v; | ||||||
tsk_size_t window_index, time_window_index; | ||||||
tsk_size_t window_index; | ||||||
tsk_size_t num_nodes = self->tables->nodes.num_rows; | ||||||
const tsk_id_t num_edges = (tsk_id_t) self->tables->edges.num_rows; | ||||||
const tsk_id_t *restrict I = self->tables->indexes.edge_insertion_order; | ||||||
|
@@ -3616,26 +3610,23 @@ | |||||
tk = 0; | ||||||
t_left = 0; | ||||||
window_index = 0; | ||||||
time_window_index = 0; | ||||||
while (tj < num_edges || t_left < sequence_length) { | ||||||
tsk_bug_assert(window_index < num_windows); | ||||||
while (tk < num_edges && edge_right[O[tk]] == t_left) { | ||||||
h = O[tk]; | ||||||
tk++; | ||||||
u = edge_child[h]; | ||||||
v = edge_parent[h]; | ||||||
ret = tsk_treeseq_update_branch_afs(self, u, t_left, | ||||||
last_update, node_time, parent, time_windows, counts, num_sample_sets, | ||||||
num_time_windows, window_index, time_window_index, | ||||||
result_dims, options, result); | ||||||
ret = tsk_treeseq_update_branch_afs(self, u, t_left, last_update, node_time, | ||||||
parent, time_windows, counts, num_sample_sets, num_time_windows, | ||||||
window_index, result_dims, options, result); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
while (v != TSK_NULL) { | ||||||
ret = tsk_treeseq_update_branch_afs(self, v, t_left, | ||||||
last_update, node_time, parent, time_windows, counts, | ||||||
num_sample_sets, num_time_windows, window_index, | ||||||
time_window_index, result_dims, options, result); | ||||||
ret = tsk_treeseq_update_branch_afs(self, v, t_left, last_update, | ||||||
node_time, parent, time_windows, counts, num_sample_sets, | ||||||
num_time_windows, window_index, result_dims, options, result); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
@@ -3654,10 +3645,9 @@ | |||||
parent[u] = v; | ||||||
branch_length[u] = node_time[v] - node_time[u]; | ||||||
while (v != TSK_NULL) { | ||||||
ret = tsk_treeseq_update_branch_afs(self, v, t_left, | ||||||
last_update, node_time, parent, time_windows, counts, | ||||||
num_sample_sets, num_time_windows, window_index, | ||||||
time_window_index, result_dims, options, result); | ||||||
ret = tsk_treeseq_update_branch_afs(self, v, t_left, last_update, | ||||||
node_time, parent, time_windows, counts, num_sample_sets, | ||||||
num_time_windows, window_index, result_dims, options, result); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
@@ -3679,10 +3669,9 @@ | |||||
/* Flush the contributions of all nodes to the current window */ | ||||||
for (u = 0; u < (tsk_id_t) num_nodes; u++) { | ||||||
tsk_bug_assert(last_update[u] < w_right); | ||||||
ret = tsk_treeseq_update_branch_afs(self, u, w_right, | ||||||
last_update, node_time, parent, time_windows, counts, | ||||||
num_sample_sets, num_time_windows, window_index, | ||||||
time_window_index, result_dims, options, result); | ||||||
ret = tsk_treeseq_update_branch_afs(self, u, w_right, last_update, | ||||||
node_time, parent, time_windows, counts, num_sample_sets, | ||||||
num_time_windows, window_index, result_dims, options, result); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
@@ -3755,8 +3744,12 @@ | |||||
num_time_windows = 1; | ||||||
time_windows = default_time_windows; | ||||||
} else { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. After this line is probably the right place to check if it's |
||||||
ret = tsk_treeseq_check_time_windows( | ||||||
num_time_windows, time_windows); | ||||||
if (stat_site | ||||||
&& tsk_memcmp(time_windows, default_time_windows, sizeof(double)) != 0) { | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm, this is a bit awkward - what if instead we used There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. But time_windows are always initialized by default as [0, inf], so num_time_windows=2; comparing to the default was the clearest approach I found for now. But maybe the problem lies in the initialization caused by the parsing of the windows in the first place. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, wait - we're already in the
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, if someone explicitly specifies |
||||||
ret = TSK_ERR_UNSUPPORTED_STAT_MODE; | ||||||
goto out; | ||||||
} | ||||||
ret = tsk_treeseq_check_time_windows(num_time_windows, time_windows); | ||||||
if (ret != 0) { | ||||||
goto out; | ||||||
} | ||||||
|
@@ -3796,7 +3789,6 @@ | |||||
count_row[num_sample_sets] = 1; | ||||||
} | ||||||
result_dims[num_sample_sets] = (tsk_size_t) afs_size; | ||||||
// Initiate memory for result array | ||||||
tsk_memset(result, 0, num_windows * num_time_windows * afs_size * sizeof(*result)); | ||||||
|
||||||
if (stat_site) { | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2263,7 +2263,7 @@ def test_output_dims(self): | |
n = len(samples) | ||
time_windows = [0, np.inf] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we should be testing with more than one time window as well here, probably - adding another |
||
|
||
for mode in ["site", "branch"]: | ||
for mode in ["branch"]: | ||
for s in [[n], [n - 2, 2], [n - 4, 2, 2], [1] * n]: | ||
s = np.array(s, dtype=np.uint32) | ||
windows = [0, L] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
uh-oh, are these tabs?