Skip to content

Commit e3cf866

Browse files
Spotted a few bugs, but have not yet sorted out the problem with unpacking to the new top_task->ci/cj arrays. The code now crashes when trying to recurse and add entries to the top_task->ci/cj arrays.
1 parent 42e4332 commit e3cf866

File tree

2 files changed

+64
-28
lines changed

2 files changed

+64
-28
lines changed

src/runner_doiact_functions_hydro_gpu.h

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -331,14 +331,16 @@ void runner_recurse_gpu(struct runner *r, struct scheduler *s,
331331
else if (CELL_IS_ACTIVE(ci, e) || CELL_IS_ACTIVE(cj, e)) {
332332
/* if any cell empty: skip */
333333
if(ci->hydro.count == 0 || cj->hydro.count == 0) return;
334+
int leafs_found = *n_leafs_found;
334335
/*for all leafs to be sent add to cell list */
335-
cells_left[*n_leafs_found] = ci;
336-
cells_right[*n_leafs_found] = cj;
336+
cells_left[leafs_found] = ci;
337+
cells_right[leafs_found] = cj;
337338
/*Add leaf cells to list for each top_level task*/
338-
pack_vars->leaf_list[pack_vars->top_tasks_packed].ci[*n_leafs_found] = ci;
339-
pack_vars->leaf_list[pack_vars->top_tasks_packed].cj[*n_leafs_found] = cj;
339+
pack_vars->leaf_list[pack_vars->top_tasks_packed].ci[leafs_found] = ci;
340+
pack_vars->leaf_list[pack_vars->top_tasks_packed].cj[leafs_found] = cj;
340341
pack_vars->leaf_list[pack_vars->top_tasks_packed].n_leaves++;
341-
*n_leafs_found = *n_leafs_found + 1;
342+
// error("stop");
343+
*n_leafs_found = leafs_found + 1;
342344
if(*n_leafs_found >= n_expected_tasks)
343345
error("Created %i more than expected leaf cells. depth %i", *n_leafs_found, depth);
344346
}
@@ -1601,10 +1603,10 @@ void runner_dopair1_unpack_f4(
16011603
int topid;
16021604
int pack_length_unpack = 0;
16031605
ticks total_cpu_unpack_ticks = 0;
1604-
for (topid = 0; topid < pack_vars->top_tasks_packed; topid++) {
1606+
for (topid = 0; topid < pack_vars->top_tasks_packed - 1; topid++) {
16051607
//lock top level cell here
1606-
struct cell * cii = pack_vars->top_task_list[topid]->ci;
1607-
struct cell * cjj = pack_vars->top_task_list[topid]->cj;
1608+
// struct cell * cii = pack_vars->top_task_list[topid]->ci;
1609+
// struct cell * cjj = pack_vars->top_task_list[topid]->cj;
16081610
const ticks tic = getticks();
16091611
/* Do the copy */
16101612

@@ -1613,6 +1615,11 @@ void runner_dopair1_unpack_f4(
16131615
//Get pointers to the leaf cells. SEEMS I'm NOT GETTING A CORRECT POINTER
16141616
struct cell * cii_l = pack_vars->leaf_list[topid].ci[tid];
16151617
struct cell * cjj_l = pack_vars->leaf_list[topid].cj[tid];
1618+
message("loc %f %f %f topid %i tid %i nleaves %i", pack_vars->leaf_list[topid].ci[tid]->loc[0]
1619+
, pack_vars->leaf_list[topid].ci[tid]->loc[1]
1620+
, pack_vars->leaf_list[topid].ci[tid]->loc[2]
1621+
, topid, tid, n_leaves_in_task);
1622+
// if(*cii_l == NULL || *cjj_l == NULL)error("stop");
16161623
runner_do_ci_cj_gpu_unpack_neat_aos_f4(
16171624
r, cii_l, cjj_l, parts_recv, 0, &pack_length_unpack, tid,
16181625
2 * pack_vars->count_max_parts, e);

src/runner_main_clean.cu

Lines changed: 49 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -547,17 +547,17 @@ void *runner_main2(void *data) {
547547
struct leaf_cell_list l_list[target_n_tasks];
548548
pack_vars_pair_dens->leaf_list = (struct leaf_cell_list *)calloc(target_n_tasks, sizeof(struct leaf_cell_list));
549549
for (int i = 0; i < target_n_tasks; i++){
550-
l_list[i].ci = (struct cell **)calloc(n_leaves_max, sizeof(struct cell *));
551-
l_list[i].cj = (struct cell **)calloc(n_leaves_max, sizeof(struct cell *));
552-
l_list[i].n_leaves = 0;
553-
pack_vars_pair_dens->leaf_list[i].ci = (struct cell **)calloc(n_leaves_max, sizeof(struct cell *));
554-
pack_vars_pair_dens->leaf_list[i].cj = (struct cell **)calloc(n_leaves_max, sizeof(struct cell *));
550+
// l_list[i].ci = (struct cell **)calloc(n_leaves_max, sizeof(struct cell *));
551+
// l_list[i].cj = (struct cell **)calloc(n_leaves_max, sizeof(struct cell *));
552+
// l_list[i].n_leaves = 0;
553+
pack_vars_pair_dens->leaf_list[i].ci = malloc(n_leaves_max * sizeof(struct cell *));
554+
pack_vars_pair_dens->leaf_list[i].cj = malloc(n_leaves_max * sizeof(struct cell *));
555555
pack_vars_pair_dens->leaf_list[i].n_leaves = 0;
556-
for (int j = 0; j < n_leaves_max; j++){
557-
pack_vars_pair_dens->leaf_list[i].ci[j] = l_list[i].ci[j];
558-
pack_vars_pair_dens->leaf_list[i].cj[j] = l_list[i].cj[j];
559-
560-
}
556+
pack_vars_pair_dens->leaf_list[i].n_packed = 0;
557+
// for (int j = 0; j < n_leaves_max; j++){
558+
// pack_vars_pair_dens->leaf_list[i].ci[j] = l_list[i].ci[j];
559+
// pack_vars_pair_dens->leaf_list[i].cj[j] = l_list[i].cj[j];
560+
// }
561561
}
562562
// pack_vars_pair_dens->leaf_list = l_list;
563563
// pack_vars_pair_dens->leaf_list->ci =
@@ -965,26 +965,46 @@ void *runner_main2(void *data) {
965965
//We need to allocate a list to put cell pointers into for each new task
966966
int n_expected_tasks = 4096; //A. Nasar: Need to come up with a good estimate for this
967967
int n_leaves_found = 0;
968+
int top_tasks_packed = pack_vars_pair_dens->top_tasks_packed;
968969
int depth = 0;
969970
struct cell * cells_left[n_expected_tasks];
970971
struct cell * cells_right[n_expected_tasks];
972+
pack_vars_pair_dens->leaf_list[top_tasks_packed].n_leaves = 0;
971973
runner_recurse_gpu(r, sched, pack_vars_pair_dens, ci, cj, t,
972974
parts_aos_pair_f4_send, e, fparti_fpartj_lparti_lpartj_dens, &n_leaves_found,
973975
cells_left, cells_right, depth, n_expected_tasks);
974976
n_leafs_total += n_leaves_found;
975-
977+
// if(n_leaves_found > 4 && r->cpuid == 0){
978+
// fprintf(stderr, "leaves found %i\n", n_leaves_found);
979+
// for (int i = 0; i< n_leaves_found; i++){
980+
// int tt = pack_vars_pair_dens->top_tasks_packed;
981+
// fprintf(stderr, "ci->loc %f %f %f\n",
982+
// pack_vars_pair_dens->leaf_list[tt].ci[i]->loc[0],
983+
// pack_vars_pair_dens->leaf_list[tt].ci[i]->loc[1],
984+
// pack_vars_pair_dens->leaf_list[tt].ci[i]->loc[2]);
985+
//
986+
// fprintf(stderr, "cj->loc %f %f %f\n\n",
987+
// pack_vars_pair_dens->leaf_list[tt].cj[i]->loc[0],
988+
// pack_vars_pair_dens->leaf_list[tt].cj[i]->loc[1],
989+
// pack_vars_pair_dens->leaf_list[tt].cj[i]->loc[2]);
990+
// }
991+
// error("stop");
992+
// }
976993
int cstart = 0, cid = 0;
977-
pack_vars_pair_dens->top_task_list[pack_vars_pair_dens->top_tasks_packed] = t;
994+
pack_vars_pair_dens->top_task_list[top_tasks_packed] = t;
995+
//This might be abit iffy setting it to zero here. What if we loop through a task twice for recursion but do not offload the second time? We could be unpacking to the wrong leaves
996+
pack_vars_pair_dens->leaf_list[top_tasks_packed].n_packed = 0;
978997
pack_vars_pair_dens->top_tasks_packed++;
979998
pack_vars_pair_dens->task_locked = 1;
980-
//This might be abit iffy setting it to zero here. What if we loop through a task twice for recursion but do not offload the second time? We could be unpacking to the wrong leaves
981-
pack_vars_pair_dens->leaf_list[pack_vars_pair_dens->top_tasks_packed - 1].n_packed = 0;
982999
int t_s, t_e;
9831000
t_s = 0;
9841001
int n_t_tasks = pack_vars_pair_dens->target_n_tasks;
9851002
t->total_cpu_pack_ticks += getticks() - tic_cpu_pack;
9861003
while(cstart < n_leaves_found){
9871004
tic_cpu_pack = getticks();
1005+
1006+
pack_vars_pair_dens->launch_leftovers = 0;
1007+
pack_vars_pair_dens->launch = 0;
9881008
/*Loop through n_daughters such that the pack_vars_pair_dens counters are updated*/
9891009
while(cstart < n_leaves_found && pack_vars_pair_dens->tasks_packed < n_t_tasks){
9901010
packing_time_pair += runner_dopair1_pack_f4(
@@ -1025,21 +1045,30 @@ void *runner_main2(void *data) {
10251045
Otherwise, reset the index since we will be grabbing a new task*/
10261046
if(cstart == n_leaves_found){
10271047
pack_vars_pair_dens->top_tasks_packed = 0;
1048+
pack_vars_pair_dens->tasks_packed = 0;
1049+
pack_vars_pair_dens->leaf_list[0].ci = NULL;
1050+
pack_vars_pair_dens->leaf_list[0].cj = NULL;
1051+
pack_vars_pair_dens->leaf_list[0].n_leaves = 0;
1052+
pack_vars_pair_dens->leaf_list[0].n_packed = 0;
10281053
}
10291054
else{
1030-
pack_vars_pair_dens->top_tasks_packed = 1;
1031-
pack_vars_pair_dens->top_task_list[0] = t;
1055+
pack_vars_pair_dens->leaf_list[0].ci[0] =
1056+
pack_vars_pair_dens->leaf_list[pack_vars_pair_dens->top_tasks_packed - 1].ci[pack_vars_pair_dens->tasks_packed - 1];
1057+
pack_vars_pair_dens->leaf_list[0].cj[0] =
1058+
pack_vars_pair_dens->leaf_list[pack_vars_pair_dens->top_tasks_packed - 1].cj[pack_vars_pair_dens->tasks_packed - 1];
1059+
1060+
pack_vars_pair_dens->tasks_packed = 0;
1061+
pack_vars_pair_dens->top_tasks_packed = 1;
1062+
pack_vars_pair_dens->top_task_list[0] = t;
10321063
// pack_vars_pair_dens->leaf_list[0].n_packed = 0;
1033-
pack_vars_pair_dens->leaf_list[0].ci[0] =
1034-
pack_vars_pair_dens->leaf_list[pack_vars_pair_dens->top_tasks_packed - 1].ci[pack_vars_pair_dens->tasks_packed - 1];
1035-
pack_vars_pair_dens->leaf_list[0].cj[0] =
1036-
pack_vars_pair_dens->leaf_list[pack_vars_pair_dens->top_tasks_packed - 1].cj[pack_vars_pair_dens->tasks_packed - 1];
10371064
}
10381065
/*This makes it such that the remaining leaf tasks are packed starting from a
10391066
fresh list since we are still in the while cstart < n_leaves_found loop*/
10401067

10411068
pack_vars_pair_dens->leaf_list[0].n_packed = 0;
10421069
pack_vars_pair_dens->tasks_packed = 0;
1070+
pack_vars_pair_dens->launch_leftovers = 0;
1071+
pack_vars_pair_dens->launch = 0;
10431072
}
10441073
///////////////////////////////////////////////////////////////////////
10451074
}

0 commit comments

Comments
 (0)