@@ -547,17 +547,17 @@ void *runner_main2(void *data) {
547547 struct leaf_cell_list l_list[target_n_tasks];
548548 pack_vars_pair_dens->leaf_list = (struct leaf_cell_list *)calloc (target_n_tasks, sizeof (struct leaf_cell_list ));
549549 for (int i = 0 ; i < target_n_tasks; i++){
550- l_list[i].ci = (struct cell **)calloc (n_leaves_max, sizeof (struct cell *));
551- l_list[i].cj = (struct cell **)calloc (n_leaves_max, sizeof (struct cell *));
552- l_list[i].n_leaves = 0 ;
553- pack_vars_pair_dens->leaf_list [i].ci = ( struct cell **) calloc (n_leaves_max, sizeof (struct cell *));
554- pack_vars_pair_dens->leaf_list [i].cj = ( struct cell **) calloc (n_leaves_max, sizeof (struct cell *));
550+ // l_list[i].ci = (struct cell **)calloc(n_leaves_max, sizeof(struct cell *));
551+ // l_list[i].cj = (struct cell **)calloc(n_leaves_max, sizeof(struct cell *));
552+ // l_list[i].n_leaves = 0;
553+ pack_vars_pair_dens->leaf_list [i].ci = malloc (n_leaves_max * sizeof (struct cell *));
554+ pack_vars_pair_dens->leaf_list [i].cj = malloc (n_leaves_max * sizeof (struct cell *));
555555 pack_vars_pair_dens->leaf_list [i].n_leaves = 0 ;
556- for ( int j = 0 ; j < n_leaves_max; j++){
557- pack_vars_pair_dens-> leaf_list [i]. ci [j] = l_list[i]. ci [j];
558- pack_vars_pair_dens->leaf_list [i].cj [j] = l_list[i].cj [j];
559-
560- }
556+ pack_vars_pair_dens-> leaf_list [i]. n_packed = 0 ;
557+ // for (int j = 0; j < n_leaves_max; j++){
558+ // pack_vars_pair_dens->leaf_list[i].ci [j] = l_list[i].ci [j];
559+ // pack_vars_pair_dens->leaf_list[i].cj[j] = l_list[i].cj[j];
560+ // }
561561 }
562562// pack_vars_pair_dens->leaf_list = l_list;
563563// pack_vars_pair_dens->leaf_list->ci =
@@ -965,26 +965,46 @@ void *runner_main2(void *data) {
965965 // We need to allocate a list to put cell pointers into for each new task
966966 int n_expected_tasks = 4096 ; // A. Nasar: Need to come up with a good estimate for this
967967 int n_leaves_found = 0 ;
968+ int top_tasks_packed = pack_vars_pair_dens->top_tasks_packed ;
968969 int depth = 0 ;
969970 struct cell * cells_left[n_expected_tasks];
970971 struct cell * cells_right[n_expected_tasks];
972+ pack_vars_pair_dens->leaf_list [top_tasks_packed].n_leaves = 0 ;
971973 runner_recurse_gpu (r, sched, pack_vars_pair_dens, ci, cj, t,
972974 parts_aos_pair_f4_send, e, fparti_fpartj_lparti_lpartj_dens, &n_leaves_found,
973975 cells_left, cells_right, depth, n_expected_tasks);
974976 n_leafs_total += n_leaves_found;
975-
977+ // if(n_leaves_found > 4 && r->cpuid == 0){
978+ // fprintf(stderr, "leaves found %i\n", n_leaves_found);
979+ // for (int i = 0; i< n_leaves_found; i++){
980+ // int tt = pack_vars_pair_dens->top_tasks_packed;
981+ // fprintf(stderr, "ci->loc %f %f %f\n",
982+ // pack_vars_pair_dens->leaf_list[tt].ci[i]->loc[0],
983+ // pack_vars_pair_dens->leaf_list[tt].ci[i]->loc[1],
984+ // pack_vars_pair_dens->leaf_list[tt].ci[i]->loc[2]);
985+ //
986+ // fprintf(stderr, "cj->loc %f %f %f\n\n",
987+ // pack_vars_pair_dens->leaf_list[tt].cj[i]->loc[0],
988+ // pack_vars_pair_dens->leaf_list[tt].cj[i]->loc[1],
989+ // pack_vars_pair_dens->leaf_list[tt].cj[i]->loc[2]);
990+ // }
991+ // error("stop");
992+ // }
976993 int cstart = 0 , cid = 0 ;
977- pack_vars_pair_dens->top_task_list [pack_vars_pair_dens->top_tasks_packed ] = t;
994+ pack_vars_pair_dens->top_task_list [top_tasks_packed] = t;
995+ // This might be a bit iffy setting it to zero here. What if we loop through a task twice for recursion but do not offload the second time? We could be unpacking to the wrong leaves.
996+ pack_vars_pair_dens->leaf_list [top_tasks_packed].n_packed = 0 ;
978997 pack_vars_pair_dens->top_tasks_packed ++;
979998 pack_vars_pair_dens->task_locked = 1 ;
980- // This might be abit iffy setting it to zero here. What if we loop through a task twice for recursion but do not offload the second time? We could be unpacking to the wrong leaves
981- pack_vars_pair_dens->leaf_list [pack_vars_pair_dens->top_tasks_packed - 1 ].n_packed = 0 ;
982999 int t_s, t_e;
9831000 t_s = 0 ;
9841001 int n_t_tasks = pack_vars_pair_dens->target_n_tasks ;
9851002 t->total_cpu_pack_ticks += getticks () - tic_cpu_pack;
9861003 while (cstart < n_leaves_found){
9871004 tic_cpu_pack = getticks ();
1005+
1006+ pack_vars_pair_dens->launch_leftovers = 0 ;
1007+ pack_vars_pair_dens->launch = 0 ;
9881008 /* Loop through n_daughters such that the pack_vars_pair_dens counters are updated*/
9891009 while (cstart < n_leaves_found && pack_vars_pair_dens->tasks_packed < n_t_tasks){
9901010 packing_time_pair += runner_dopair1_pack_f4 (
@@ -1025,21 +1045,30 @@ void *runner_main2(void *data) {
10251045 Otherwise, reset the index since we will be grabbing a new task*/
10261046 if (cstart == n_leaves_found){
10271047 pack_vars_pair_dens->top_tasks_packed = 0 ;
1048+ pack_vars_pair_dens->tasks_packed = 0 ;
1049+ pack_vars_pair_dens->leaf_list [0 ].ci = NULL ;
1050+ pack_vars_pair_dens->leaf_list [0 ].cj = NULL ;
1051+ pack_vars_pair_dens->leaf_list [0 ].n_leaves = 0 ;
1052+ pack_vars_pair_dens->leaf_list [0 ].n_packed = 0 ;
10281053 }
10291054 else {
1030- pack_vars_pair_dens->top_tasks_packed = 1 ;
1031- pack_vars_pair_dens->top_task_list [0 ] = t;
1055+ pack_vars_pair_dens->leaf_list [0 ].ci [0 ] =
1056+ pack_vars_pair_dens->leaf_list [pack_vars_pair_dens->top_tasks_packed - 1 ].ci [pack_vars_pair_dens->tasks_packed - 1 ];
1057+ pack_vars_pair_dens->leaf_list [0 ].cj [0 ] =
1058+ pack_vars_pair_dens->leaf_list [pack_vars_pair_dens->top_tasks_packed - 1 ].cj [pack_vars_pair_dens->tasks_packed - 1 ];
1059+
1060+ pack_vars_pair_dens->tasks_packed = 0 ;
1061+ pack_vars_pair_dens->top_tasks_packed = 1 ;
1062+ pack_vars_pair_dens->top_task_list [0 ] = t;
10321063// pack_vars_pair_dens->leaf_list[0].n_packed = 0;
1033- pack_vars_pair_dens->leaf_list [0 ].ci [0 ] =
1034- pack_vars_pair_dens->leaf_list [pack_vars_pair_dens->top_tasks_packed - 1 ].ci [pack_vars_pair_dens->tasks_packed - 1 ];
1035- pack_vars_pair_dens->leaf_list [0 ].cj [0 ] =
1036- pack_vars_pair_dens->leaf_list [pack_vars_pair_dens->top_tasks_packed - 1 ].cj [pack_vars_pair_dens->tasks_packed - 1 ];
10371064 }
10381065 /* This makes it such that the remaining leaf tasks are packed starting from a
10391066 fresh list since we are still in the while cstart < n_leaves_found loop*/
10401067
10411068 pack_vars_pair_dens->leaf_list [0 ].n_packed = 0 ;
10421069 pack_vars_pair_dens->tasks_packed = 0 ;
1070+ pack_vars_pair_dens->launch_leftovers = 0 ;
1071+ pack_vars_pair_dens->launch = 0 ;
10431072 }
10441073 // /////////////////////////////////////////////////////////////////////
10451074 }
0 commit comments