| #include"SimpleMOC_header.h" |
| |
| /* Efficient version of attenuate fluxes which determines the change in angular |
| * flux along a particular track across a fine axial region and tallies the |
| * contribution to the scalar flux in the fine axial region. This function |
| * assumes a quadratic source, which is calculated on the fly using neighboring |
| * source values. |
| * |
| * This version decomposes the work into many for loops for efficient SIMD |
| * instructions and to reduce register pressure. For a more descriptive |
| * (but less effiient) version of the code in terms of the underlying physics, |
| * see alt_attenuate_fluxes which solves the problem in a more naive, |
| * straightforward manner. */ |
| void attenuate_fluxes( Track * track, bool forward, Source * QSR, Input * I_in, |
| Params * params_in, float ds, float mu, float az_weight, |
| AttenuateVars * A ) |
| { |
| Input I = *I_in; |
| Params params = *params_in; |
| |
| // unload attenuate vars |
| float * restrict q0 = A->q0; |
| float * restrict q1 = A->q1; |
| float * restrict q2 = A->q2; |
| float * restrict sigT = A->sigT; |
| float * restrict tau = A->tau; |
| float * restrict sigT2 = A->sigT2; |
| float * restrict expVal = A->expVal; |
| float * restrict reuse = A->reuse; |
| float * restrict flux_integral = A->flux_integral; |
| float * restrict tally = A->tally; |
| float * restrict t1 = A->t1; |
| float * restrict t2 = A->t2; |
| float * restrict t3 = A->t3; |
| float * restrict t4 = A->t4; |
| |
| // compute fine axial interval spacing |
| float dz = I.height / (I.fai * I.decomp_assemblies_ax * I.cai); |
| |
| // compute z height in cell |
| float zin = track->z_height - dz * |
| ( (int)( track->z_height / dz ) + 0.5f ); |
| |
| // compute fine axial region ID |
| int fine_id = (int) ( track->z_height / dz ) % I.fai; |
| |
| // compute weight (azimuthal * polar) |
| // NOTE: real app would also have volume weight component |
| float weight = track->p_weight * az_weight; |
| float mu2 = mu * mu; |
| |
| // load fine source region flux vector |
| float * FSR_flux = QSR -> fine_flux[fine_id]; |
| |
| if( fine_id == 0 ) |
| { |
| // adjust z height to account for edge |
| zin -= dz; |
| |
| // cycle over energy groups |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| // load neighboring sources |
| float y1 = QSR->fine_source[fine_id][g]; |
| float y2 = QSR->fine_source[fine_id+1][g]; |
| float y3 = QSR->fine_source[fine_id+2][g]; |
| |
| // do quadratic "fitting" |
| float c0 = y2; |
| float c1 = (y1 - y3) / (2.f*dz); |
| float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz); |
| |
| // calculate q0, q1, q2 |
| q0[g] = c0 + c1*zin + c2*zin*zin; |
| q1[g] = c1 + 2.f*c2*zin; |
| q2[g] = c2; |
| } |
| } |
| else if ( fine_id == I.fai - 1 ) |
| { |
| // adjust z height to account for edge |
| zin += dz; |
| |
| // cycle over energy groups |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| // load neighboring sources |
| float y1 = QSR->fine_source[fine_id-2][g]; |
| float y2 = QSR->fine_source[fine_id-1][g]; |
| float y3 = QSR->fine_source[fine_id][g]; |
| |
| // do quadratic "fitting" |
| float c0 = y2; |
| float c1 = (y1 - y3) / (2.f*dz); |
| float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz); |
| |
| // calculate q0, q1, q2 |
| q0[g] = c0 + c1*zin + c2*zin*zin; |
| q1[g] = c1 + 2.f*c2*zin; |
| q2[g] = c2; |
| } |
| } |
| else |
| { |
| // cycle over energy groups |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| // load neighboring sources |
| float y1 = QSR->fine_source[fine_id-1][g]; |
| float y2 = QSR->fine_source[fine_id][g]; |
| float y3 = QSR->fine_source[fine_id+1][g]; |
| |
| // do quadratic "fitting" |
| float c0 = y2; |
| float c1 = (y1 - y3) / (2.f*dz); |
| float c2 = (y1 - 2.f*y2 + y3) / (2.f*dz*dz); |
| |
| // calculate q0, q1, q2 |
| q0[g] = c0 + c1*zin + c2*zin*zin; |
| q1[g] = c1 + 2.f*c2*zin; |
| q2[g] = c2; |
| } |
| } |
| |
| // cycle over energy groups |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| // load total cross section |
| sigT[g] = QSR->sigT[g]; |
| |
| // calculate common values for efficiency |
| tau[g] = sigT[g] * ds; |
| sigT2[g] = sigT[g] * sigT[g]; |
| } |
| |
| // cycle over energy groups |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| expVal[g] = interpolateTable( params.expTable, tau[g] ); |
| |
| // Flux Integral |
| |
| // Re-used Term |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| reuse[g] = tau[g] * (tau[g] - 2.f) + 2.f * expVal[g] |
| / (sigT[g] * sigT2[g]); |
| } |
| |
| |
| float * psi; |
| if(forward) |
| psi = track->f_psi; |
| else |
| psi = track->b_psi; |
| |
| //#pragma vector nontemporal |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| // add contribution to new source flux |
| flux_integral[g] = (q0[g] * tau[g] + (sigT[g] * psi[g] - q0[g]) |
| * expVal[g]) / sigT2[g] + q1[g] * mu * reuse[g] + q2[g] * mu2 |
| * (tau[g] * (tau[g] * (tau[g] - 3.f) + 6.f) - 6.f * expVal[g]) |
| / (3.f * sigT2[g] * sigT2[g]); |
| } |
| |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| // Prepare tally |
| tally[g] = weight * flux_integral[g]; |
| } |
| |
| #ifdef OPENMP |
| omp_set_lock(QSR->locks + fine_id); |
| #endif |
| |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| FSR_flux[g] += tally[g]; |
| } |
| |
| #ifdef OPENMP |
| omp_unset_lock(QSR->locks + fine_id); |
| #endif |
| |
| // Term 1 |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| t1[g] = q0[g] * expVal[g] / sigT[g]; |
| } |
| // Term 2 |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| t2[g] = q1[g] * mu * (tau[g] - expVal[g]) / sigT2[g]; |
| } |
| // Term 3 |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| t3[g] = q2[g] * mu2 * reuse[g]; |
| } |
| // Term 4 |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| t4[g] = psi[g] * (1.f - expVal[g]); |
| } |
| // Total psi |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| psi[g] = t1[g] + t2[g] + t3[g] + t4[g]; |
| } |
| } |
| |
| // single direction transport sweep |
| void transport_sweep( Params * params, Input * I ) |
| { |
| if(I->mype==0) printf("Starting transport sweep ...\n"); |
| |
| // calculate the height of a node's domain and of each FSR |
| double node_delta_z = I->height / I->decomp_assemblies_ax; |
| double fine_delta_z = node_delta_z / (I->cai * I->fai); |
| |
| /* loop over tracks (implicitly azimuthal angles, tracks in azimuthal |
| * angles, polar angles, and z stacked rays) */ |
| |
| //print_Input_struct( I ); |
| long segments_processed = 0; |
| |
| #pragma omp parallel default(none) \ |
| shared( I, params, node_delta_z, fine_delta_z ) \ |
| reduction(+ : segments_processed ) |
| { |
| #ifdef OPENMP |
| int thread = omp_get_thread_num(); |
| int nthreads = omp_get_num_threads(); |
| unsigned int seed = time(NULL) * (thread+1); |
| #endif |
| //print_Input_struct( I ); |
| |
| #ifdef PAPI |
| int eventset = PAPI_NULL; |
| int num_papi_events; |
| #pragma omp critical |
| { |
| counter_init(&eventset, &num_papi_events, I); |
| } |
| #endif |
| |
| AttenuateVars A; |
| float * ptr = (float * ) malloc( I->n_egroups * 14 * sizeof(float)); |
| A.q0 = ptr; |
| ptr += I->n_egroups; |
| A.q1 = ptr; |
| ptr += I->n_egroups; |
| A.q2 = ptr; |
| ptr += I->n_egroups; |
| A.sigT = ptr; |
| ptr += I->n_egroups; |
| A.tau = ptr; |
| ptr += I->n_egroups; |
| A.sigT2 = ptr; |
| ptr += I->n_egroups; |
| A.expVal = ptr; |
| ptr += I->n_egroups; |
| A.reuse = ptr; |
| ptr += I->n_egroups; |
| A.flux_integral = ptr; |
| ptr += I->n_egroups; |
| A.tally = ptr; |
| ptr += I->n_egroups; |
| A.t1 = ptr; |
| ptr += I->n_egroups; |
| A.t2 = ptr; |
| ptr += I->n_egroups; |
| A.t3 = ptr; |
| ptr += I->n_egroups; |
| A.t4 = ptr; |
| |
| #pragma omp for schedule( dynamic ) |
| for (long i = 0; i < I->ntracks_2D; i++) |
| { |
| #if TIMING_INFO | 0 |
| // print progress |
| #ifdef OPENMP |
| if(I->mype==0 && thread == 0) |
| { |
| printf("\rAttenuating Tracks... (%.0lf%% completed)", |
| (i / ( (double)I->ntracks_2D / (double) nthreads )) |
| / (double) nthreads * 100.0); |
| } |
| #else |
| if( i % 50 == 0) |
| if(I->mype==0) |
| printf("%s%ld%s%ld\n","2D Tracks Completed = ", i," / ", |
| I->ntracks_2D ); |
| #endif |
| #endif |
| |
| |
| // treat positive-z traveling rays first |
| bool pos_z_dir = true; |
| for( int j = 0; j < I->n_polar_angles; j++) |
| { |
| if( j == I->n_polar_angles / 2 ) |
| pos_z_dir = false; |
| float p_angle = params->polar_angles[j]; |
| float mu = cos(p_angle); |
| |
| // start with all z stacked rays |
| int begin_stacked = 0; |
| int end_stacked = I->z_stacked; |
| |
| for( int n = 0; n < params->tracks_2D[i].n_segments; n++) |
| { |
| // calculate distance traveled in cell if segment completed |
| float s_full = params->tracks_2D[i].segments[n].length |
| / sin(p_angle); |
| |
| // allocate varaible for distance traveled in an FSR |
| float ds = 0; |
| |
| // loop over remaining z-stacked rays |
| for( int k = begin_stacked; k < end_stacked; k++) |
| { |
| // initialize s to full length |
| float s = s_full; |
| |
| // select current track |
| Track * track = ¶ms->tracks[i][j][k]; |
| |
| // set flag for completeion of segment |
| bool seg_complete = false; |
| |
| // calculate interval |
| int curr_interval; |
| if( pos_z_dir) |
| curr_interval = get_pos_interval(track->z_height, |
| fine_delta_z); |
| else |
| curr_interval = get_neg_interval(track->z_height, |
| fine_delta_z); |
| |
| while( !seg_complete ) |
| { |
| // flag to reset z position |
| bool reset = false; |
| |
| |
| /* calculate new height based on s |
| * (distance traveled in FSR) */ |
| float z = track->z_height + s * cos(p_angle); |
| |
| // check if still in same FSR (fine axial interval) |
| int new_interval; |
| if( pos_z_dir ) |
| new_interval = get_pos_interval(z, |
| fine_delta_z); |
| else |
| new_interval = get_neg_interval(z, |
| fine_delta_z); |
| |
| if( new_interval == curr_interval ) |
| { |
| seg_complete = true; |
| ds = s; |
| } |
| |
| // otherwise, we need to recalculate distances |
| else |
| { |
| // correct z |
| if( pos_z_dir ) |
| { |
| curr_interval++; |
| z = fine_delta_z * (float) curr_interval; |
| } |
| else{ |
| curr_interval--; |
| z = fine_delta_z * (float) curr_interval; |
| } |
| |
| // calculate distance travelled in FSR (ds) |
| ds = (z - track->z_height) / cos(p_angle); |
| |
| // update track length remaining |
| s -= ds; |
| |
| /* check remaining track length to protect |
| * against potential roundoff errors */ |
| if( s <= 0 ) |
| seg_complete = true; |
| |
| // check if out of bounds or track complete |
| if( z <= 0 || z >= node_delta_z ) |
| { |
| // mark segment as completed |
| seg_complete = true; |
| |
| // remember to no longer treat this track |
| if ( pos_z_dir ) |
| end_stacked--; |
| else |
| begin_stacked++; |
| |
| // reset z height |
| reset = true; |
| } |
| } |
| |
| // pick a random FSR (cache miss expected) |
| #ifdef OPENMP |
| long QSR_id = rand_r(&seed) % |
| I->n_source_regions_per_node; |
| #else |
| long QSR_id = rand() % |
| I->n_source_regions_per_node; |
| #endif |
| |
| /* update sources and fluxes from attenuation |
| * over FSR */ |
| if( I->axial_exp == 2 ) |
| { |
| attenuate_fluxes( track, true, |
| ¶ms->sources[QSR_id], |
| I, params, ds, mu, |
| params->tracks_2D[i].az_weight, &A ); |
| |
| segments_processed++; |
| } |
| |
| else if( I->axial_exp == 0 ) |
| { |
| attenuate_FSR_fluxes( track, true, |
| ¶ms->sources[QSR_id], |
| I, params, ds, mu, |
| params->tracks_2D[i].az_weight, &A ); |
| |
| segments_processed++; |
| } |
| else |
| { |
| printf("Error: invalid axial expansion order"); |
| printf("\n Please input 0 or 2\n"); |
| exit(1); |
| } |
| |
| // update with new z height or reset if finished |
| if( n == params->tracks_2D[i].n_segments - 1 |
| || reset) |
| { |
| if( pos_z_dir) |
| track->z_height = I->axial_z_sep * k; |
| else |
| track->z_height = I->axial_z_sep * (k+1); |
| } |
| else |
| track->z_height = z; |
| |
| } |
| } |
| } |
| } |
| } |
| #ifdef OPENMP |
| if(thread == 0 && I->mype==0) printf("\n"); |
| #endif |
| |
| #ifdef PAPI |
| if( thread == 0 ) |
| { |
| printf("\n"); |
| border_print(); |
| center_print("PAPI COUNTER RESULTS", 79); |
| border_print(); |
| printf("Count \tSmybol \tDescription\n"); |
| } |
| { |
| #pragma omp barrier |
| } |
| counter_stop(&eventset, num_papi_events, I); |
| #endif |
| } |
| I->segments_processed = segments_processed; |
| |
| return; |
| } |
| |
| |
| // run one full transport sweep, return k |
| void two_way_transport_sweep( Params * params, Input * I ) |
| { |
| if(I->mype==0) printf("Starting transport sweep ...\n"); |
| |
| // calculate the height of a node's domain and of each FSR |
| double node_delta_z = I->height / I->decomp_assemblies_ax; |
| int num_intervals = (I->cai * I->fai); |
| double fine_delta_z = node_delta_z / num_intervals; |
| |
| /* loop over tracks (implicitly azimuthal angles, tracks in azimuthal |
| * angles, polar angles, and z stacked rays) */ |
| long segments_processed = 0; |
| |
| #pragma omp parallel default(none) \ |
| shared( I, params, node_delta_z, fine_delta_z, num_intervals ) \ |
| reduction(+ : segments_processed ) |
| { |
| #ifdef OPENMP |
| int thread = omp_get_thread_num(); |
| int nthreads = omp_get_num_threads(); |
| unsigned int seed = time(NULL) * (thread+1); |
| #endif |
| //print_Input_struct( I ); |
| |
| #ifdef PAPI |
| int eventset = PAPI_NULL; |
| int num_papi_events; |
| #pragma omp critical |
| { |
| counter_init(&eventset, &num_papi_events, I); |
| } |
| #endif |
| |
| |
| AttenuateVars A; |
| float * ptr = (float * ) malloc( I->n_egroups * 14 * sizeof(float)); |
| A.q0 = ptr; |
| ptr += I->n_egroups; |
| A.q1 = ptr; |
| ptr += I->n_egroups; |
| A.q2 = ptr; |
| ptr += I->n_egroups; |
| A.sigT = ptr; |
| ptr += I->n_egroups; |
| A.tau = ptr; |
| ptr += I->n_egroups; |
| A.sigT2 = ptr; |
| ptr += I->n_egroups; |
| A.expVal = ptr; |
| ptr += I->n_egroups; |
| A.reuse = ptr; |
| ptr += I->n_egroups; |
| A.flux_integral = ptr; |
| ptr += I->n_egroups; |
| A.tally = ptr; |
| ptr += I->n_egroups; |
| A.t1 = ptr; |
| ptr += I->n_egroups; |
| A.t2 = ptr; |
| ptr += I->n_egroups; |
| A.t3 = ptr; |
| ptr += I->n_egroups; |
| A.t4 = ptr; |
| |
| #pragma omp for schedule( dynamic ) |
| for (long i = 0; i < I->ntracks_2D; i++) |
| { |
| // print progress |
| #ifdef OPENMP |
| if(I->mype==0 && thread == 0) |
| { |
| printf("\rAttenuating Tracks... (%.0lf%% completed)", |
| (i / ( (double)I->ntracks_2D / (double) nthreads )) |
| / (double) nthreads * 100.0); |
| } |
| #else |
| if( i % 50 == 0) |
| if(I->mype==0) |
| printf("%s%ld%s%ld\n","2D Tracks Completed = ", i," / ", |
| I->ntracks_2D ); |
| #endif |
| |
| // allocate arrays for segment storage FIXME |
| double ** seg_dist = malloc( I->z_stacked * sizeof(double *) ); |
| Source *** seg_src = malloc( I->z_stacked * sizeof(Source**) ); |
| int * seg_idx = malloc( I->z_stacked * sizeof(int) ); |
| int * seg_size = malloc( I->z_stacked * sizeof(int) ); |
| |
| // fill matrix with arrays FIXME |
| for( int k = 0; k < I->z_stacked; k++) |
| { |
| seg_size[k] = 2 * I->segments_per_track; |
| seg_dist[k] = malloc( seg_size[k] * sizeof(double) ); |
| seg_src[k] = malloc( seg_size[k] * sizeof(Source *) ); |
| seg_idx[k] = 0; |
| } |
| |
| // treat positive-z traveling rays first |
| bool pos_z_dir = true; |
| for( int j = 0; j < I->n_polar_angles; j++) |
| { |
| if( j == I->n_polar_angles / 2 ) |
| pos_z_dir = false; |
| float p_angle = params->polar_angles[j]; |
| float mu = cos(p_angle); |
| |
| // start with all z stacked rays |
| int begin_stacked = 0; |
| int end_stacked = I->z_stacked; |
| |
| // reset semgnet indexes |
| for( int k = 0; k < I->z_stacked; k++) |
| seg_idx[k] = 0; |
| |
| for( int n = 0; n < params->tracks_2D[i].n_segments; n++) |
| { |
| // calculate distance traveled in cell if segment completed |
| float s_full = params->tracks_2D[i].segments[n].length |
| / sin(p_angle); |
| |
| // allocate varaible for distance traveled in an FSR |
| float ds = 0; |
| |
| // loop over remaining z-stacked rays |
| int tracks_completed = 0; |
| for( int k = begin_stacked; k < end_stacked; k++) |
| { |
| // select current track |
| Track * track = ¶ms->tracks[i][j][k]; |
| |
| // determine current axial interval |
| int interval = (int) track->z_height / fine_delta_z; |
| |
| // calculate distance to domain boundary |
| float bound_dist; |
| if( pos_z_dir) |
| bound_dist = (node_delta_z - track->z_height) / mu; |
| else |
| bound_dist = -track->z_height / mu; |
| |
| // determine track length |
| float s; |
| if( s_full < bound_dist ) |
| s = s_full; |
| else |
| { |
| // note completion of track |
| s = bound_dist; |
| tracks_completed++; |
| } |
| |
| // set flag for completeion of segment |
| bool seg_complete = false; |
| |
| while( !seg_complete ) |
| { |
| // initialize tracking variables |
| long QSR_id = interval + num_intervals * n; |
| float ds; |
| float z; |
| |
| // calculate z height of next fine axial interval |
| float fai_z_height; |
| if( pos_z_dir ) |
| fai_z_height = (interval + 1) * fine_delta_z ; |
| else |
| fai_z_height = interval * fine_delta_z; |
| |
| // calculate z distance to next fine axial interval |
| float z_dist_to_fai = |
| fai_z_height - track->z_height; |
| |
| /* calculate total distance (s) to fine axial |
| * interval */ |
| float s_dist_to_fai = z_dist_to_fai / mu; |
| |
| // determine if a fine axial interval is crossed |
| if( s_dist_to_fai < s ) |
| { |
| if( pos_z_dir ) |
| interval++; |
| else |
| interval--; |
| ds = s_dist_to_fai; |
| z = track->z_height + z_dist_to_fai; |
| } |
| else |
| { |
| ds = s; |
| z = track->z_height + s * mu; |
| } |
| |
| /* shorten remaining segment length and check if |
| * completed (accounting for potential roundoff) */ |
| s -= ds; |
| if( s <= 0 || interval < 0 |
| || interval >= num_intervals) |
| seg_complete = true; |
| |
| // pick a random FSR (cache miss expected) |
| #ifdef OPENMP |
| QSR_id = rand_r(&seed) % |
| I->n_source_regions_per_node; |
| #else |
| QSR_id = rand() % I->n_source_regions_per_node; |
| #endif |
| |
| /* update sources and fluxes from attenuation |
| * over FSR */ |
| if( I->axial_exp == 2 ) |
| { |
| attenuate_fluxes( track, true, |
| ¶ms->sources[QSR_id], |
| I, params, ds, mu, |
| params->tracks_2D[i].az_weight, &A ); |
| segments_processed++; |
| } |
| |
| else if( I->axial_exp == 0 ) |
| attenuate_FSR_fluxes( track, true, |
| ¶ms->sources[QSR_id], |
| I, params, ds, mu, |
| params->tracks_2D[i].az_weight, &A ); |
| else |
| { |
| printf("Error: invalid axial expansion order"); |
| printf("\n Please input 0 or 2\n"); |
| exit(1); |
| } |
| |
| // update track height |
| track->z_height = z; |
| |
| // save segment length and source FIXME |
| seg_dist[k][seg_idx[k]] = ds; |
| seg_src[k][seg_idx[k]] = ¶ms->sources[QSR_id]; |
| seg_idx[k]++; |
| |
| // check if array needs to grow FIXME |
| if( seg_idx[k] >= seg_size[k] ) |
| { |
| seg_size[k] *= 2; |
| seg_dist[k] = (double *) realloc( seg_dist[k], |
| seg_size[k] * sizeof(double) ); |
| seg_src[k] = (Source **) realloc( seg_src[k], |
| seg_size[k] * sizeof(Source *) ); |
| } |
| } |
| } |
| if(pos_z_dir) |
| end_stacked -= tracks_completed; |
| else |
| begin_stacked += tracks_completed; |
| } |
| |
| // loop over all z stacked rays again |
| for( int k = 0; k < I->z_stacked; k++ ) |
| { |
| for( int n = seg_idx[k]-1; n >= 0; n--) |
| { |
| // load distance |
| float ds = seg_dist[k][n]; |
| |
| // select current track |
| Track * track = ¶ms->tracks[i][j][k]; |
| |
| // update sources and fluxes from attenuation over FSR |
| if( I->axial_exp == 2 ) |
| { |
| attenuate_fluxes( track, false, |
| seg_src[k][n], |
| I, params, ds, -mu, |
| params->tracks_2D[i].az_weight, &A ); |
| segments_processed++; |
| } |
| |
| else if( I->axial_exp == 0 ) |
| attenuate_FSR_fluxes( track, false, |
| seg_src[k][n], |
| I, params, ds, -mu, |
| params->tracks_2D[i].az_weight, &A ); |
| |
| // update z height |
| track->z_height -= ds * mu; |
| } |
| } |
| |
| |
| /* Update all tracks with correct starting z location again |
| * NOTE: this is only here to acocunt for roundoff error */ |
| for( int k = 0; k < I->z_stacked; k++) |
| { |
| Track * track = ¶ms->tracks[i][j][k]; |
| if( pos_z_dir) |
| track->z_height = I->axial_z_sep * k; |
| else |
| track->z_height = I->axial_z_sep * (k+1); |
| } |
| } |
| |
| // free memory |
| for( int k = 0; k < I->z_stacked; k++) |
| { |
| free(seg_dist[k]); |
| free(seg_src[k]); |
| } |
| free(seg_dist); |
| free(seg_src); |
| free(seg_idx); |
| free(seg_size); |
| |
| } |
| #ifdef OPENMP |
| if(thread == 0 && I->mype==0) printf("\n"); |
| #endif |
| |
| #ifdef PAPI |
| if( thread == 0 ) |
| { |
| printf("\n"); |
| border_print(); |
| center_print("PAPI COUNTER RESULTS", 79); |
| border_print(); |
| printf("Count \tSmybol \tDescription\n"); |
| } |
| { |
| #pragma omp barrier |
| } |
| counter_stop(&eventset, num_papi_events, I); |
| #endif |
| } |
| //printf("Number of segments processed: %ld\n", segments_processed); |
| I->segments_processed = segments_processed; |
| |
| return; |
| } |
| |
| /* returns integer number for axial interval for tracks traveling in the |
| * positive direction */ |
| int get_pos_interval( float z, float dz) |
| { |
| int interval = (int) (z/dz); |
| return interval; |
| } |
| |
| /* returns integer number for axial interval for tracks traveling in the |
| * negative direction */ |
| int get_neg_interval( float z, float dz) |
| { |
| // NOTE: a bit of trickery using floors to obtain ceils |
| int interval = INT_MAX - (int) ( (double) INT_MAX |
| - (double) ( z / dz ) ); |
| return interval; |
| } |
| |
| int calc_next_fai( float z, float dz, bool pos_dir) |
| { |
| int interval = z/dz; |
| float lower_z = dz * (float) interval; |
| if(pos_dir) |
| return interval + 1; |
| else |
| return interval; |
| } |
| |
| /* Determines the change in angular flux along a particular track across a fine |
| * axial region and tallies the contribution to the scalar flux in the fine |
| * axial region. This function assumes a quadratic source, which is calculated |
| * on the fly using neighboring source values. |
| * |
| * This legacy function is unused since it is less efficient than the current |
| * attenuate_fluxes function. However, it provides a more straightforward |
| * description of the underlying physical problem. */ |
| void alt_attenuate_fluxes( Track * track, bool forward, Source * QSR, Input * I, |
| Params * params, float ds, float mu, float az_weight ) |
| { |
| // compute fine axial interval spacing |
| float dz = I->height / (I->fai * I->decomp_assemblies_ax * I->cai); |
| |
| // compute z height in cell |
| float zin = track->z_height - dz * ( (int)( track->z_height / dz ) + 0.5 ); |
| |
| // compute fine axial region ID |
| int fine_id = (int) ( track->z_height / dz ) % I->fai; |
| |
| // compute weight (azimuthal * polar) |
| // NOTE: real app would also have volume weight component |
| float weight = track->p_weight * az_weight; |
| float mu2 = mu * mu; |
| |
| // load fine source region flux vector |
| float * FSR_flux = QSR -> fine_flux[fine_id]; |
| |
| // cycle over energy groups |
| for( int g = 0; g < I->n_egroups; g++) |
| { |
| // load total cross section |
| float sigT = QSR->sigT[g]; |
| |
| // define source parameters |
| float q0, q1, q2; |
| |
| // calculate source components |
| if( fine_id == 0 ) |
| { |
| // load neighboring sources |
| float y2 = QSR->fine_source[fine_id][g]; |
| float y3 = QSR->fine_source[fine_id+1][g]; |
| |
| // do linear "fitting" |
| float c0 = y2; |
| float c1 = (y3 - y2) / dz; |
| |
| // calculate q0, q1, q2 |
| q0 = c0 + c1*zin; |
| q1 = c1; |
| q2 = 0; |
| } |
| else if( fine_id == I->fai - 1 ) |
| { |
| // load neighboring sources |
| float y1 = QSR->fine_source[fine_id-1][g]; |
| float y2 = QSR->fine_source[fine_id][g]; |
| |
| // do linear "fitting" |
| float c0 = y2; |
| float c1 = (y2 - y1) / dz; |
| |
| // calculate q0, q1, q2 |
| q0 = c0 + c1*zin; |
| q1 = c1; |
| q2 = 0; |
| } |
| else |
| { |
| // load neighboring sources |
| float y1 = QSR->fine_source[fine_id-1][g]; |
| float y2 = QSR->fine_source[fine_id][g]; |
| float y3 = QSR->fine_source[fine_id+1][g]; |
| |
| // do quadratic "fitting" |
| float c0 = y2; |
| float c1 = (y1 - y3) / (2*dz); |
| float c2 = (y1 - 2*y2 + y3) / (2*dz*dz); |
| |
| // calculate q0, q1, q2 |
| q0 = c0 + c1*zin + c2*zin*zin; |
| q1 = c1 + 2*c2*zin; |
| q2 = c2; |
| } |
| |
| // calculate common values for efficiency |
| float tau = sigT * ds; |
| float sigT2 = sigT * sigT; |
| |
| // compute exponential ( 1 - exp(-x) ) using table lookup |
| float expVal = interpolateTable( params->expTable, tau ); |
| |
| // load correct angular flux vector |
| float * psi; |
| if(forward) |
| psi = track->f_psi; |
| else |
| psi = track->b_psi; |
| |
| // add contribution to new source flux |
| float flux_integral = (q0 * tau + (sigT * psi[g] - q0) * expVal) |
| / sigT2 |
| + q1 * mu * (tau * (tau - 2) + 2 * expVal) |
| / (sigT * sigT2) |
| + q2 * mu2 * (tau * (tau * (tau - 3) + 6) - 6 * expVal) |
| / (3 * sigT2 * sigT2); |
| |
| #pragma omp atomic |
| FSR_flux[g] += weight * flux_integral; |
| |
| // update angular flux |
| psi[g] = psi[g] * (1.0 - expVal) + q0 * expVal / sigT |
| + q1 * mu * (tau - expVal) / sigT2 + q2 * mu2 * |
| (tau * (tau - 2) + 2 * expVal) / (sigT2 * sigT); |
| } |
| } |
| |
| /* Determines the change in angular flux along a particular track across a fine |
| * axial region and tallies the contribution to the scalar flux in the fine |
| * axial region. This function assumes a constant source. */ |
| void attenuate_FSR_fluxes( Track * track, bool forward, Source * FSR, Input * I, |
| Params * params_in, float ds, float mu, float az_weight, |
| AttenuateVars *A) |
| { |
| // upack attenuate vars struct |
| float * restrict tally = A->tally; |
| float * restrict expVal = A->expVal; |
| float * restrict sigT = A->sigT; |
| float * restrict tau = A->tau; |
| |
| Params params = * params_in; |
| |
| // compute fine axial interval spacing |
| float dz = I->height / (I->fai * I->decomp_assemblies_ax * I->cai); |
| |
| // compute z height in cell |
| float zin = track->z_height - dz * |
| ( (int)( track->z_height / dz ) + 0.5f ); |
| |
| // compute fine axial region ID |
| int fine_id = (int) ( track->z_height / dz ) % I->fai; |
| |
| // compute weight (azimuthal * polar) |
| // NOTE: real app would also have volume weight component |
| float weight = track->p_weight * az_weight * mu; |
| |
| // load fine source region flux vector |
| float * FSR_flux = FSR -> fine_flux[fine_id]; |
| |
| // cycle over energy groups |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I->n_egroups; g++) |
| { |
| // load total cross section |
| sigT[g] = FSR->sigT[g]; |
| tau[g] = sigT[g] * ds; |
| } |
| |
| // compute exponential ( 1 - exp(-x) ) using table lookup |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for(int g = 0; g < I->n_egroups; g++) |
| { |
| expVal[g] = interpolateTable( params.expTable, tau[g] ); |
| } |
| |
| float * psi; |
| if(forward) |
| psi = track->f_psi; |
| else |
| psi = track->b_psi; |
| |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I->n_egroups; g++) |
| { |
| // compute angular flux attenuation |
| float q = FSR->fine_source[fine_id][g] / sigT[g]; |
| float delta_psi = (psi[g] - q) * expVal[g]; |
| |
| // add contribution to new source flux |
| tally[g] = weight * delta_psi; |
| |
| // update angular flux |
| psi[g] -= delta_psi; |
| } |
| |
| |
| |
| #ifdef OPENMP |
| omp_set_lock(&FSR->locks[fine_id]); |
| #endif |
| |
| #ifdef INTEL |
| #pragma simd |
| #elif defined IBM |
| #pragma simd_level(10) |
| #endif |
| for( int g = 0; g < I->n_egroups; g++) |
| { |
| FSR_flux[g] += tally[g]; |
| } |
| |
| #ifdef OPENMP |
| omp_unset_lock(&FSR->locks[fine_id]); |
| #endif |
| |
| |
| } |
| |
| /* Renormalizes scalar and angular flux for next transport sweep iteration. |
| * Calculation requires multiple pair-wise sums and a reduction accross all |
| * nodes. */ |
| void renormalize_flux( Params params, Input I, CommGrid grid ) |
| { |
| if( I.mype == 0 ) printf("Renormalizing Flux...\n"); |
| float node_fission_rate = 0; |
| #ifdef OPENMP |
| #pragma omp parallel default(none) shared(params, I, grid) \ |
| reduction(+ : node_fission_rate) |
| { |
| #endif |
| // tally total fission rate (pair-wise sum) |
| float * fission_rates = malloc( I.n_source_regions_per_node |
| * sizeof(float) ); |
| |
| float * fine_fission_rates = malloc( I.fai * sizeof(float) ); |
| float * g_fission_rates = malloc( I.n_egroups * sizeof(float) ); |
| |
| // accumulate total fission rate on node domain |
| #pragma omp for schedule(dynamic) |
| for( int i = 0; i < I.n_source_regions_per_node; i++) |
| { |
| Source src = params.sources[i]; |
| for( int j = 0; j < I.fai; j++) |
| { |
| for( int g = 0; g < I.n_egroups; g++) |
| g_fission_rates[g] = src.fine_flux[j][g] * src.vol |
| * src.XS[g][0]; |
| fine_fission_rates[j] = pairwise_sum( g_fission_rates, |
| I.n_egroups ); |
| } |
| fission_rates[i] = pairwise_sum( fine_fission_rates, I.fai ); |
| } |
| node_fission_rate = pairwise_sum(fission_rates, |
| I.n_source_regions_per_node); |
| |
| // free allocated memory |
| free(fission_rates); |
| free(fine_fission_rates); |
| free(g_fission_rates); |
| |
| #ifdef OPENMP |
| } |
| #endif |
| |
| #ifdef MPI |
| // accumulate total fission rate by MPI Allreduce |
| float total_fission_rate = 0; |
| MPI_Barrier(grid.cart_comm_3d); |
| MPI_Allreduce( &node_fission_rate, // Send Buffer |
| &total_fission_rate, // Receive Buffer |
| 1, // Element Count |
| MPI_FLOAT, // Element Type |
| MPI_SUM, // Reduciton Operation Type |
| grid.cart_comm_3d ); // MPI Communicator |
| MPI_Barrier(grid.cart_comm_3d); |
| #else |
| float total_fission_rate = node_fission_rate; |
| #endif |
| |
| |
| // normalize fluxes by fission reaction rate |
| float norm_factor = 1.0 / total_fission_rate; |
| |
| #pragma omp parallel for default(none) \ |
| shared(I, params) private(norm_factor) schedule(dynamic) |
| for( int i = 0; i < I.n_source_regions_per_node; i++) |
| { |
| Source * src = ¶ms.sources[i]; |
| float adjust = norm_factor * 4 * M_PI * I.fai / src->vol; |
| for( int k = 0; k < I.fai; k++) |
| for( int g = 0; g < I.n_egroups; g++) |
| src->fine_flux[k][g] *= adjust; |
| } |
| |
| // normalize boundary fluxes by same factor |
| #pragma omp parallel for default(none) \ |
| shared(I, params) private(norm_factor) schedule(dynamic) |
| for( long i = 0; i < I.ntracks_2D; i++) |
| for( int j = 0; j < I.n_polar_angles; j++) |
| for( int k = 0; k < I.z_stacked; k++) |
| for( int g = 0; g < I.n_egroups; g++) |
| { |
| params.tracks[i][j][k].f_psi[g] *= norm_factor; |
| params.tracks[i][j][k].b_psi[g] *= norm_factor; |
| } |
| |
| if( I.mype == 0 ) printf("Renormalizing Flux Complete.\n"); |
| return; |
| } |
| |
| /* Updates sources for next iteration by computing scattering and fission |
| * components. Calculation includes multiple pair-wise sums and reductions |
| * accross all nodes */ |
| float update_sources( Params params, Input I, float keff ) |
| { |
| // source residual |
| float residual; |
| |
| // calculate inverse multiplication facotr for efficiency |
| float inverse_k = 1.0 / keff; |
| |
| // allocate residual arrays |
| float * group_res = (float *) malloc(I.n_egroups * sizeof(float)); |
| float * fine_res = (float *) malloc(I.fai * sizeof(float)); |
| float * residuals = (float *) malloc(I.n_source_regions_per_node |
| * sizeof(float)); |
| |
| // allocate arrays for summation |
| float * fission_rates = malloc(I.n_egroups * sizeof(float)); |
| float * scatter_rates = malloc(I.n_egroups * sizeof(float)); |
| |
| // cycle through all coarse axial intervals to update source |
| for( long i = 0; i < I.n_source_regions_per_node; i++) |
| { |
| Source src = params.sources[i]; |
| |
| // cycle thorugh all fine axial regions to calculate new source |
| for( int j = 0; j < I.fai; j++) |
| { |
| // calculate total fission source and scattering source |
| float fission_source; |
| float scatter_source; |
| |
| // compute total fission source |
| for( int g = 0; g < I.n_egroups; g++ ) |
| fission_rates[g] = src.fine_flux[j][g] * src.XS[g][0]; |
| fission_source = pairwise_sum( fission_rates, (long) I.n_egroups); |
| |
| // normalize fission source by multiplication factor |
| fission_source *= inverse_k; |
| |
| // compute scattering and new total source for each group |
| for( int g = 0; g < I.n_egroups; g++ ) |
| { |
| for( int g2 = 0; g2 < I.n_egroups; g2++ ) |
| { |
| // compute scatter source originating from g2 -> g |
| scatter_rates[g2] = src.scattering_matrix[g][g2] * |
| src.fine_flux[j][g2]; |
| } |
| scatter_source = pairwise_sum(scatter_rates, |
| (long) I.n_egroups); |
| |
| // compuate new total source |
| float chi = src.XS[g][2]; |
| |
| // calculate new fine source |
| float newSrc = (fission_source * chi + scatter_source) |
| / (4.0 * M_PI); |
| |
| // calculate residual |
| float oldSrc = src.fine_source[j][g]; |
| group_res[g] = (newSrc - oldSrc) * (newSrc - oldSrc) |
| / (oldSrc * oldSrc); |
| |
| /* calculate new source in fine axial interval assuming |
| * isotropic source components */ |
| src.fine_source[j][g] = newSrc; |
| } |
| fine_res[j] = pairwise_sum(group_res, (long) I.n_egroups); |
| } |
| residuals[i] = pairwise_sum(fine_res, (long) I.fai); |
| } |
| |
| // calculate source residual |
| residual = pairwise_sum(residuals, I.n_source_regions_per_node); |
| |
| // free memory |
| free(fission_rates); |
| free(scatter_rates); |
| free(group_res); |
| free(fine_res); |
| free(residuals); |
| |
| |
| // NOTE: See code around line 600 of CPUSolver.cpp in ClosedMOC/ OpenMOC |
| |
| return residual; |
| } |
| |
| /* Computes globall k-effective using multiple pair-wise summations and finally |
| * a reduction accross all nodes */ |
| float compute_keff(Params params, Input I, CommGrid grid) |
| { |
| // allocate temporary memory |
| float * sigma = malloc( I.n_egroups * sizeof(float) ); |
| float * group_rates = malloc( I.n_egroups * sizeof(float) ); |
| float * fine_rates = malloc( I.fai * sizeof(float) ); |
| float * QSR_rates = malloc( I.n_source_regions_per_node * sizeof(float) ); |
| |
| /////////////////////////////////////////////////////////////////////////// |
| |
| // compute total absorption rate, looping over source regions |
| for( long i = 0; i < I.n_source_regions_per_node; i++) |
| { |
| // load absorption XS data |
| Source src = params.sources[i]; |
| for( int g = 0; g < I.n_egroups; g++) |
| sigma[g] = src.XS[g][1]; |
| |
| for( int j = 0; j < I.fai; j++ ) |
| { |
| // calculate absorption rates |
| float * fine_flux = src.fine_flux[j]; |
| for( int g = 0; g < I.n_egroups; g++) |
| group_rates[g] = sigma[g] * fine_flux[g]; |
| |
| // sum absorption over all energy groups |
| fine_rates[j] = pairwise_sum( group_rates, (long) I.n_egroups ); |
| } |
| // sum absorption over all fine axial intervals |
| QSR_rates[i] = pairwise_sum( fine_rates, (long) I.fai ); |
| } |
| // sum absorption over all source regions in a node |
| float node_abs = pairwise_sum( QSR_rates, I.n_source_regions_per_node); |
| |
| /////////////////////////////////////////////////////////////////////////// |
| |
| // compute total absorption rate, looping over source regions |
| for( long i = 0; i < I.n_source_regions_per_node; i++) |
| { |
| // load nuSigmaF XS data |
| Source src = params.sources[i]; |
| for( int g = 0; g < I.n_egroups; g++) |
| sigma[g] = src.XS[g][0]; |
| |
| for( int j = 0; j < I.fai; j++ ) |
| { |
| // calculate absorption rates |
| float * fine_flux = src.fine_flux[j]; |
| for( int g = 0; g < I.n_egroups; g++) |
| group_rates[g] = sigma[g] * fine_flux[g]; |
| |
| // sum fission over all energy groups |
| fine_rates[j] = pairwise_sum( group_rates, (long) I.n_egroups ); |
| } |
| // sum fission over all fine axial intervals |
| QSR_rates[i] = pairwise_sum( fine_rates, (long) I.fai ); |
| } |
| // sum fission over all source regions in a node |
| float node_fission = pairwise_sum( QSR_rates, I.n_source_regions_per_node); |
| |
| /////////////////////////////////////////////////////////////////////////// |
| |
| // MPi Reduction |
| float tot_abs = 0; |
| float tot_fission = 0; |
| float leakage = 0; |
| |
| #ifdef MPI |
| |
| // Total Absorption Reduction |
| MPI_Reduce( &node_abs, // Send Buffer |
| &tot_abs, // Receive Buffer |
| 1, // Element Count |
| MPI_FLOAT, // Element Type |
| MPI_SUM, // Reduciton Operation Type |
| 0, // Master Rank |
| grid.cart_comm_3d ); // MPI Communicator |
| |
| // Total Fission Reduction |
| MPI_Reduce( &node_fission, // Send Buffer |
| &tot_fission, // Receive Buffer |
| 1, // Element Count |
| MPI_FLOAT, // Element Type |
| MPI_SUM, // Reduciton Operation Type |
| 0, // Master Rank |
| grid.cart_comm_3d ); // MPI Communicator |
| |
| // Total Leakage Reduction |
| MPI_Reduce( params.leakage, // Send Buffer |
| &leakage, // Receive Buffer |
| 1, // Element Count |
| MPI_FLOAT, // Element Type |
| MPI_SUM, // Reduciton Operation Type |
| 0, // Master Rank |
| grid.cart_comm_3d ); // MPI Communicator |
| |
| MPI_Barrier(grid.cart_comm_3d); |
| |
| // calculate keff |
| float keff = tot_fission/ (tot_abs + leakage); |
| #else |
| float keff = node_fission / (node_abs + *params.leakage); |
| #endif |
| |
| /////////////////////////////////////////////////////////////////////////// |
| |
| // free memory |
| free(sigma); |
| free(group_rates); |
| free(fine_rates); |
| free(QSR_rates); |
| |
| return keff; |
| } |
| |
| /* Interpolates a formed exponential table to compute ( 1- exp(-x) ) |
| * at the desired x value */ |
| float interpolateTable( Table table, float x) |
| { |
| // check to ensure value is in domain |
| if( x > table.maxVal ) |
| return 1.0f; |
| else |
| { |
| int interval = (int) ( x / table.dx + 0.5f * table.dx ); |
| /* |
| if( interval >= table.N || interval < 0) |
| { |
| printf( "Interval = %d\n", interval); |
| printf( "N = %d\n", table.N); |
| printf( "x = %f\n", x); |
| printf( "dx = %f\n", table.dx); |
| exit(1); |
| } |
| */ |
| float slope = table.values[ 2 * interval ]; |
| float intercept = table.values[ 2 * interval + 1 ]; |
| float val = slope * x + intercept; |
| return val; |
| } |
| } |
| |