MC-GPU
|
00001 00002 //////////////////////////////////////////////////////////////////////////////// 00003 // 00004 // **************************** 00005 // *** MC-GPU , version 1.3 *** 00006 // **************************** 00007 // 00008 //! Header file containing the declarations for the MC-GPU code. 00009 //! This file declares all the host and device functions and structures, 00010 //! the library files to include in the compilation, various constants parameters 00011 //! of the simulation, pre-processor macro functions, etc. 00012 // 00013 // 00014 // 00015 // ** DISCLAIMER ** 00016 // 00017 // This software and documentation (the "Software") were developed at the Food and 00018 // Drug Administration (FDA) by employees of the Federal Government in the course 00019 // of their official duties. Pursuant to Title 17, Section 105 of the United States 00020 // Code, this work is not subject to copyright protection and is in the public 00021 // domain. Permission is hereby granted, free of charge, to any person obtaining a 00022 // copy of the Software, to deal in the Software without restriction, including 00023 // without limitation the rights to use, copy, modify, merge, publish, distribute, 00024 // sublicense, or sell copies of the Software or derivatives, and to permit persons 00025 // to whom the Software is furnished to do so. FDA assumes no responsibility 00026 // whatsoever for use by other parties of the Software, its source code, 00027 // documentation or compiled executables, and makes no guarantees, expressed or 00028 // implied, about its quality, reliability, or any other characteristic. Further, 00029 // use of this code in no way implies endorsement by the FDA or confers any 00030 // advantage in regulatory decisions. Although this software can be redistributed 00031 // and/or modified freely, we ask that any derivative works bear some notice that 00032 // they are derived from it, and any modified versions bear some notice that they 00033 // have been modified. 00034 // 00035 // 00036 //! @file MC-GPU_v1.3.h 00037 //! @author Andreu Badal (Andreu.Badal-Soler@fda.hhs.gov) 00038 //! @date 2012/12/12 00039 // Older versions: 2010/05/14 00040 // 00041 //////////////////////////////////////////////////////////////////////////////// 00042 00043 00044 #ifndef MCGPU_H_ 00045 #define MCGPU_H_ 00046 00047 00048 // *** To use CUDA, compile with "-DUSING_CUDA" or uncomment the following line: 00049 //#define USING_CUDA 00050 00051 // *** To use MPI to simulate multiple CT projections in different GPUs compile with "-DUSING_MPI" or uncomment the following line: 00052 //#define USING_MPI 00053 00054 00055 //! MPI macro: mark commands to be executed only by the master thread (myID==0). 00056 #define MASTER_THREAD if(0==myID) 00057 00058 //! Maximum number of projections allowed in the CT simulation (not limited by the constant memory because stored in global and shared memory): 00059 #define MAX_NUM_PROJECTIONS 720 00060 00061 //! Constants values for the Compton and Rayleigh models: 00062 #define MAX_MATERIALS 15 00063 #define MAX_SHELLS 30 00064 #define NP_RAYLEIGH 128 00065 #define MAX_ENERGYBINS_RAYLEIGH 25005 00066 00067 //! Maximum number of energy bins in the input x-ray energy spectrum. 00068 #define MAX_ENERGY_BINS 256 00069 00070 00071 #define PI 3.14159265358979323846 00072 #define RAD2DEG 180.0/PI 00073 #define DEG2RAD PI/180.0 00074 00075 // Other constants used in the code: 00076 //! Value to scale the deposited energy in the pixels so that it can be stored as a long long integer 00077 //! instead of a double precision float. The integer values have to be used in order to use the 00078 //! atomicAdd function in CUDA. 00079 #define SCALE_eV 100.0f 00080 00081 //! Offset value to make sure the particles are completely inside, or outside, the voxel bounding box. 00082 //! For example, to start a particle track the source may have to translate a particle from the focal spot to the plane Y=0, but 00083 //! in reality the particle will be moved to Y=EPS_SOURCE to make sure it is unmistakenly located inside a voxel. 00084 //! If "EPS_SOURCE" is too small an artifact with weird concentric circular shadows may appear at the center of the simulated image. 00085 #define EPS_SOURCE 0.000015f 00086 00087 #define NEG_EPS_SOURCE -EPS_SOURCE 00088 #define INF 500000.0f 00089 #define INF_minus1 499999.0f 00090 #define NEG_INF -500000.0f 00091 00092 //! Preprocessor macro to calculate maximum and minimum values: 00093 #define max_value( a, b ) ( ((a) > (b)) ? (a) : (b) ) 00094 #define min_value( a, b ) ( ((a) < (b)) ? (a) : (b) ) 00095 00096 00097 // Include standard header files: 00098 #include <stdlib.h> 00099 #include <stdio.h> 00100 #include <string.h> 00101 #include <math.h> 00102 #include <time.h> 00103 00104 #include "zlib.h" // Library used to read gzip material and voxel files (non-compressed files can also be read). Compile with option -lz 00105 00106 // #ifndef _GNU_SOURCE 00107 // This definition may be required to use the "sincos" function from math.h !!DeBuG!! 00108 // #define _GNU_SOURCE 00109 // #endif 00110 00111 00112 #ifdef USING_CUDA 00113 // Include CUDA functions: 00114 00115 // CUDA runtime 00116 #include <cuda_runtime.h> 00117 // Helper functions and utilities to work with CUDA 00118 #include <helper_functions.h> 00119 00120 // !!DEBUG!! CUDA 5.0: USING ONLY CPU TIMERS. Remove: #include <cutil_inline.h> 00121 #include <helper_cuda.h> 00122 00123 00124 // #include <timer.h> // !!DEBUG!! Not necessary?? 00125 00126 #include <vector_types.h> 00127 00128 #else 00129 // Include the definition of the vector structures (float3, int2...) that are useful in the GPU (multiple values can be read simultaneously from the slow main memory): 00130 struct int2 { int x, y; }; typedef struct int2 int2; 00131 struct int3 { int x, y, z; }; typedef struct int3 int3; 00132 struct float2 { float x, y; }; typedef struct float2 float2; 00133 struct float3 { float x, y, z; }; typedef struct float3 float3; 00134 struct double2 { double x, y; }; typedef struct double2 double2; 00135 struct double3 { double x, y, z; }; typedef struct double3 double3; 00136 struct short3 { short x, y, z; }; typedef struct short3 short3; 00137 struct ulonglong2 { unsigned long long int x, y; }; typedef struct ulonglong2 ulonglong2; 00138 #endif 00139 00140 00141 00142 #ifdef USING_MPI 00143 // Include MPI functions: 00144 #include <mpi.h> 00145 #endif 00146 00147 00148 00149 // MC-GPU structure declarations: 00150 00151 //! Structure storing the data defining the source model (except for the energy spectrum). 00152 //! When a CT is simulated, multiple sources will be stored in an array (one source instance per projection angle). 00153 struct 00154 #ifdef USING_CUDA 00155 __align__(16) 00156 #endif 00157 source_struct // Define a cone beam x-ray source. 00158 { 00159 float3 position, 00160 direction; 00161 float rot_fan[9], // Rotation (from axis (0,1,0)) defined by the source direction (needed by the fan beam source; its inverse is used by the detector). 00162 cos_theta_low, // Angles for the fan beam model (pyramidal source). 00163 phi_low, 00164 D_cos_theta, 00165 D_phi, 00166 max_height_at_y1cm; 00167 }; 00168 00169 00170 //! Structure storing the source energy spectrum data to be sampled using the Walker aliasing algorithm. 00171 struct 00172 #ifdef USING_CUDA 00173 __align__(16) 00174 #endif 00175 source_energy_struct // Define a cone beam x-ray source. 00176 { 00177 int num_bins_espc; // Number of bins in the input energy spectrum 00178 float espc[MAX_ENERGY_BINS], // Energy values of the input x-ray energy spectrum 00179 espc_cutoff[MAX_ENERGY_BINS]; // Cutoffs for the Walker aliasing sampling of the spectrum 00180 short int espc_alias[MAX_ENERGY_BINS]; // Aliases for the Walker aliasing sampling algorithm (stored as short to save memory) 00181 }; 00182 00183 00184 //! Structure storing the data defining the x-ray detector. 00185 //! For a CT, the struct stores for each angle the detector location and the rotations to 00186 //! transport the detector to a plane perpendicular to +Y. 00187 //! To simulate multiple projections, an array of MAX_NUM_PROJECTIONS of instances of this struct have to be defined! !!DeBuG!! shared 00188 struct 00189 #ifdef USING_CUDA 00190 __align__(16) 00191 #endif 00192 detector_struct // Define a 2D detector plane, located in front of the defined source (centered at the focal spot and perpendicular to the initial direction). 00193 { // The radiograohic image will be stored in the global variable "unsigned long long int *image". 00194 float sdd; // Store the source-detector distance 00195 float3 corner_min_rotated_to_Y, 00196 center; 00197 float rot_inv[9], // Rotation to transport a particle on the detector plane to a frame where the detector is perpendicular to +Y. 00198 width_X, 00199 height_Z, 00200 inv_pixel_size_X, 00201 inv_pixel_size_Z; 00202 int2 num_pixels; 00203 int total_num_pixels, 00204 rotation_flag; // Flag >0 if the initial source direction is not (0,1,0); ==0 otherwise (ie, detector perpendicular to +Y axis and rotation not required). 00205 }; 00206 00207 00208 //! Structure defining a voxelized box with the back-lower corner at the coordinate origin. 00209 struct 00210 #ifdef USING_CUDA 00211 __align__(16) 00212 #endif 00213 voxel_struct // Define a voxelized box with the back-lower corner at the coordinate origin. 00214 { // Voxel material and densities are stored in a local variable. 00215 int3 num_voxels; 00216 float3 inv_voxel_size, 00217 size_bbox; 00218 }; 00219 00220 00221 //! Structure with the basic data required by the linear interpolation of the mean free paths: number of values and energy grid. 00222 struct 00223 #ifdef USING_CUDA 00224 __align__(16) 00225 #endif 00226 linear_interp // Constant data for linear interpolation of mean free paths 00227 { // The parameters 'a' and 'b' are stored in local variables float4 *a, *b; 00228 int num_values; // --> Number of iterpolation points (eg, 2^12 = 4096). 00229 float e0, // --> Minimum energy 00230 ide; // --> Inverse of the energy bin width 00231 }; 00232 00233 00234 //! Structure storing the data of the Compton interaction sampling model (equivalent to PENELOPE's common block /CGCO/). 00235 struct 00236 #ifdef USING_CUDA 00237 __align__(16) 00238 #endif 00239 compton_struct // Data from PENELOPE's common block CGCO: Compton interaction data 00240 { 00241 float fco[MAX_MATERIALS*MAX_SHELLS], 00242 uico[MAX_MATERIALS*MAX_SHELLS], 00243 fj0[MAX_MATERIALS*MAX_SHELLS]; 00244 int noscco[MAX_MATERIALS]; 00245 }; 00246 00247 //! Structure storing the data of the Rayleigh interaction sampling model (equivalent to PENELOPE's common block /CGRA/). 00248 struct 00249 #ifdef USING_CUDA 00250 __align__(16) 00251 #endif 00252 rayleigh_struct 00253 { 00254 float xco[NP_RAYLEIGH*MAX_MATERIALS], 00255 pco[NP_RAYLEIGH*MAX_MATERIALS], 00256 aco[NP_RAYLEIGH*MAX_MATERIALS], 00257 bco[NP_RAYLEIGH*MAX_MATERIALS], 00258 pmax[MAX_ENERGYBINS_RAYLEIGH*MAX_MATERIALS]; 00259 unsigned char itlco[NP_RAYLEIGH*MAX_MATERIALS], 00260 ituco[NP_RAYLEIGH*MAX_MATERIALS]; 00261 }; 00262 00263 00264 00265 //// *** HOST FUNCTIONS *** //// 00266 00267 void read_input(int argc, char** argv, int myID, unsigned long long int* total_histories, int* gpu_id, int* seed_input, int* num_threads_per_block, int* histories_per_thread, struct detector_struct* detector_data, unsigned long long int** image_ptr, int* image_bytes, struct source_struct* source_data, struct source_energy_struct* source_energy_data, char* file_name_voxels, char file_name_materials[MAX_MATERIALS][250], char* file_name_output, char* file_name_espc, int* num_projections, double* D_angle, double* angularROI_0, double* angularROI_1, double* initial_angle, ulonglong2** voxels_Edep_ptr, int* voxels_Edep_bytes, char* file_dose_output, short int* dose_ROI_x_min, short int* dose_ROI_x_max, short int* dose_ROI_y_min, short int* dose_ROI_y_max, short int* dose_ROI_z_min, short int* dose_ROI_z_max, double* SRotAxisD, double* vertical_translation_per_projection, int* flag_material_dose); 00268 void load_voxels(int myID, char* file_name_voxels, float* density_max, struct voxel_struct* voxel_data, float2** voxel_mat_dens_ptr, unsigned int* voxel_mat_dens_bytes, short int* dose_ROI_x_max, short int* dose_ROI_y_max, short int* dose_ROI_z_max); 00269 void load_material(int myID, char file_name_materials[MAX_MATERIALS][250], float* density_max, float* density_nominal, struct linear_interp* mfp_table_data, float2** mfp_Woodcock_table, int* mfp_Woodcock_table_bytes, float3** mfp_table_a_ptr, float3** mfp_table_b_ptr, int* mfp_table_bytes, struct rayleigh_struct *rayleigh_table_ptr, struct compton_struct *compton_table_ptr); 00270 void trim_name(char* input_line, char* file_name); 00271 char* fgets_trimmed(char* trimmed_line, int num, FILE* file_ptr); 00272 int report_image(char* file_name_output, struct detector_struct* detector_data, struct source_struct* source_data, float mean_energy_spectrum, unsigned long long int* image, double time_elapsed, unsigned long long int total_histories, int current_projection, int num_projections, double D_angle, double initial_angle, int myID, int numprocs); 00273 void set_CT_trajectory(int myID, int num_projections, double D_angle, double angularROI_0, double angularROI_1, double SRotAxisD, struct source_struct* source_data, struct detector_struct* detector_data, double vertical_translation_per_projection); 00274 int report_voxels_dose(char* file_dose_output, int num_projections, struct voxel_struct* voxel_data, float2* voxel_mat_dens, ulonglong2* voxels_Edep, double time_elapsed_total, unsigned long long int total_histories, short int dose_ROI_x_min, short int dose_ROI_x_max, short int dose_ROI_y_min, short int dose_ROI_y_max, short int dose_ROI_z_min, short int dose_ROI_z_max, struct source_struct* source_data); 00275 void init_energy_spectrum(char* file_name_espc, struct source_energy_struct* source_energy_data, float *mean_energy_spectrum); 00276 void update_seed_PRNG(int batch_number, unsigned long long int total_histories, int* seed); 00277 void IRND0(float *W, float *F, short int *K, int N); 00278 int report_materials_dose(int num_projections, unsigned long long int total_histories, float *density_nominal, ulonglong2 *materials_dose, double *mass_materials); 00279 00280 00281 // #ifdef USING_CUDA 00282 // __host__ __device__ // Enable this code to be able to call the function from the host (CPU) and also from the device (GPU). 00283 // #endif 00284 int seeki_walker(float *cutoff, short int *alias, float randno, int n); // (This function is not actually called in the code.) 00285 00286 #ifdef USING_CUDA 00287 int guestimate_GPU_performance(int gpu_id); 00288 void init_CUDA_device( int* gpu_id, int myID, int numprocs, 00289 /*Variables to GPU constant memory:*/ struct voxel_struct* voxel_data, struct source_struct* source_data, struct source_energy_struct* source_energy_data, struct detector_struct* detector_data, struct linear_interp* mfp_table_data, 00290 /*Variables to GPU global memory:*/ float2* voxel_mat_dens, float2** voxel_mat_dens_device, unsigned int voxel_mat_dens_bytes, 00291 unsigned long long int* image, unsigned long long int** image_device, int image_bytes, 00292 float2* mfp_Woodcock_table, float2** mfp_Woodcock_table_device, int mfp_Woodcock_table_bytes, 00293 float3* mfp_table_a, float3* mfp_table_b, float3** mfp_table_a_device, float3** mfp_table_b_device, int mfp_table_bytes, 00294 struct rayleigh_struct* rayleigh_table, struct rayleigh_struct** rayleigh_table_device, 00295 struct compton_struct* compton_table, struct compton_struct** compton_table_device, 00296 struct detector_struct** detector_data_device, struct source_struct** source_data_device, 00297 ulonglong2* voxels_Edep, ulonglong2** voxels_Edep_device, int voxels_Edep_bytes, short int* dose_ROI_x_min, short int* dose_ROI_x_max, short int* dose_ROI_y_min, short int* dose_ROI_y_max, short int* dose_ROI_z_min, short int* dose_ROI_z_max, 00298 ulonglong2* materials_dose, ulonglong2** materials_dose_device, int flag_material_dose, int num_projections); 00299 #endif 00300 00301 00302 00303 //// *** DEVICE CONSTANT MEMORY DECLARATION (~global variables in the GPU) *** //// 00304 00305 // -- Constant memory (defined as global variables): 00306 00307 //! Global variable to be stored in the GPU constant memory defining the coordinates of the dose deposition region of interest. 00308 #ifdef USING_CUDA 00309 __constant__ 00310 #endif 00311 short int dose_ROI_x_min_CONST, dose_ROI_x_max_CONST, dose_ROI_y_min_CONST, dose_ROI_y_max_CONST, dose_ROI_z_min_CONST, dose_ROI_z_max_CONST; 00312 00313 //! Global variable to be stored in the GPU constant memory defining the size of the voxel phantom. 00314 #ifdef USING_CUDA 00315 __constant__ 00316 #endif 00317 struct voxel_struct voxel_data_CONST; // Define the geometric constants 00318 00319 //! Global variable to be stored in the GPU constant memory defining the x-ray source. 00320 #ifdef USING_CUDA 00321 __constant__ 00322 #endif 00323 struct source_struct source_data_CONST; // Define a particles source. 00324 00325 //! Global variable to be stored in the GPU constant memory defining the x-ray detector. 00326 #ifdef USING_CUDA 00327 __constant__ 00328 #endif 00329 struct detector_struct detector_data_CONST; // Define a detector layer perpendicular to the y axis 00330 00331 //! Global variable to be stored in the GPU constant memory defining the linear interpolation data. 00332 #ifdef USING_CUDA 00333 __constant__ 00334 #endif 00335 struct linear_interp mfp_table_data_CONST; // Define size of interpolation arrays 00336 00337 //! Global variable to be stored in the GPU constant memory defining the source energy spectrum. 00338 #ifdef USING_CUDA 00339 __constant__ 00340 #endif 00341 struct source_energy_struct source_energy_data_CONST; 00342 00343 00344 //// *** GLOBAL FUNCTIONS *** //// 00345 00346 #ifdef USING_CUDA 00347 __global__ 00348 void init_image_array_GPU(unsigned long long int* image, int pixels_per_image); 00349 __global__ 00350 void init_dose_array_GPU(ulonglong2* voxels_Edep, int num_voxels_dose); 00351 #endif 00352 00353 #ifdef USING_CUDA 00354 __global__ 00355 void track_particles(int histories_per_thread, int num_p, int seed_input, unsigned long long int* image, ulonglong2* voxels_Edep, float2* voxel_mat_dens, float2* mfp_Woodcock_table, float3* mfp_table_a, float3* mfp_table_b, struct rayleigh_struct* rayleigh_table, struct compton_struct* compton_table, struct detector_struct* detector_data_array, struct source_struct* source_data_array, ulonglong2* materials_dose); 00356 #else 00357 void track_particles(int history_batch, int histories_per_thread, int num_p, int seed_input, unsigned long long int* image, ulonglong2* voxels_Edep, float2* voxel_mat_dens, float2* mfp_Woodcock_table, float3* mfp_table_a, float3* mfp_table_b, struct rayleigh_struct* rayleigh_table, struct compton_struct* compton_table, struct detector_struct* detector_data_array, struct source_struct* source_data_array, ulonglong2* materials_dose); 00358 #endif 00359 00360 00361 //// *** DEVICE FUNCTIONS *** //// 00362 00363 #ifdef USING_CUDA 00364 __device__ 00365 #endif 00366 inline void source(float3* position, float3* direction, float* energy, int2* seed, int* absvox, struct source_struct* source_data_SHARED, struct detector_struct* detector_data_SHARED); 00367 #ifdef USING_CUDA 00368 __device__ 00369 #endif 00370 inline void move_to_bbox(float3* position, float3* direction, float3 size_bbox, int* intersection_flag); 00371 #ifdef USING_CUDA 00372 __device__ 00373 #endif 00374 inline void tally_image(float* energy, float3* position, float3* direction, signed char* scatter_state, unsigned long long int* image, struct source_struct* source_data_SHARED, struct detector_struct* detector_data_SHARED); 00375 #ifdef USING_CUDA 00376 __device__ 00377 #endif 00378 inline void init_PRNG(int history_batch, int histories_per_thread, int seed_input, int2* seed); 00379 #ifdef USING_CUDA 00380 __host__ __device__ 00381 #endif 00382 inline int abMODm(int m, int a, int s); 00383 #ifdef USING_CUDA 00384 __device__ 00385 #endif 00386 inline float ranecu(int2* seed); 00387 #ifdef USING_CUDA 00388 __device__ 00389 #endif 00390 inline double ranecu_double(int2* seed); 00391 #ifdef USING_CUDA 00392 __device__ 00393 #endif 00394 inline int locate_voxel(float3* position, short3* voxel_coord); 00395 #ifdef USING_CUDA 00396 __device__ 00397 #endif 00398 inline void rotate_double(float3* direction, double cos_theta, double phi); 00399 #ifdef USING_CUDA 00400 __device__ 00401 #endif 00402 inline void GRAa(float *energy, double *costh_Rayleigh, int *mat, float *pmax_current, int2 *seed, struct rayleigh_struct* cgra); 00403 #ifdef USING_CUDA 00404 __device__ 00405 #endif 00406 inline void GCOa(float *energy, double *costh_Compton, int *mat, int2 *seed, struct compton_struct* cgco_SHARED); 00407 00408 #ifdef USING_CUDA 00409 __device__ 00410 #endif 00411 inline void tally_voxel_energy_deposition(float* Edep, short3* voxel_coord, ulonglong2* dose); 00412 00413 #ifdef USING_CUDA 00414 __device__ 00415 #endif 00416 inline 00417 void tally_materials_dose(float* Edep, int* material, ulonglong2* materials_dose); 00418 00419 00420 // -- END OF THE "ifndef MCGPU_H_" statement: 00421 #endif