MC-GPU
MC-GPU_v1.3.h
Go to the documentation of this file.
00001 
00002 ////////////////////////////////////////////////////////////////////////////////
00003 //
00004 //              ****************************
00005 //              *** MC-GPU , version 1.3 ***
00006 //              ****************************
00007 //                                          
00008 //!   Header file containing the declarations for the MC-GPU code.
00009 //!   This file declares all the host and device functions and structures,
00010 //!   the library files to include in the compilation, various constants parameters
00011 //!   of the simulation, pre-processor macro functions, etc.
00012 //
00013 //
00014 //
00015 //          ** DISCLAIMER **
00016 //
00017 // This software and documentation (the "Software") were developed at the Food and
00018 // Drug Administration (FDA) by employees of the Federal Government in the course
00019 // of their official duties. Pursuant to Title 17, Section 105 of the United States
00020 // Code, this work is not subject to copyright protection and is in the public
00021 // domain. Permission is hereby granted, free of charge, to any person obtaining a
00022 // copy of the Software, to deal in the Software without restriction, including
00023 // without limitation the rights to use, copy, modify, merge, publish, distribute,
00024 // sublicense, or sell copies of the Software or derivatives, and to permit persons
00025 // to whom the Software is furnished to do so. FDA assumes no responsibility
00026 // whatsoever for use by other parties of the Software, its source code,
00027 // documentation or compiled executables, and makes no guarantees, expressed or
00028 // implied, about its quality, reliability, or any other characteristic. Further,
00029 // use of this code in no way implies endorsement by the FDA or confers any
00030 // advantage in regulatory decisions.  Although this software can be redistributed
00031 // and/or modified freely, we ask that any derivative works bear some notice that
00032 // they are derived from it, and any modified versions bear some notice that they
00033 // have been modified.
00034 //                                                                            
00035 //
00036 //!                     @file    MC-GPU_v1.3.h
00037 //!                     @author  Andreu Badal (Andreu.Badal-Soler@fda.hhs.gov)
00038 //!                     @date    2012/12/12
00039 //                      Older versions: 2010/05/14
00040 //
00041 ////////////////////////////////////////////////////////////////////////////////
00042 
00043 
00044 #ifndef MCGPU_H_
00045 #define MCGPU_H_
00046 
00047 
00048 // *** To use CUDA, compile with "-DUSING_CUDA" or uncomment the following line:
00049 //#define USING_CUDA
00050 
00051 // *** To use MPI to simulate multiple CT projections in different GPUs compile with "-DUSING_MPI" or uncomment the following line:
00052 //#define USING_MPI
00053 
00054 
00055 //! MPI macro: mark commands to be executed only by the master thread (myID==0).
00056 #define MASTER_THREAD if(0==myID)
00057 
00058 //! Maximum number of projections allowed in the CT simulation (not limited by the constant memory because stored in global and shared memory):
00059 #define  MAX_NUM_PROJECTIONS  720
00060 
00061 //! Constants values for the Compton and Rayleigh models:
00062 #define  MAX_MATERIALS      15
00063 #define  MAX_SHELLS         30
00064 #define  NP_RAYLEIGH       128
00065 #define  MAX_ENERGYBINS_RAYLEIGH  25005
00066 
00067 //! Maximum number of energy bins in the input x-ray energy spectrum.
00068 #define  MAX_ENERGY_BINS   256
00069 
00070 
00071 #define  PI      3.14159265358979323846
00072 #define  RAD2DEG 180.0/PI
00073 #define  DEG2RAD PI/180.0
00074 
00075 // Other constants used in the code:
00076 //! Value to scale the deposited energy in the pixels so that it can be stored as a long long integer
00077 //! instead of a double precision float. The integer values have to be used in order to use the
00078 //! atomicAdd function in CUDA.
00079 #define SCALE_eV        100.0f
00080 
00081 //! Offset value to make sure the particles are completely inside, or outside, the voxel bounding box.
00082 //! For example, to start a particle track the source may have to translate a particle from the focal spot to the plane Y=0, but 
00083 //! in reality the particle will be moved to Y=EPS_SOURCE to make sure it is unmistakenly located inside a voxel.
00084 //! If "EPS_SOURCE" is too small an artifact with weird concentric circular shadows may appear at the center of the simulated image.
00085 #define EPS_SOURCE      0.000015f      
00086 
00087 #define NEG_EPS_SOURCE -EPS_SOURCE
00088 #define INF             500000.0f
00089 #define INF_minus1      499999.0f
00090 #define NEG_INF        -500000.0f
00091 
00092 //! Preprocessor macro to calculate maximum and minimum values:
00093 #define max_value( a, b ) ( ((a) > (b)) ? (a) : (b) )
00094 #define min_value( a, b ) ( ((a) < (b)) ? (a) : (b) )
00095 
00096 
00097 // Include standard header files:
00098 #include <stdlib.h>
00099 #include <stdio.h>
00100 #include <string.h>
00101 #include <math.h>
00102 #include <time.h>
00103 
00104 #include "zlib.h"    // Library used to read gzip material and voxel files (non-compressed files can also be read). Compile with option -lz
00105 
00106 // #ifndef _GNU_SOURCE
00107 // This definition may be required to use the "sincos" function from math.h  !!DeBuG!!
00108 // #define _GNU_SOURCE
00109 // #endif
00110 
00111 
00112 #ifdef USING_CUDA
00113 // Include CUDA functions:
00114 
00115   // CUDA runtime
00116   #include <cuda_runtime.h>
00117   // Helper functions and utilities to work with CUDA
00118   #include <helper_functions.h>
00119 
00120    // !!DEBUG!! CUDA 5.0: USING ONLY CPU TIMERS. Remove:  #include <cutil_inline.h>
00121   #include <helper_cuda.h>
00122 
00123 
00124 //   #include <timer.h>  // !!DEBUG!!   Not necessary??
00125 
00126   #include <vector_types.h>
00127 
00128 #else
00129   // Include the definition of the vector structures (float3, int2...) that are useful in the GPU (multiple values can be read simultaneously from the slow main memory):
00130   struct int2  { int x, y; };         typedef struct int2 int2;
00131   struct int3  { int x, y, z; };      typedef struct int3 int3;
00132   struct float2 { float x, y; };      typedef struct float2 float2;
00133   struct float3 { float x, y, z; };   typedef struct float3 float3;
00134   struct double2 { double x, y; };    typedef struct double2 double2;
00135   struct double3 { double x, y, z; }; typedef struct double3 double3;
00136   struct short3 { short x, y, z; };   typedef struct short3 short3;  
00137   struct ulonglong2 { unsigned long long int x, y; }; typedef struct ulonglong2 ulonglong2;
00138 #endif
00139   
00140 
00141 
00142 #ifdef USING_MPI
00143 // Include MPI functions:
00144   #include <mpi.h>
00145 #endif
00146 
00147 
00148 
00149 // MC-GPU structure declarations:
00150 
00151 //! Structure storing the data defining the source model (except for the energy spectrum).
00152 //! When a CT is simulated, multiple sources will be stored in an array (one source instance per projection angle).
00153 struct
00154 #ifdef USING_CUDA
00155   __align__(16)
00156 #endif
00157 source_struct       // Define a cone beam x-ray source.
00158 {
00159   float3 position,
00160          direction;
00161   float rot_fan[9],      // Rotation (from axis (0,1,0)) defined by the source direction (needed by the fan beam source; its inverse is used by the detector).
00162         cos_theta_low,   // Angles for the fan beam model (pyramidal source).
00163         phi_low,
00164         D_cos_theta,
00165         D_phi,
00166         max_height_at_y1cm;
00167 };
00168 
00169 
00170 //! Structure storing the source energy spectrum data to be sampled using the Walker aliasing algorithm.
00171 struct
00172 #ifdef USING_CUDA
00173   __align__(16)
00174 #endif
00175 source_energy_struct       // Define a cone beam x-ray source.
00176 {  
00177   int num_bins_espc;                     // Number of bins in the input energy spectrum
00178   float espc[MAX_ENERGY_BINS],           // Energy values of the input x-ray energy spectrum
00179         espc_cutoff[MAX_ENERGY_BINS];    // Cutoffs for the Walker aliasing sampling of the spectrum
00180   short int espc_alias[MAX_ENERGY_BINS]; // Aliases for the Walker aliasing sampling algorithm (stored as short to save memory)
00181 };
00182 
00183 
00184 //! Structure storing the data defining the x-ray detector. 
00185 //! For a CT, the struct stores for each angle the detector location and the rotations to
00186 //! transport the detector to a plane perpendicular to +Y.
00187 //! To simulate multiple projections, an array of MAX_NUM_PROJECTIONS of instances of this struct have to be defined!  !!DeBuG!! shared
00188 struct
00189 #ifdef USING_CUDA
00190   __align__(16)
00191 #endif
00192 detector_struct         // Define a 2D detector plane, located in front of the defined source (centered at the focal spot and perpendicular to the initial direction).
00193 {                       // The radiograohic image will be stored in the global variable "unsigned long long int *image".
00194   float sdd;                                // Store the source-detector distance
00195   float3 corner_min_rotated_to_Y,
00196          center;
00197   float rot_inv[9],    // Rotation to transport a particle on the detector plane to a frame where the detector is perpendicular to +Y.
00198         width_X,
00199         height_Z,
00200         inv_pixel_size_X,
00201         inv_pixel_size_Z;
00202   int2 num_pixels;
00203   int total_num_pixels,
00204       rotation_flag;    // Flag >0 if the initial source direction is not (0,1,0); ==0 otherwise (ie, detector perpendicular to +Y axis and rotation not required).
00205 };
00206 
00207 
00208 //! Structure defining a voxelized box with the back-lower corner at the coordinate origin.
00209 struct
00210 #ifdef USING_CUDA
00211   __align__(16)
00212 #endif
00213 voxel_struct        // Define a voxelized box with the back-lower corner at the coordinate origin.
00214 {                                  // Voxel material and densities are stored in a local variable.
00215   int3 num_voxels;
00216   float3 inv_voxel_size,
00217          size_bbox;
00218 };
00219 
00220 
00221 //! Structure with the basic data required by the linear interpolation of the mean free paths: number of values and energy grid.
00222 struct
00223 #ifdef USING_CUDA
00224   __align__(16)
00225 #endif
00226 linear_interp       // Constant data for linear interpolation of mean free paths
00227 {                                        // The parameters 'a' and 'b' are stored in local variables float4 *a, *b;
00228   int num_values;      // -->  Number of iterpolation points (eg, 2^12 = 4096).
00229   float e0,            // --> Minimum energy
00230         ide;           // --> Inverse of the energy bin width
00231 };
00232 
00233 
00234 //! Structure storing the data of the Compton interaction sampling model (equivalent to PENELOPE's common block /CGCO/).
00235 struct
00236 #ifdef USING_CUDA
00237   __align__(16)
00238 #endif
00239 compton_struct      // Data from PENELOPE's common block CGCO: Compton interaction data
00240 {
00241   float fco[MAX_MATERIALS*MAX_SHELLS],
00242         uico[MAX_MATERIALS*MAX_SHELLS],
00243         fj0[MAX_MATERIALS*MAX_SHELLS];
00244   int noscco[MAX_MATERIALS];
00245 };
00246 
00247 //! Structure storing the data of the Rayleigh interaction sampling model (equivalent to PENELOPE's common block /CGRA/).
00248 struct
00249 #ifdef USING_CUDA
00250   __align__(16)
00251 #endif
00252 rayleigh_struct
00253 {
00254   float xco[NP_RAYLEIGH*MAX_MATERIALS],
00255         pco[NP_RAYLEIGH*MAX_MATERIALS],
00256         aco[NP_RAYLEIGH*MAX_MATERIALS],
00257         bco[NP_RAYLEIGH*MAX_MATERIALS],
00258         pmax[MAX_ENERGYBINS_RAYLEIGH*MAX_MATERIALS];
00259   unsigned char itlco[NP_RAYLEIGH*MAX_MATERIALS],
00260                 ituco[NP_RAYLEIGH*MAX_MATERIALS];
00261 };
00262 
00263 
00264 
00265 //// *** HOST FUNCTIONS *** ////
00266 
00267 void read_input(int argc, char** argv, int myID, unsigned long long int* total_histories, int* gpu_id, int* seed_input, int* num_threads_per_block, int* histories_per_thread, struct detector_struct* detector_data, unsigned long long int** image_ptr, int* image_bytes, struct source_struct* source_data, struct source_energy_struct* source_energy_data, char* file_name_voxels, char file_name_materials[MAX_MATERIALS][250], char* file_name_output, char* file_name_espc, int* num_projections, double* D_angle, double* angularROI_0, double* angularROI_1, double* initial_angle, ulonglong2** voxels_Edep_ptr, int* voxels_Edep_bytes, char* file_dose_output, short int* dose_ROI_x_min, short int* dose_ROI_x_max, short int* dose_ROI_y_min, short int* dose_ROI_y_max, short int* dose_ROI_z_min, short int* dose_ROI_z_max, double* SRotAxisD, double* vertical_translation_per_projection, int* flag_material_dose);
00268 void load_voxels(int myID, char* file_name_voxels, float* density_max, struct voxel_struct* voxel_data, float2** voxel_mat_dens_ptr, unsigned int* voxel_mat_dens_bytes, short int* dose_ROI_x_max, short int* dose_ROI_y_max, short int* dose_ROI_z_max);
00269 void load_material(int myID, char file_name_materials[MAX_MATERIALS][250], float* density_max, float* density_nominal, struct linear_interp* mfp_table_data, float2** mfp_Woodcock_table, int* mfp_Woodcock_table_bytes, float3** mfp_table_a_ptr, float3** mfp_table_b_ptr, int* mfp_table_bytes, struct rayleigh_struct *rayleigh_table_ptr, struct compton_struct *compton_table_ptr);
00270 void trim_name(char* input_line, char* file_name);
00271 char* fgets_trimmed(char* trimmed_line, int num, FILE* file_ptr);
00272 int report_image(char* file_name_output, struct detector_struct* detector_data, struct source_struct* source_data, float mean_energy_spectrum, unsigned long long int* image, double time_elapsed, unsigned long long int total_histories, int current_projection, int num_projections, double D_angle, double initial_angle, int myID, int numprocs);
00273 void set_CT_trajectory(int myID, int num_projections, double D_angle, double angularROI_0, double angularROI_1, double SRotAxisD, struct source_struct* source_data, struct detector_struct* detector_data, double vertical_translation_per_projection);
00274 int report_voxels_dose(char* file_dose_output, int num_projections, struct voxel_struct* voxel_data, float2* voxel_mat_dens, ulonglong2* voxels_Edep, double time_elapsed_total, unsigned long long int total_histories, short int dose_ROI_x_min, short int dose_ROI_x_max, short int dose_ROI_y_min, short int dose_ROI_y_max, short int dose_ROI_z_min, short int dose_ROI_z_max, struct source_struct* source_data);
00275 void init_energy_spectrum(char* file_name_espc, struct source_energy_struct* source_energy_data, float *mean_energy_spectrum);
00276 void update_seed_PRNG(int batch_number, unsigned long long int total_histories, int* seed);
00277 void IRND0(float *W, float *F, short int *K, int N);
00278 int report_materials_dose(int num_projections, unsigned long long int total_histories, float *density_nominal, ulonglong2 *materials_dose, double *mass_materials);
00279 
00280 
00281 // #ifdef USING_CUDA
00282 //   __host__ __device__     // Enable this code to be able to call the function from the host (CPU) and also from the device (GPU).
00283 // #endif
00284 int seeki_walker(float *cutoff, short int *alias, float randno, int n);   // (This function is not actually called in the code.)
00285 
00286 #ifdef USING_CUDA
00287 int guestimate_GPU_performance(int gpu_id);
00288 void init_CUDA_device( int* gpu_id, int myID, int numprocs,
00289       /*Variables to GPU constant memory:*/ struct voxel_struct* voxel_data, struct source_struct* source_data, struct source_energy_struct* source_energy_data, struct detector_struct* detector_data, struct linear_interp* mfp_table_data,
00290       /*Variables to GPU global memory:*/ float2* voxel_mat_dens, float2** voxel_mat_dens_device, unsigned int voxel_mat_dens_bytes,
00291         unsigned long long int* image, unsigned long long int** image_device, int image_bytes,
00292         float2* mfp_Woodcock_table, float2** mfp_Woodcock_table_device, int mfp_Woodcock_table_bytes,
00293         float3* mfp_table_a, float3* mfp_table_b, float3** mfp_table_a_device, float3** mfp_table_b_device, int mfp_table_bytes,
00294         struct rayleigh_struct* rayleigh_table, struct rayleigh_struct** rayleigh_table_device,
00295         struct compton_struct* compton_table, struct compton_struct** compton_table_device,
00296         struct detector_struct** detector_data_device, struct source_struct** source_data_device,
00297         ulonglong2* voxels_Edep, ulonglong2** voxels_Edep_device, int voxels_Edep_bytes, short int* dose_ROI_x_min, short int* dose_ROI_x_max, short int* dose_ROI_y_min, short int* dose_ROI_y_max, short int* dose_ROI_z_min, short int* dose_ROI_z_max,
00298         ulonglong2* materials_dose, ulonglong2** materials_dose_device, int flag_material_dose, int num_projections);
00299 #endif
00300 
00301 
00302 
00303 //// *** DEVICE CONSTANT MEMORY DECLARATION (~global variables in the GPU) *** ////
00304 
00305 // -- Constant memory (defined as global variables):
00306 
00307 //! Global variable to be stored in the GPU constant memory defining the coordinates of the dose deposition region of interest.
00308 #ifdef USING_CUDA
00309 __constant__
00310 #endif
00311 short int dose_ROI_x_min_CONST, dose_ROI_x_max_CONST, dose_ROI_y_min_CONST, dose_ROI_y_max_CONST, dose_ROI_z_min_CONST, dose_ROI_z_max_CONST;
00312 
00313 //! Global variable to be stored in the GPU constant memory defining the size of the voxel phantom.
00314 #ifdef USING_CUDA
00315 __constant__
00316 #endif
00317 struct voxel_struct    voxel_data_CONST;      // Define the geometric constants
00318 
00319 //! Global variable to be stored in the GPU constant memory defining the x-ray source.
00320 #ifdef USING_CUDA
00321 __constant__
00322 #endif
00323 struct source_struct   source_data_CONST;     // Define a particles source.
00324 
00325 //! Global variable to be stored in the GPU constant memory defining the x-ray detector.
00326 #ifdef USING_CUDA
00327 __constant__
00328 #endif
00329 struct detector_struct detector_data_CONST;   // Define a detector layer perpendicular to the y axis
00330 
00331 //! Global variable to be stored in the GPU constant memory defining the linear interpolation data.
00332 #ifdef USING_CUDA
00333 __constant__
00334 #endif
00335 struct linear_interp   mfp_table_data_CONST;  // Define size of interpolation arrays
00336 
00337 //! Global variable to be stored in the GPU constant memory defining the source energy spectrum.
00338 #ifdef USING_CUDA
00339 __constant__
00340 #endif
00341 struct source_energy_struct source_energy_data_CONST;
00342 
00343 
00344 //// *** GLOBAL FUNCTIONS *** ////
00345 
00346 #ifdef USING_CUDA
00347 __global__
00348 void init_image_array_GPU(unsigned long long int* image, int pixels_per_image);
00349 __global__
00350 void init_dose_array_GPU(ulonglong2* voxels_Edep, int num_voxels_dose);
00351 #endif
00352 
00353 #ifdef USING_CUDA
00354 __global__
00355 void track_particles(int histories_per_thread, int num_p, int seed_input, unsigned long long int* image, ulonglong2* voxels_Edep, float2* voxel_mat_dens, float2* mfp_Woodcock_table, float3* mfp_table_a, float3* mfp_table_b, struct rayleigh_struct* rayleigh_table, struct compton_struct* compton_table, struct detector_struct* detector_data_array, struct source_struct* source_data_array, ulonglong2* materials_dose);
00356 #else
00357 void track_particles(int history_batch, int histories_per_thread, int num_p, int seed_input, unsigned long long int* image, ulonglong2* voxels_Edep, float2* voxel_mat_dens, float2* mfp_Woodcock_table, float3* mfp_table_a, float3* mfp_table_b, struct rayleigh_struct* rayleigh_table, struct compton_struct* compton_table, struct detector_struct* detector_data_array, struct source_struct* source_data_array, ulonglong2* materials_dose);
00358 #endif
00359 
00360 
00361 //// *** DEVICE FUNCTIONS *** ////
00362 
00363 #ifdef USING_CUDA
00364 __device__
00365 #endif
00366 inline void source(float3* position, float3* direction, float* energy, int2* seed, int* absvox, struct source_struct* source_data_SHARED, struct detector_struct* detector_data_SHARED);
00367 #ifdef USING_CUDA
00368 __device__
00369 #endif
00370 inline void move_to_bbox(float3* position, float3* direction, float3 size_bbox, int* intersection_flag);
00371 #ifdef USING_CUDA
00372 __device__
00373 #endif
00374 inline void tally_image(float* energy, float3* position, float3* direction, signed char* scatter_state, unsigned long long int* image, struct source_struct* source_data_SHARED, struct detector_struct* detector_data_SHARED);
00375 #ifdef USING_CUDA
00376 __device__
00377 #endif
00378 inline void init_PRNG(int history_batch, int histories_per_thread, int seed_input, int2* seed);
00379 #ifdef USING_CUDA
00380 __host__ __device__
00381 #endif
00382 inline int abMODm(int m, int a, int s);
00383 #ifdef USING_CUDA
00384 __device__
00385 #endif
00386 inline float ranecu(int2* seed);
00387 #ifdef USING_CUDA
00388 __device__
00389 #endif
00390 inline double ranecu_double(int2* seed);
00391 #ifdef USING_CUDA
00392 __device__
00393 #endif
00394 inline int locate_voxel(float3* position, short3* voxel_coord);
00395 #ifdef USING_CUDA
00396 __device__
00397 #endif
00398 inline void rotate_double(float3* direction, double cos_theta, double phi);
00399 #ifdef USING_CUDA
00400 __device__
00401 #endif
00402 inline void GRAa(float *energy, double *costh_Rayleigh, int *mat, float *pmax_current, int2 *seed, struct rayleigh_struct* cgra);
00403 #ifdef USING_CUDA
00404 __device__
00405 #endif
00406 inline void GCOa(float *energy, double *costh_Compton, int *mat, int2 *seed, struct compton_struct* cgco_SHARED);
00407 
00408 #ifdef USING_CUDA
00409 __device__
00410 #endif
00411 inline void tally_voxel_energy_deposition(float* Edep, short3* voxel_coord, ulonglong2* dose);
00412 
00413 #ifdef USING_CUDA
00414 __device__
00415 #endif
00416 inline 
00417 void tally_materials_dose(float* Edep, int* material, ulonglong2* materials_dose);
00418 
00419 
00420 // -- END OF THE "ifndef MCGPU_H_" statement:
00421 #endif