/*
 * Effinet_data_generation.cuh
 *
 *  Created on: Jul 26, 2015
 *      Author: ajay
 */
/*
 * This header file implements the functions that allocate memory on the GPU
 * and initialise the system, cost and solver data.
 */
#ifndef EFFINET_DATA_GENERATION_CUH_
#define EFFINET_DATA_GENERATION_CUH_
#include "api_effinet_cuda.cuh"
#include "effinet_header.h"
//void create_effinet_gpu(cublasHandle_t handle){
void create_effinet_gpu(){
	real_t prob=1;
	//create_effinet_system();
	/** system dynamics */
	_CUDA(cudaMalloc((void**)&dev_A,N_NODES*NX*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_B,N_NODES*NX*NU*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_F,2*N_NODES*NX*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_G,N_NODES*NU*NU*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_L,NV*NU*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_xmin,N_NODES*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_xmax,N_NODES*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_xs,N_NODES*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_umin,N_NODES*NU*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_umax,N_NODES*NU*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_TREE_VALUE,N_NODES*NX*sizeof(real_t)));
	//_CUDA(cudaMalloc((void**)&dev_TREE_VALUE,N_NODES*NX*sizeof(real_t)));
	/** cost function */
	_CUDA(cudaMalloc((void**)&dev_Q,N_NODES*NX*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_R,N_NONLEAF_NODES*NU*NU*sizeof(real_t)));
	/** pointers to system dynamics */
	_CUDA(cudaMalloc((void**)&dev_ptr_A,N_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_B,N_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_F,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_G,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_g,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_FN,K*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_gN,K*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_Tree_Value,N_NODES*sizeof(real_t*)));
	/** cost function pointers */
	_CUDA(cudaMalloc((void**)&dev_ptr_Q,N_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_R,N_NONLEAF_NODES*sizeof(real_t*)));
	real_t** ptr_A=(real_t**)malloc(N_NODES*sizeof(real_t*));
	real_t** ptr_B=(real_t**)malloc(N_NODES*sizeof(real_t*));
	real_t** ptr_F=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	real_t** ptr_G=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	real_t** ptr_g=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	real_t** ptr_FN=(real_t**)malloc(K*sizeof(real_t*));
	real_t** ptr_GN=(real_t**)malloc(K*sizeof(real_t*));
	real_t** ptr_Tree_value=(real_t**)malloc(N_NODES*sizeof(real_t*));
	real_t** ptr_Q=(real_t**)malloc(N_NODES*sizeof(real_t*));
	real_t** ptr_R=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	for(int i=0;i<N_NODES;i++){
		if(i<N_NONLEAF_NODES){
			ptr_F[i]=&dev_F[i*NX*NC];
			ptr_G[i]=&dev_G[i*NU*NC];
			ptr_g[i]=&dev_g[i*NC];
			_CUDA(cudaMemcpy(&dev_Q[i*NX*NX],Q,NX*NX*sizeof(real_t),cudaMemcpyHostToDevice));
			_CUDA(cudaMemcpy(&dev_R[i*NU*NU],R,NU*NU*sizeof(real_t),cudaMemcpyHostToDevice));
			_CUBLAS(cublasSscal(handle,NU*NU,&TREE_PROB[i],&dev_R[i*NU*NU],1));
			ptr_R[i]=&dev_R[i*NU*NU];
		}
		ptr_A[i]=&dev_A[i*NX*NX];
		ptr_B[i]=&dev_B[i*NU*NX];
		ptr_Tree_value[i]=&dev_TREE_VALUE[i*NX];
		ptr_Q[i]=&dev_Q[i*NX*NX];
		if(i>=N_NONLEAF_NODES){
			ptr_FN[i-N_NONLEAF_NODES]=&dev_FN[FN_ROWS_CUMUL[i-N_NONLEAF_NODES]*NX];
			ptr_GN[i-N_NONLEAF_NODES]=&dev_gN[FN_ROWS_CUMUL[i-N_NONLEAF_NODES]*NU];
			_CUDA(cudaMemcpy(&dev_Q[i*NX*NX],&V_Vf[(i-N_NONLEAF_NODES)*NX*NX],NX*NX*sizeof(real_t),cudaMemcpyHostToDevice));
		}
		_CUBLAS(cublasSscal(handle,NX*NX,&TREE_PROB[i],&dev_Q[i*NX*NX],1));
	}
	if(multi_uncertanity){
		_CUDA(cudaMemcpy(dev_A,A,N_NODES*NX*NX*sizeof(real_t),cudaMemcpyHostToDevice));
		_CUDA(cudaMemcpy(dev_B,B,N_NODES*NU*NX*sizeof(real_t),cudaMemcpyHostToDevice));
		_CUDA(cudaMemcpy(dev_F,F,N_NONLEAF_NODES*NC*NX*sizeof(real_t),cudaMemcpyHostToDevice));
		_CUDA(cudaMemcpy(dev_G,G,N_NONLEAF_NODES*NC*NU*sizeof(real_t),cudaMemcpyHostToDevice));
		_CUDA(cudaMemcpy(dev_g,g,N_NONLEAF_NODES*NC*sizeof(real_t),cudaMemcpyHostToDevice));
	}else{
		for(int i=0;i<N_NODES;i++){
			_CUDA(cudaMemcpy(&dev_A[i*NX*NX],A,NX*NX*sizeof(real_t),cudaMemcpyHostToDevice));
			_CUDA(cudaMemcpy(&dev_B[i*NX*NU],B,NU*NX*sizeof(real_t),cudaMemcpyHostToDevice));
			if(i<N_NONLEAF_NODES){
				//prob=sqrt(TREE_PROB[i]);
				prob=sqrt(TREE_PROB[i]);
				_CUDA(cudaMemcpy(&dev_F[i*NC*NX],F,NC*NX*sizeof(real_t),cudaMemcpyHostToDevice));
				_CUDA(cudaMemcpy(&dev_G[i*NC*NU],G,NC*NU*sizeof(real_t),cudaMemcpyHostToDevice));
				_CUDA(cudaMemcpy(&dev_g[i*NC],g,NC*sizeof(real_t),cudaMemcpyHostToDevice));
				_CUBLAS(cublasSscal(handle,NC*NX,&prob,&dev_F[i*NC*NX],1));
				_CUBLAS(cublasSscal(handle,NC*NU,&prob,&dev_G[i*NC*NU],1));
				_CUBLAS(cublasSscal(handle,NC,&prob,&dev_g[i*NC],1));
			}
		}
	}
	_CUDA(cudaMemcpy(dev_FN,FN,FN_NUMEL*sizeof(real_t),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_gN,gN,GN_NUMEL*sizeof(real_t),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_TREE_VALUE,TREE_VALUE,NX*N_NODES*sizeof(real_t),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_A,ptr_A,N_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_B,ptr_B,N_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_F,ptr_F,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_G,ptr_G,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_g,ptr_g,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_FN,ptr_FN,K*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_gN,ptr_GN,K*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_Tree_Value,ptr_Tree_value,N_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_Q,ptr_Q,N_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_R,ptr_R,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	free(ptr_A);
	free(ptr_B);
	free(ptr_F);
	free(ptr_G);
	free(ptr_g);
	free(ptr_FN);
	free(ptr_GN);
	free(ptr_Tree_value);
	free(ptr_Q);
	free(ptr_R);
	printf("System data initialized\n");
}
void init_off_line_data(void){
	_CUDA(cudaMalloc((void**)&dev_GPAD_K,N_NONLEAF_NODES*NU*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_GPAD_THETA,((N_NONLEAF_NODES-1)*NX*NU+GN_NUMEL*NU)*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_GPAD_PHI,N_NONLEAF_NODES*NU*NC*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_GPAD_SIGMA,N_NONLEAF_NODES*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_GPAD_C,N_NONLEAF_NODES*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_GPAD_D,N_NONLEAF_NODES*NC*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_GPAD_F,((N_NONLEAF_NODES-1)*NX*NX+GN_NUMEL*NX)*sizeof(real_t)));
	//_CUDA(cudaMemset(dev_GPAD_K,0,N_NONLEAF_NODES*NU*NX));
	real_t** ptr_GPAD_sigma=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	real_t** ptr_GPAD_phi=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	real_t** ptr_GPAD_theta=(real_t**)malloc((N_NODES-1)*sizeof(real_t*));
	real_t** ptr_GPAD_C=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	real_t** ptr_GPAD_D=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	real_t** ptr_GPAD_F=(real_t**)malloc((N_NODES-1)*sizeof(real_t*));
	real_t** ptr_GPAD_K=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	for(int i=0;i<N_NODES;i++){
		if(i<N_NONLEAF_NODES){
			ptr_GPAD_sigma[i]=&dev_GPAD_SIGMA[i*NU];
			ptr_GPAD_phi[i]=&dev_GPAD_PHI[i*NU*NC];
			ptr_GPAD_C[i]=&dev_GPAD_C[i*NX];
			ptr_GPAD_D[i]=&dev_GPAD_D[i*NX*NC];
			ptr_GPAD_K[i]=&dev_GPAD_K[i*NU*NX];
			if(i<N_NONLEAF_NODES-1){
				ptr_GPAD_theta[i]=&dev_GPAD_THETA[i*NX*NU];
				ptr_GPAD_F[i]=&dev_GPAD_F[i*NX*NX];
			}
		}
		if(i>N_NONLEAF_NODES-1){
			ptr_GPAD_theta[i-1]=&dev_GPAD_THETA[FN_ROWS_CUMUL[i-N_NONLEAF_NODES]*NU+(N_NONLEAF_NODES-1)*NX*NU];
			ptr_GPAD_F[i-1]=&dev_GPAD_F[FN_ROWS_CUMUL[i-N_NONLEAF_NODES]*NX+(N_NONLEAF_NODES-1)*NX*NX];
		}
	}
	_CUDA(cudaMalloc((void**)&dev_ptr_GPAD_C,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_GPAD_D,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_GPAD_SIGMA,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_GPAD_PHI,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_GPAD_THETA,(N_NODES-1)*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_GPAD_F,(N_NODES-1)*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_GPAD_K,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMemcpy(dev_ptr_GPAD_C,ptr_GPAD_C,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_GPAD_D,ptr_GPAD_D,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_GPAD_PHI,ptr_GPAD_phi,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_GPAD_SIGMA,ptr_GPAD_sigma,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_GPAD_THETA,ptr_GPAD_theta,(N_NODES-1)*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_GPAD_F,ptr_GPAD_F,(N_NODES-1)*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_GPAD_K,ptr_GPAD_K,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	/*
	for(int i=TREE_NODES_PER_STAGE_CUMUL[N-1];i<N_NONLEAF_NODES;i++){
		printf("%d %p ", i,ptr_GPAD_K[i]);
	}
	printf("\n");*/
	free(ptr_GPAD_sigma);
	free(ptr_GPAD_phi);
	free(ptr_GPAD_F);
	free(ptr_GPAD_K);
	free(ptr_GPAD_theta);
	free(ptr_GPAD_C);
	free(ptr_GPAD_D);
	printf("off-line data allocation\n");
}
void init_solver_data(){
	_CUDA(cudaMalloc((void**)&dev_x,NX*N_NODES*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_u,NU*N_NONLEAF_NODES*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_y,2*(NC*N_NONLEAF_NODES+GN_NUMEL)*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_w,2*(NC*N_NONLEAF_NODES+GN_NUMEL)*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_primal,2*(NC*N_NONLEAF_NODES+GN_NUMEL)*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_primal_avg,(NC*N_NONLEAF_NODES+GN_NUMEL)*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_primal_iterate,2*(NC*N_NONLEAF_NODES+GN_NUMEL)*sizeof(real_t)));
	if(NX*N_NODES>NU*N_NONLEAF_NODES){
		_CUDA(cudaMalloc((void**)&dev_temp_cst,NX*N_NODES*sizeof(real_t)));
	}else{
		_CUDA(cudaMalloc((void**)&dev_temp_cst,NU*N_NONLEAF_NODES*sizeof(real_t)));
	}
	grad=(real_t*)malloc(iterate[0]*sizeof(real_t));
	prm_cst_value=(real_t*)malloc(iterate[0]*sizeof(real_t));
	dual_cst_value=(real_t*)malloc(iterate[0]*sizeof(real_t));
	prm_inf=(real_t*)malloc(iterate[0]*sizeof(real_t));
	_CUDA(cudaMalloc((void**)&dev_q,GN_NUMEL*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_theta,K*NU*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_f,K*NX*sizeof(real_t)));
	_CUDA(cudaMalloc((void**)&dev_ptr_x,N_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_u,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_y,N_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_w,N_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_temp_xcst,N_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_temp_ucst,N_NONLEAF_NODES*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_q,K*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_qN,K*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_theta,K*sizeof(real_t*)));
	_CUDA(cudaMalloc((void**)&dev_ptr_f,K*sizeof(real_t*)));
	real_t** ptr_x=(real_t**)malloc(N_NODES*sizeof(real_t*));
	real_t** ptr_u=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	real_t** ptr_w=(real_t**)malloc(N_NODES*sizeof(real_t*));
	real_t** ptr_y=(real_t**)malloc(N_NODES*sizeof(real_t*));
	real_t** ptr_q=(real_t**)malloc(K*sizeof(real_t*));
	real_t** ptr_qN=(real_t**)malloc(K*sizeof(real_t*));
	real_t** ptr_theta=(real_t**)malloc(K*sizeof(real_t*));
	real_t** ptr_f=(real_t**)malloc(K*sizeof(real_t*));
	real_t** ptr_temp_xcst=(real_t**)malloc(N_NODES*sizeof(real_t*));
	real_t** ptr_temp_ucst=(real_t**)malloc(N_NONLEAF_NODES*sizeof(real_t*));
	for(int i=0;i<N_NODES;i++){
		ptr_x[i]=&dev_x[i*NX];
		ptr_temp_xcst[i]=&dev_temp_cst[i*NX];
		if(i<N_NONLEAF_NODES){
			ptr_u[i]=&dev_u[i*NU];
			ptr_w[i]=&dev_w[i*NC];
			ptr_y[i]=&dev_y[i*NC];
			ptr_temp_ucst[i]=&dev_temp_cst[i*NU];
		}else{
			ptr_w[i]=&dev_w[N_NONLEAF_NODES*NC+FN_ROWS_CUMUL[i-N_NONLEAF_NODES]];
			ptr_y[i]=&dev_y[N_NONLEAF_NODES*NC+FN_ROWS_CUMUL[i-N_NONLEAF_NODES]];
		}
		if(i<K){
			ptr_theta[i]=&dev_theta[i*NU];
			ptr_f[i]=&dev_f[i*NX];
			ptr_q[i]=&dev_q[i*NX];
			ptr_qN[i]=&dev_q[FN_ROWS_CUMUL[i]];
		}
	}
	_CUDA(cudaMemset(dev_u,0,NU*N_NONLEAF_NODES*sizeof(real_t)));
	_CUDA(cudaMemset(dev_y,0,(NC*N_NONLEAF_NODES+GN_NUMEL)*sizeof(real_t)));
	_CUDA(cudaMemcpy(dev_ptr_x,ptr_x,N_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_w,ptr_w,N_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_y,ptr_y,N_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_u,ptr_u,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_q,ptr_q,K*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_qN,ptr_qN,K*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_theta,ptr_theta,K*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_f,ptr_f,K*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_temp_xcst,ptr_temp_xcst,N_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	_CUDA(cudaMemcpy(dev_ptr_temp_ucst,ptr_temp_ucst,N_NONLEAF_NODES*sizeof(real_t*),cudaMemcpyHostToDevice));
	free(ptr_x);
	free(ptr_u);
	free(ptr_y);
	free(ptr_w);
	free(ptr_q);
	free(ptr_qN);
	free(ptr_theta);
	free(ptr_f);
	free(ptr_temp_xcst);
	free(ptr_temp_ucst);
	printf("Solver data is initialised\n");
}
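/*
 * Hypothetical usage sketch (not part of the original solver): the per-node
 * pointer arrays built above (dev_ptr_A, dev_ptr_x, dev_ptr_temp_xcst) are
 * laid out for batched cuBLAS calls, e.g. evaluating temp_i = A_i * x_i for
 * every node of the tree with a single batched GEMM. This assumes real_t is
 * float (consistent with the cublasSscal calls above) and that both
 * create_effinet_gpu() and init_solver_data() have already been called.
 */
static void example_batched_state_product(cublasHandle_t handle){
	real_t alpha=1;
	real_t beta=0;
	/* one NX-by-NX times NX-by-1 multiply per node, batched over N_NODES */
	_CUBLAS(cublasSgemmBatched(handle,CUBLAS_OP_N,CUBLAS_OP_N,NX,1,NX,
			&alpha,(const real_t**)dev_ptr_A,NX,
			(const real_t**)dev_ptr_x,NX,
			&beta,dev_ptr_temp_xcst,NX,N_NODES));
}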
template<typename T> int allocate_data(char* filepath, T* data){
	FILE *infile;
	int size;
	infile=fopen(filepath,"r");
	if(infile==NULL){
		printf("%s\n %p", filepath,infile);
		fprintf(stderr,"Error in opening the file %d \n",__LINE__);
		exit(100);
	}else{
		fscanf(infile,"%d \n",&size);
		//printf("Size of the array is %d ",size);
		/* note: the %f format assumes T is float (real_t); data must already
		 * hold room for the size entries announced on the first line */
		for(int i=0;i<size;i++){
			fscanf(infile,"%f\n",&data[i]);
		}
		fclose(infile);
		return 0;
	}
}
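/*
 * Hypothetical example of the plain-text format read by allocate_data(): the
 * first line gives the number of entries, followed by one value per line,
 * e.g. a file containing
 *     4
 *     1.0
 *     0.0
 *     0.0
 *     1.0
 * could be loaded with (the file name below is illustrative only)
 *     real_t* M=(real_t*)malloc(4*sizeof(real_t));
 *     allocate_data<real_t>("Data_files/example.h",M);
 */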
template<typename T>void check_correctness_memcpy(T* x,T *y,int n){
	for(int i=0;i<n;i++){
		//printf("%d %f ",i, x[i]-y[i]);
		if(fabs(x[i]-y[i])>1e-3){
			printf("%d ",i);
		}
	}
	printf("SUCCESS \n");
}
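/*
 * Hypothetical round-trip check with check_correctness_memcpy(): copy a host
 * buffer to the device and back, then compare (buffer names are illustrative
 * and dev_A must already be allocated by create_effinet_gpu()).
 *     real_t host_ref[NX*NX], host_back[NX*NX];
 *     _CUDA(cudaMemcpy(dev_A,host_ref,NX*NX*sizeof(real_t),cudaMemcpyHostToDevice));
 *     _CUDA(cudaMemcpy(host_back,dev_A,NX*NX*sizeof(real_t),cudaMemcpyDeviceToHost));
 *     check_correctness_memcpy<real_t>(host_ref,host_back,NX*NX);
 */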
void create_effinet_system(){
	/** system data allocation */
	A=(real_t*)malloc(NX*NX*sizeof(real_t));
	B=(real_t*)malloc(NX*NU*sizeof(real_t));
	F=(real_t*)malloc(N_NODES*2*NX*NX*sizeof(real_t));
	G=(real_t*)malloc(N_NODES*NU*NU*sizeof(real_t));
	L=(real_t*)malloc(NU*NV*sizeof(real_t));
	xmin=(real_t*)malloc(N_NODES*NX*sizeof(real_t));
	xmax=(real_t*)malloc(N_NODES*NX*sizeof(real_t));
	xs=(real_t*)malloc(N_NODES*NX*sizeof(real_t)); /* sized to match dev_xs */
	u_min=(real_t*)malloc(N_NODES*NU*sizeof(real_t));
	u_max=(real_t*)malloc(N_NODES*NU*sizeof(real_t));
	TREE_VALUE=(real_t*)malloc(N_NODES*NX*sizeof(real_t));
	P_test=(real_t*)malloc(NX*NX*sizeof(real_t));
	char* filepath_A="Data_files/Effinet_A.h";
	char* filepath_B="Data_files/Effinet_B.h";
	char* filepath_F="Data_files/Effinet_F.h";
	char* filepath_G="Data_files/Effinet_G.h";
	char* filepath_L="Data_files/Effinet_L.h";
	char* filepath_umax="Data_files/Effinet_umax.h";
	char* filepath_umin="Data_files/Effinet_umin.h";
	char* filepath_xmax="Data_files/Effinet_xmax.h";
	char* filepath_xmin="Data_files/Effinet_xmin.h";
	char* filepath_xs="Data_files/Effinet_xs.h";
	char* filepath_P="Data_files/Effinet_P.h";
	char* filepath_TREE_VALUE="Data_files/GPAD_Tree_Value.h";
	allocate_data<real_t>(filepath_A,A);
	allocate_data<real_t>(filepath_B,B);
	allocate_data<real_t>(filepath_F,F);
	allocate_data<real_t>(filepath_G,G);
	allocate_data<real_t>(filepath_L,L);
	allocate_data<real_t>(filepath_umin,u_min);
	allocate_data<real_t>(filepath_umax,u_max);
	allocate_data<real_t>(filepath_xmin,xmin);
	allocate_data<real_t>(filepath_xmax,xmax);
	allocate_data<real_t>(filepath_xs,xs);
	allocate_data<real_t>(filepath_P,P_test);
	allocate_data<real_t>(filepath_TREE_VALUE,TREE_VALUE);
	printf("System data allocated OK!\n");
}
template<typename T>void calculate_particular_soultion(){
	linear_cost_b=(real_t*)malloc(NV*N_NODES*sizeof(real_t));
	vhat=(real_t*)malloc(NU*N_NODES*sizeof(real_t));
	disturb_w=(real_t*)malloc(NX*N_NODES*sizeof(real_t));
	char* filepath_beta="Data_files/Effinet_beta.h";
	char* filepath_vhat="Data_files/Effinet_vhat.h";
	char* filepath_w="Data_files/Effinet_w.h";
	allocate_data<real_t>(filepath_beta,linear_cost_b);
	allocate_data<real_t>(filepath_vhat,vhat);
	allocate_data<real_t>(filepath_w,disturb_w);
	printf("Particular solution is calculated for the demand \n");
}
/* dev_x is taken by reference so that the caller receives the device allocation */
template<typename T>void transfer_data_gpu(T *&dev_x,T *x,char* filepath,int size){
	x=(T*)malloc(size*sizeof(T));
	allocate_data<T>(filepath,x);
	_CUDA(cudaMalloc((void**)&dev_x,size*sizeof(T)));
	_CUDA(cudaMemcpy(dev_x,x,size*sizeof(T),cudaMemcpyHostToDevice));
	free(x);
}
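/*
 * Hypothetical call to transfer_data_gpu(): load a vector of length NX from a
 * data file straight into device memory (the file and variable names below
 * are illustrative, not part of the project data set).
 *     real_t *dev_example=NULL;
 *     real_t *host_example=NULL;
 *     transfer_data_gpu<real_t>(dev_example,host_example,
 *             "Data_files/example.h",NX);
 */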
void free_host_mem(){
	free(A);
	free(B);
	free(F);
	free(G);
	free(L);
	free(xmin);
	free(xmax);
	free(xs);
	free(u_min);
	free(u_max);
	free(P_test);
	free(TREE_VALUE);
	free(linear_cost_b);
	free(vhat);
	free(disturb_w);
}
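/*
 * A possible end-to-end initialisation order, inferred from the functions in
 * this header (the actual driver code is not part of this file):
 *     create_effinet_system();   // load host-side system data from Data_files/
 *     create_effinet_gpu();      // copy system and cost data to the GPU
 *     init_off_line_data();      // allocate the off-line (GPAD factor) matrices
 *     init_solver_data();        // allocate the solver iterates
 *     ...                        // run the solver
 *     free_host_mem();           // release the host-side copies
 */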
#endif /* EFFINET_DATA_GENERATION_CUH_ */