@sin3point14
Created June 25, 2025 07:22
Eigen cuda inverse test
#include <Eigen/Dense>
#include <iostream>

using mat = Eigen::Matrix<float, 5, 5>;

// Print a matrix row by row from device code, since operator<< is host-only.
template <typename MatrixType>
__device__ void printCudaMatrix(const MatrixType& matrix) {
    for (int row = 0; row < matrix.rows(); ++row) {
        printf("  Row %3d: [", row);
        for (int col = 0; col < matrix.cols(); ++col) {
            printf("%15.8e ", (double)matrix(row, col));
        }
        printf("]\n");
    }
}

// Invert *A in place on the device, printing the matrix at each step.
__global__ void matrixInv(mat* A) {
    printf("kernel A:\n");
    printCudaMatrix(*A);
    mat B = (*A).inverse();
    printf("kernel B:\n");
    printCudaMatrix(B);
    *A = B;
    printf("kernel Ainv:\n");
    printCudaMatrix(*A);
}

int main() {
    srand(0);
    mat* d_A;
    mat h_A = mat::Random();
    std::cout << "CPU:\n" << h_A << std::endl;

    // Copy the matrix to the device, invert it there, and copy it back.
    cudaMalloc((void**)&d_A, sizeof(mat));
    cudaMemcpy(d_A, &h_A, sizeof(mat), cudaMemcpyHostToDevice);
    matrixInv<<<1, 1>>>(d_A);
    cudaDeviceSynchronize();
    mat h_Ainv;
    cudaMemcpy(&h_Ainv, d_A, sizeof(mat), cudaMemcpyDeviceToHost);

    // Compare the CPU inverse against what the GPU produced.
    std::cout << "CPU final inv:\n" << h_A.inverse() << std::endl;
    std::cout << "GPU final inv:\n" << h_Ainv << std::endl;
    cudaFree(d_A);
    return 0;
}
sin3point14 (Author) commented Jun 25, 2025

using nvcc:

$ nvcc -I/usr/include/eigen3 main.cu
...
$ ./a.out 
CPU:
  0.680375  -0.604897 -0.0452059    0.83239  -0.967399
 -0.211234  -0.329554   0.257742   0.271423  -0.514226
  0.566198   0.536459  -0.270431   0.434594  -0.725537
   0.59688  -0.444451  0.0268018  -0.716795   0.608353
  0.823295    0.10794   0.904459   0.213938  -0.686642
kernel A:
  Row   0: [ 6.80375457e-01 -6.04897261e-01 -4.52058911e-02  8.32390189e-01 -9.67398882e-01 ]
  Row   1: [-2.11234152e-01 -3.29554498e-01  2.57741809e-01  2.71423459e-01 -5.14226437e-01 ]
  Row   2: [ 5.66198468e-01  5.36459208e-01 -2.70431042e-01  4.34593916e-01 -7.25536823e-01 ]
  Row   3: [ 5.96880078e-01 -4.44450557e-01  2.68018246e-02 -7.16794848e-01  6.08353496e-01 ]
  Row   4: [ 8.23294759e-01  1.07939959e-01  9.04459476e-01  2.13937759e-01 -6.86641812e-01 ]
kernel B:
  Row   0: [-6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 ]
  Row   1: [-6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 ]
  Row   2: [-6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 ]
  Row   3: [-6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 ]
  Row   4: [-6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 -6.86641693e-01 ]
kernel Ainv:
  Row   0: [ 6.80375457e-01 -6.04897261e-01 -4.52058911e-02  8.32390189e-01 -9.67398882e-01 ]
  Row   1: [-2.11234152e-01 -3.29554498e-01  2.57741809e-01  2.71423459e-01 -5.14226437e-01 ]
  Row   2: [ 5.66198468e-01  5.36459208e-01 -2.70431042e-01  4.34593916e-01 -7.25536823e-01 ]
  Row   3: [ 5.96880078e-01 -4.44450557e-01  2.68018246e-02 -7.16794848e-01  6.08353496e-01 ]
  Row   4: [ 8.23294759e-01  1.07939959e-01  9.04459476e-01  2.13937759e-01 -6.86641812e-01 ]
CPU final inv:
   0.47532   -1.06826  0.0343827   0.266967   0.330546
 -0.514907  -0.623726   0.448218  -0.472822   0.300033
-0.0461697  -0.459988  -0.962537  -0.524217   0.962145
   1.46381   -2.99707   -1.72187   -1.73564   0.463828
  0.884239   -2.91862   -1.69268  -0.985516   0.399005
GPU final inv:
  0.680375  -0.604897 -0.0452059    0.83239  -0.967399
 -0.211234  -0.329554   0.257742   0.271423  -0.514226
  0.566198   0.536459  -0.270431   0.434594  -0.725537
   0.59688  -0.444451  0.0268018  -0.716795   0.608353
  0.823295    0.10794   0.904459   0.213938  -0.686642

Eigen's inverse() on the GPU gives back the same matrix, and there are no compilation or runtime errors.
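
Since the kernel fails silently, a host-side sanity check would catch this. Here is a minimal sketch (looksLikeInverse is a hypothetical helper, not part of the gist): a correct inverse leaves the residual ||A * Ainv - I|| near zero, and the unmodified matrix that comes back from the 5x5 kernel fails that check loudly.

#include <Eigen/Dense>
#include <iostream>

using mat = Eigen::Matrix<float, 5, 5>;

// Returns true if Ainv behaves like an inverse of A, i.e. the residual
// ||A * Ainv - I|| is small. The GPU result above would return false here.
bool looksLikeInverse(const mat& A, const mat& Ainv, float tol = 1e-4f) {
    float residual = (A * Ainv - mat::Identity()).norm();
    std::cout << "||A * Ainv - I|| = " << residual << std::endl;
    return residual < tol;
}

Calling looksLikeInverse(h_A, h_Ainv) right after the device-to-host memcpy would turn the silent failure into a visible one.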

sin3point14 (Author) commented Jun 26, 2025

I tried seeing what happens to the generated PTX for various matrix inverses at https://godbolt.org/z/hP6zeEh5E, for 1x1 through 4x4 matrices, using the NVCC 12.5.1 compiler with -arch=sm_86 -O2. For those sizes, matrixInv in the PTX view contains a body that grows with the matrix dimension, so I assume the inverse is actually being computed. However, when I switch to 5x5, the function body is empty and simply returns:

.visible .entry matrixInv(Eigen::Matrix<float, 5, 5, 0, 5, 5>*)(
	.param .u64 matrixInv(Eigen::Matrix<float, 5, 5, 0, 5, 5>*)_param_0
)
{
	ret;
}
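
This is consistent with Eigen's dispatch in InverseImpl.h: sizes up to 4x4 get analytic, device-compatible specializations, while anything larger falls back to partialPivLu(), which is host-only (see the clang error in the next comment). For a side-by-side comparison, one could instantiate the same kernel template at both sizes in one translation unit and diff the PTX. A sketch of that setup (invKernel and the explicit instantiations are mine, not from the gist; assumes Eigen 3.4 on the include path), which under the flags above should show a non-empty body for 4x4 and an empty one for 5x5:

#include <Eigen/Dense>

// Same kernel, two sizes: the 4x4 instantiation gets Eigen's analytic
// cofactor-based inverse, while the 5x5 one hits the host-only PartialPivLU
// fallback, for which nvcc silently emits an empty body.
template <int N>
__global__ void invKernel(Eigen::Matrix<float, N, N>* A) {
    *A = A->inverse();
}

// Explicit instantiations so both kernels appear in the PTX.
template __global__ void invKernel<4>(Eigen::Matrix<float, 4, 4>*);
template __global__ void invKernel<5>(Eigen::Matrix<float, 5, 5>*);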

sin3point14 (Author) commented:
Compiling with clang++, however, throws an error as expected. I used the trunk compiler targeting sm_100a with CUDA 12.8.1 and -O2. Somehow adding --cuda-gpu-arch=sm_86 throws some error, so I just skipped it.

In file included from <source>:1:
In file included from /opt/compiler-explorer/libs/eigen/v3.4.0/Eigen/Dense:2:
In file included from /opt/compiler-explorer/libs/eigen/v3.4.0/Eigen/LU:39:
/opt/compiler-explorer/libs/eigen/v3.4.0/Eigen/src/LU/InverseImpl.h:28:21: error: reference to __host__ function 'partialPivLu' in __host__ __device__ function
   28 |     result = matrix.partialPivLu().inverse();
      |                     ^
...

As expected, clang++ refuses to compile the host-only partialPivLu() path on the GPU, instead of silently emitting an empty kernel the way nvcc does.
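
If a device-side inverse above 4x4 is actually needed, one possible workaround (a hand-rolled sketch, not Eigen's API; gaussJordanInverse is my name) is Gauss-Jordan elimination with partial pivoting written directly as a __device__ function, using only per-coefficient access, which is device-safe:

#include <Eigen/Dense>

// Gauss-Jordan elimination with partial pivoting, written so that every
// operation is per-coefficient and therefore safe in device code.
// Returns false if a zero pivot is hit (singular matrix).
template <int N>
__device__ bool gaussJordanInverse(const Eigen::Matrix<float, N, N>& in,
                                   Eigen::Matrix<float, N, N>& out) {
    Eigen::Matrix<float, N, N> a = in;  // working copy, reduced to I
    for (int r = 0; r < N; ++r)         // out starts as the identity and
        for (int c = 0; c < N; ++c)     // accumulates the inverse
            out(r, c) = (r == c) ? 1.0f : 0.0f;
    for (int col = 0; col < N; ++col) {
        // Partial pivoting: pick the row with the largest |entry| in this column.
        int pivot = col;
        for (int row = col + 1; row < N; ++row)
            if (fabsf(a(row, col)) > fabsf(a(pivot, col))) pivot = row;
        if (a(pivot, col) == 0.0f) return false;  // singular
        for (int k = 0; k < N; ++k) {   // swap rows col <-> pivot
            float t = a(col, k); a(col, k) = a(pivot, k); a(pivot, k) = t;
            t = out(col, k); out(col, k) = out(pivot, k); out(pivot, k) = t;
        }
        const float inv = 1.0f / a(col, col);  // normalize the pivot row
        for (int k = 0; k < N; ++k) { a(col, k) *= inv; out(col, k) *= inv; }
        // Eliminate this column from every other row.
        for (int row = 0; row < N; ++row) {
            if (row == col) continue;
            const float f = a(row, col);
            for (int k = 0; k < N; ++k) {
                a(row, k) -= f * a(col, k);
                out(row, k) -= f * out(col, k);
            }
        }
    }
    return true;
}

In the kernel above, mat B = (*A).inverse(); would become mat B; bool ok = gaussJordanInverse(*A, B); (N is deduced from the matrix type), sidestepping the host-only path entirely.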
