问小白 wenxiaobai
资讯
历史
科技
环境与自然
成长
游戏
财经
文学与艺术
美食
健康
家居
文化
情感
汽车
三农
军事
旅行
运动
教育
生活
星座命理

CUDA实战:图像缩放最近邻算法实现

创作时间:
作者:
@小白创作中心

CUDA实战:图像缩放最近邻算法实现

引用
CSDN
1.
https://m.blog.csdn.net/qq_43448134/article/details/142001271

本文介绍如何使用CUDA实现图像缩放的最近邻算法。通过详细讲解算法原理和提供完整的CUDA代码实现,帮助读者理解CUDA编程和图像处理算法。

项目背景

本项目旨在通过一系列图像算法的CUDA实现,帮助读者理解算法原理及其对应的CUDA代码实现。

最近邻算法原理

最近邻算法的核心思想是根据目标图像的像素坐标映射回原始图像,选择离映射坐标最近的一个像素作为结果。假设源图像的宽、高分别为W、H,通道数为C,目标图像的宽、高分别为w、h,通道数为c。则目标图像在像素坐标(x,y)处的像素值P(x,y)的计算方式如下:

  1. 横坐标缩放比例scale_X = W / w,纵坐标缩放比例scale_Y = H / h
  2. 目标图像坐标(x,y)映射到原图像的横坐标值x1 = round(x * scale_X),纵坐标值y1 = round(y * scale_Y)(注意:下文代码实际使用的是截断取整,而非四舍五入)
  3. 因此 P(x,y) = Ori(x1,y1)

代码实现

1. 头文件

#include <cuda_runtime.h>
#include <iostream>
// 读取和写入图像
#define STB_IMAGE_IMPLEMENTATION
#include "stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"
using namespace std;
typedef unsigned char uchar;

2. CPU读取图像

// Decodes an image file into a tightly packed, interleaved 8-bit RGB buffer.
//
// filename: path of the image file to decode.
//
// Returns the pixel buffer produced by stbi_load, or nullptr when the path is
// null or the file cannot be decoded. The buffer comes from stb_image, so it
// should be released with stbi_image_free().
unsigned char* read_image(const char* filename ){
    if (filename == nullptr) {
        std::cerr << "Error: Could not load image (null filename)" << std::endl;
        return nullptr;
    }
    int width = 0, height = 0, channels = 0;
    // Ask stb_image for exactly 3 channels: the resize routines in this file
    // index pixels as interleaved RGB, so RGBA / grayscale inputs must be
    // converted on load instead of being returned in their native layout.
    unsigned char* imageData = stbi_load(filename, &width, &height, &channels, 3);
    if (imageData == nullptr) {
        std::cerr << "Error: Could not load image " << filename << std::endl;
        // Bail out here; width/height/channels carry no meaningful values.
        return nullptr;
    }
    std::cout << "Image loaded: " << filename << std::endl;
    // `channels` reports the channel count stored in the file, which may
    // differ from the 3 channels of the returned buffer.
    std::cout << "Width: " << width << " Height: " << height << " Channels: " << channels << std::endl;
    return imageData;
}

3. 最近邻算法CPU实现

// CPU reference implementation of nearest-neighbour resizing for interleaved
// 3-channel, 8-bit images stored row by row without padding.
//
// ori_img : source pixels, input_w * input_h * 3 bytes.
// dst_img : destination pixels, out_w * out_h * 3 bytes, written in place.
// input_w, input_h : source width and height in pixels.
// out_w, out_h     : destination width and height in pixels.
//
// Does nothing when a pointer is null or any dimension is not positive.
void resize_cpu(unsigned char* ori_img,
                          unsigned char* dst_img,
                          int input_w,int input_h,
                          int out_w,int out_h){
    const int channel = 3;
    if (ori_img == nullptr || dst_img == nullptr ||
        input_w <= 0 || input_h <= 0 || out_w <= 0 || out_h <= 0) {
        return;
    }
    // Convert to float BEFORE dividing; an integer division here would
    // truncate the ratio (and yield 0 whenever the image is being enlarged).
    const float scale_w = static_cast<float>(input_w) / static_cast<float>(out_w);
    const float scale_h = static_cast<float>(input_h) / static_cast<float>(out_h);

    for (int y = 0; y < out_h; y++) {
        // Rows are mapped with the vertical ratio, columns with the horizontal one.
        int srcY = static_cast<int>(y * scale_h);
        if (srcY > input_h - 1) {
            srcY = input_h - 1;  // keep the lookup inside the source image
        }
        // size_t offsets so large images do not overflow int arithmetic.
        const size_t srcRow = static_cast<size_t>(srcY) * static_cast<size_t>(input_w);
        const size_t dstRow = static_cast<size_t>(y) * static_cast<size_t>(out_w);
        for (int x = 0; x < out_w; x++) {
            int srcX = static_cast<int>(x * scale_w);
            if (srcX > input_w - 1) {
                srcX = input_w - 1;
            }
            const size_t srcIndex = (srcRow + static_cast<size_t>(srcX)) * channel;
            const size_t dstIndex = (dstRow + static_cast<size_t>(x)) * channel;
            // Copy the three interleaved channel bytes of the chosen pixel.
            for (int c = 0; c < channel; c++) {
                dst_img[dstIndex + c] = ori_img[srcIndex + c];
            }
        }
    }
}

4. CUDA核函数

// Nearest-neighbour resize kernel for interleaved 3-channel, 8-bit images.
//
// Each thread handles the output pixel at its global (x, y) index of a 2D
// launch; threads whose index falls outside the output image do nothing.
//
// d_inputImage  : source pixels, read as inputWidth * inputHeight * 3 bytes.
// d_outputImage : destination pixels, written as outputWidth * outputHeight * 3 bytes.
__global__ void nearestNeighborKernel(unsigned char* d_inputImage, 
                                  unsigned char* d_outputImage, 
                                  int inputWidth, int inputHeight, 
                                  int outputWidth, int outputHeight){
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;
    // Threads in the padding of the last blocks have no pixel to produce.
    if (col >= outputWidth || row >= outputHeight) {
        return;
    }
    // Map the output coordinate to a source coordinate (fraction of the
    // output extent scaled to the input extent, truncated to an integer).
    const int srcCol = static_cast<int>(col / (float)outputWidth * inputWidth);
    const int srcRow = static_cast<int>(row / (float)outputHeight * inputHeight);
    // Byte offsets of the first channel of the destination / source pixel.
    const int dstBase = (row * outputWidth + col) * 3;
    const int srcBase = (srcRow * inputWidth + srcCol) * 3;
    // The three channel bytes of a pixel are adjacent in memory.
    for (int ch = 0; ch < 3; ++ch) {
        d_outputImage[dstBase + ch] = d_inputImage[srcBase + ch];
    }
}

5. CUDA核函数调用

// Host-side driver for the GPU nearest-neighbour resize.
//
// Allocates device buffers, uploads the source image, launches
// nearestNeighborKernel on a 2D grid of 16x16 blocks covering the output
// image, waits for completion, downloads the result and frees the buffers.
//
// h_inputImage  : host source pixels, inputWidth * inputHeight * 3 bytes.
// h_outputImage : host destination buffer, outputWidth * outputHeight * 3 bytes.
//
// The function returns void, so failures are reported on std::cerr; after a
// failure the remaining steps are skipped and h_outputImage is left unwritten.
void nearestNeighborInterpolation_launch(unsigned char* h_inputImage, 
                                  unsigned char* h_outputImage, 
                                  int inputWidth, int inputHeight, 
                                  int outputWidth, int outputHeight){
    if (h_inputImage == nullptr || h_outputImage == nullptr ||
        inputWidth <= 0 || inputHeight <= 0 || outputWidth <= 0 || outputHeight <= 0) {
        std::cerr << "nearestNeighborInterpolation_launch: invalid buffer or image size" << std::endl;
        return;
    }

    unsigned char * d_inputImage = nullptr;
    unsigned char * d_outputImage = nullptr;
    // Widen to size_t before multiplying so big images do not overflow int.
    const size_t inputImageSize = static_cast<size_t>(inputWidth) * static_cast<size_t>(inputHeight) * 3 * sizeof(unsigned char);
    const size_t outputImageSize = static_cast<size_t>(outputWidth) * static_cast<size_t>(outputHeight) * 3 * sizeof(unsigned char);
    std::cout << "sizeof(unsigned char) = " << sizeof(unsigned char) << std::endl;

    // Reports a failed CUDA call and tells the caller whether to continue.
    auto succeeded = [](cudaError_t err, const char* what) -> bool {
        if (err != cudaSuccess) {
            std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(err) << std::endl;
            return false;
        }
        return true;
    };

    // cuda malloc && memset
    bool ok = succeeded(cudaMalloc(&d_inputImage, inputImageSize), "cudaMalloc(input)");
    if (ok) {
        ok = succeeded(cudaMalloc(&d_outputImage, outputImageSize), "cudaMalloc(output)");
    }
    if (ok) {
        // Only the output needs clearing; the input buffer is fully
        // overwritten by the host-to-device copy below.
        ok = succeeded(cudaMemset(d_outputImage, 0, outputImageSize), "cudaMemset(output)");
    }
    if (ok) {
        // h2d
        cudaError_t status = cudaMemcpy( d_inputImage, h_inputImage, inputImageSize, cudaMemcpyHostToDevice );
        std::cout << "h2d status = " << status << std::endl;
        ok = succeeded(status, "cudaMemcpy(host to device)");
    }
    if (ok) {
        std::cout << "outputWidth = " << outputWidth << ",outputHeight = " << outputHeight << std::endl;
        // cuda block/grid size: ceil-divide so partial tiles are covered.
        dim3 blockSize(16,16,1);
        dim3 gridSize( (outputWidth + blockSize.x -1) /blockSize.x,
                       (outputHeight + blockSize.y -1) /blockSize.y, 1 );
        std::cout << "blockSize: x =" << blockSize.x <<",y = " << blockSize.y <<",z ="<< blockSize.z << std::endl;
        std::cout << "gridSize: x = " << gridSize.x <<",y="<< gridSize.y <<",z = "<< gridSize.z<< std::endl;
        // launch cuda kernel (nearest-neighbour interpolation)
        nearestNeighborKernel<<<gridSize,blockSize >>>(d_inputImage,d_outputImage,inputWidth, inputHeight,outputWidth, outputHeight );
        // A launch does not return an error code; query it explicitly.
        ok = succeeded(cudaGetLastError(), "nearestNeighborKernel launch");
    }
    if (ok) {
        // Wait for the device; faults raised while the kernel runs surface here.
        ok = succeeded(cudaDeviceSynchronize(), "cudaDeviceSynchronize");
    }
    if (ok) {
        // Copy the resized image back to the host.
        ok = succeeded(cudaMemcpy(h_outputImage, d_outputImage, outputImageSize, cudaMemcpyDeviceToHost),
                       "cudaMemcpy(device to host)");
    }

    // Release device memory on every path; cudaFree accepts a null pointer.
    cudaFree(d_inputImage);
    cudaFree(d_outputImage);
}

6. 主函数

// Demo entry point: loads ../det_427_640.png, shrinks it from 640x427 to
// 320x213 with the GPU nearest-neighbour path and writes the result as PNG.
// Returns 0 on success and 1 when the input cannot be used or the output
// cannot be written.
int main(){
    const int inputWidth   = 640;
    const int inputHeight  = 427;
    const int outputWidth  = 320;
    const int outputHeight = 213;
    const char* image_path = "../det_427_640.png";

    // The resize code trusts the sizes above, so confirm the file really has
    // them before any buffer is indexed with those dimensions.
    int fileWidth = 0, fileHeight = 0, fileChannels = 0;
    if (!stbi_info(image_path, &fileWidth, &fileHeight, &fileChannels)) {
        std::cerr << "Error: Could not read image header " << image_path << std::endl;
        return 1;
    }
    if (fileWidth != inputWidth || fileHeight != inputHeight) {
        std::cerr << "Error: expected a " << inputWidth << "x" << inputHeight
                  << " image but " << image_path << " is "
                  << fileWidth << "x" << fileHeight << std::endl;
        return 1;
    }

    // malloc host
    unsigned char* h_inputImage = read_image(image_path );
    if (h_inputImage == nullptr) {
        return 1;
    }
    // Value-initialised (zeroed) so the file content is deterministic even
    // if the GPU path reports an error and leaves the buffer unwritten.
    unsigned char* h_outputImage = new unsigned char[outputWidth * outputHeight * 3]();

    // gpu impl (nearest neighbour)
    nearestNeighborInterpolation_launch(h_inputImage, h_outputImage, inputWidth, inputHeight, outputWidth, outputHeight);
    // cpu impl
    // resize_cpu( h_inputImage, h_outputImage, inputWidth, inputHeight, outputWidth, outputHeight );

    // save img
    const char* output_filename = "../det_427_640_gpu_out.png";
    const int written = stbi_write_png( output_filename, outputWidth, outputHeight, 3, h_outputImage, outputWidth * 3);
    if (written == 0) {
        std::cerr << "Error: Could not write image " << output_filename << std::endl;
    }

    // free cpu: the input came from stb_image and must go back through
    // stbi_image_free (not delete[]); the output was allocated with new[].
    stbi_image_free(h_inputImage);
    delete[] h_outputImage;
    return written == 0 ? 1 : 0;
}

最终结果

源图像:尺寸=427*640
目标图像:尺寸=213 * 320

© 2023 北京元石科技有限公司 ◎ 京公网安备 11010802042949号