C语言数据去重处理的三种方法

创作时间:

作者:

@小白创作中心

C语言数据去重处理的三种方法

引用

来源

https://docs.pingcode.com/baike/1077530

在C语言中对数据进行去重处理，可以使用哈希表、排序和双指针、以及暴力枚举等方法，其中哈希表方法较为高效。接下来将详细解释哈希表方法在C语言中的实现。

一、哈希表去重方法

1. 初始化哈希表

在C语言中，可以使用unordered_map（在C++中）或自己实现一个简单的哈希表来存储已经遇到的元素。

#include <stdio.h>
#include <stdlib.h>

typedef struct HashNode {
    int key;
    struct HashNode* next;
} HashNode;

#define HASH_TABLE_SIZE 1000
HashNode* hashTable[HASH_TABLE_SIZE];

unsigned int hashFunction(int key) {
    return key % HASH_TABLE_SIZE;
}

void initHashTable() {
    for (int i = 0; i < HASH_TABLE_SIZE; i++) {
        hashTable[i] = NULL;
    }
}

2. 插入和查找元素

在哈希表中插入新元素，并查找已有元素。

int searchHashTable(int key) {
    unsigned int hashIndex = hashFunction(key);
    HashNode* node = hashTable[hashIndex];
    while (node != NULL) {
        if (node->key == key) {
            return 1;
        }
        node = node->next;
    }
    return 0;
}

void insertHashTable(int key) {
    if (searchHashTable(key)) {
        return;
    }
    unsigned int hashIndex = hashFunction(key);
    HashNode* newNode = (HashNode*)malloc(sizeof(HashNode));
    newNode->key = key;
    newNode->next = hashTable[hashIndex];
    hashTable[hashIndex] = newNode;
}

3. 数据去重

通过哈希表对数据进行去重处理。

void removeDuplicates(int* array, int size) {
    initHashTable();
    int* result = (int*)malloc(size * sizeof(int));
    int resultSize = 0;
    for (int i = 0; i < size; i++) {
        if (!searchHashTable(array[i])) {
            insertHashTable(array[i]);
            result[resultSize++] = array[i];
        }
    }
    for (int i = 0; i < resultSize; i++) {
        printf("%d ", result[i]);
    }
    printf("\n");
    free(result);
}

int main() {
    int array[] = {1, 2, 2, 3, 4, 4, 5};
    int size = sizeof(array) / sizeof(array[0]);
    removeDuplicates(array, size);
    return 0;
}

二、排序和双指针方法

1. 对数组进行排序

首先对数组进行排序，这样相同的元素会在一起。

#include <stdio.h>
#include <stdlib.h>

int compare(const void* a, const void* b) {
    return (*(int*)a - *(int*)b);
}

void removeDuplicatesUsingSorting(int* array, int size) {
    qsort(array, size, sizeof(int), compare);
    int* result = (int*)malloc(size * sizeof(int));
    int resultSize = 0;
    for (int i = 0; i < size; i++) {
        if (i == 0 || array[i] != array[i - 1]) {
            result[resultSize++] = array[i];
        }
    }
    for (int i = 0; i < resultSize; i++) {
        printf("%d ", result[i]);
    }
    printf("\n");
    free(result);
}

int main() {
    int array[] = {1, 2, 2, 3, 4, 4, 5};
    int size = sizeof(array) / sizeof(array[0]);
    removeDuplicatesUsingSorting(array, size);
    return 0;
}

三、暴力枚举方法

1. 遍历数组

通过嵌套循环遍历数组，判断每个元素是否在结果数组中出现过。

#include <stdio.h>
#include <stdlib.h>

void removeDuplicatesUsingBruteForce(int* array, int size) {
    int* result = (int*)malloc(size * sizeof(int));
    int resultSize = 0;
    for (int i = 0; i < size; i++) {
        int found = 0;
        for (int j = 0; j < resultSize; j++) {
            if (array[i] == result[j]) {
                found = 1;
                break;
            }
        }
        if (!found) {
            result[resultSize++] = array[i];
        }
    }
    for (int i = 0; i < resultSize; i++) {
        printf("%d ", result[i]);
    }
    printf("\n");
    free(result);
}

int main() {
    int array[] = {1, 2, 2, 3, 4, 4, 5};
    int size = sizeof(array) / sizeof(array[0]);
    removeDuplicatesUsingBruteForce(array, size);
    return 0;
}