Naive Bayes is a simple probabilistic classification algorithm based on Bayes' theorem.
Bayesian classification rests on probabilistic inference: reasoning and making decisions when the relevant conditions are uncertain and only their probabilities of occurrence are known. Probabilistic inference is the counterpart of deterministic inference. A naive Bayes classifier additionally makes a strong independence assumption: each feature of a sample is assumed to be conditionally independent of every other feature given the class.
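Concretely, given a feature vector (x_1, …, x_n), the classifier picks the class with the largest posterior probability; thanks to the independence assumption, the class-conditional likelihood factorizes into per-feature terms:

$$\hat{c} = \arg\max_{c} P(c \mid x_1, \dots, x_n) = \arg\max_{c} P(c) \prod_{j=1}^{n} P(x_j \mid c)$$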
Task: implement the naive Bayes classification algorithm in C, C++, Java, and Python.
Here are sample implementations of the naive Bayes classification algorithm in C, C++, Java, and Python:
1. **C example:**
```c
#include <stdio.h>
#include <math.h>

#define NUM_CLASSES 2
#define NUM_FEATURES 4

struct DataPoint {
    double features[NUM_FEATURES];
    int label;
};

/* Gaussian probability density for a feature value given mean and variance. */
double calculateGaussian(double x, double mean, double variance) {
    double exponent = exp(-(pow(x - mean, 2) / (2 * variance)));
    return (1 / sqrt(2 * M_PI * variance)) * exponent;
}

int predictClass(struct DataPoint testPoint, struct DataPoint trainingSet[], int numTrainingPoints) {
    /* Count training points per class, then convert counts to priors P(c). */
    int classCounts[NUM_CLASSES] = {0};
    double classPriors[NUM_CLASSES] = {0};
    for (int i = 0; i < numTrainingPoints; i++) {
        classCounts[trainingSet[i].label]++;
    }
    for (int i = 0; i < NUM_CLASSES; i++) {
        classPriors[i] = (double)classCounts[i] / numTrainingPoints;
    }

    /* Accumulate per-class sums and sums of squares for each feature, then
       convert them to means and variances. Note the division by the class
       count, not the prior; this assumes every class appears in the data. */
    double featureStats[NUM_CLASSES][NUM_FEATURES][2] = {0};
    for (int i = 0; i < numTrainingPoints; i++) {
        int label = trainingSet[i].label;
        for (int j = 0; j < NUM_FEATURES; j++) {
            double feature = trainingSet[i].features[j];
            featureStats[label][j][0] += feature;
            featureStats[label][j][1] += feature * feature;
        }
    }
    for (int i = 0; i < NUM_CLASSES; i++) {
        for (int j = 0; j < NUM_FEATURES; j++) {
            double mean = featureStats[i][j][0] / classCounts[i];
            double variance = featureStats[i][j][1] / classCounts[i] - mean * mean;
            featureStats[i][j][0] = mean;
            featureStats[i][j][1] = variance;
        }
    }

    /* Pick the class maximizing P(c) * prod_j N(x_j; mean, variance). */
    double maxProbability = 0;
    int predictedClass = -1;
    for (int i = 0; i < NUM_CLASSES; i++) {
        double probability = classPriors[i];
        for (int j = 0; j < NUM_FEATURES; j++) {
            probability *= calculateGaussian(testPoint.features[j],
                                             featureStats[i][j][0],
                                             featureStats[i][j][1]);
        }
        if (probability > maxProbability) {
            maxProbability = probability;
            predictedClass = i;
        }
    }
    return predictedClass;
}
```
2. **C++ example:**
```cpp
#include <iostream>
#include <cmath>
#include <vector>

#define NUM_CLASSES 2
#define NUM_FEATURES 4

struct DataPoint {
    std::vector<double> features;
    int label;
};

// Gaussian probability density for a feature value given mean and variance.
double calculateGaussian(double x, double mean, double variance) {
    double exponent = std::exp(-(std::pow(x - mean, 2) / (2 * variance)));
    return (1 / std::sqrt(2 * M_PI * variance)) * exponent;
}

int predictClass(const DataPoint& testPoint, const std::vector<DataPoint>& trainingSet) {
    // Count training points per class, then convert counts to priors P(c).
    std::vector<int> classCounts(NUM_CLASSES, 0);
    std::vector<double> classPriors(NUM_CLASSES, 0.0);
    for (const DataPoint& trainingPoint : trainingSet) {
        classCounts[trainingPoint.label]++;
    }
    for (int i = 0; i < NUM_CLASSES; i++) {
        classPriors[i] = static_cast<double>(classCounts[i]) / trainingSet.size();
    }

    // Accumulate per-class sums and sums of squares for each feature, then
    // convert them to means and variances. Divide by the class count (not
    // the prior); this assumes every class appears in the training set.
    std::vector<std::vector<std::vector<double>>> featureStats(
        NUM_CLASSES, std::vector<std::vector<double>>(NUM_FEATURES, std::vector<double>(2, 0.0)));
    for (const DataPoint& trainingPoint : trainingSet) {
        int label = trainingPoint.label;
        for (int j = 0; j < NUM_FEATURES; j++) {
            double feature = trainingPoint.features[j];
            featureStats[label][j][0] += feature;
            featureStats[label][j][1] += feature * feature;
        }
    }
    for (int i = 0; i < NUM_CLASSES; i++) {
        for (int j = 0; j < NUM_FEATURES; j++) {
            double mean = featureStats[i][j][0] / classCounts[i];
            double variance = featureStats[i][j][1] / classCounts[i] - mean * mean;
            featureStats[i][j][0] = mean;
            featureStats[i][j][1] = variance;
        }
    }

    // Pick the class maximizing P(c) * prod_j N(x_j; mean, variance).
    double maxProbability = 0;
    int predictedClass = -1;
    for (int i = 0; i < NUM_CLASSES; i++) {
        double probability = classPriors[i];
        for (int j = 0; j < NUM_FEATURES; j++) {
            probability *= calculateGaussian(testPoint.features[j],
                                             featureStats[i][j][0],
                                             featureStats[i][j][1]);
        }
        if (probability > maxProbability) {
            maxProbability = probability;
            predictedClass = i;
        }
    }
    return predictedClass;
}
```
3. **Java example:**
```java
import java.util.List;

public class NaiveBayesClassifier {
    private static final int NUM_CLASSES = 2;
    private static final int NUM_FEATURES = 4;

    private static class DataPoint {
        private final double[] features;
        private final int label;

        public DataPoint(double[] features, int label) {
            this.features = features;
            this.label = label;
        }

        public double[] getFeatures() {
            return features;
        }

        public int getLabel() {
            return label;
        }
    }

    // Gaussian probability density for a feature value given mean and variance.
    private static double calculateGaussian(double x, double mean, double variance) {
        double exponent = Math.exp(-(Math.pow(x - mean, 2) / (2 * variance)));
        return (1 / Math.sqrt(2 * Math.PI * variance)) * exponent;
    }

    private static int predictClass(DataPoint testPoint, List<DataPoint> trainingSet) {
        // Count training points per class, then convert counts to priors P(c).
        int[] classCounts = new int[NUM_CLASSES];
        double[] classPriors = new double[NUM_CLASSES];
        for (DataPoint trainingPoint : trainingSet) {
            classCounts[trainingPoint.getLabel()]++;
        }
        for (int i = 0; i < NUM_CLASSES; i++) {
            classPriors[i] = (double) classCounts[i] / trainingSet.size();
        }

        // Accumulate per-class sums and sums of squares for each feature, then
        // convert them to means and variances. Divide by the class count (not
        // the prior); this assumes every class appears in the training set.
        double[][][] featureStats = new double[NUM_CLASSES][NUM_FEATURES][2];
        for (DataPoint trainingPoint : trainingSet) {
            int label = trainingPoint.getLabel();
            for (int j = 0; j < NUM_FEATURES; j++) {
                double feature = trainingPoint.getFeatures()[j];
                featureStats[label][j][0] += feature;
                featureStats[label][j][1] += feature * feature;
            }
        }
        for (int i = 0; i < NUM_CLASSES; i++) {
            for (int j = 0; j < NUM_FEATURES; j++) {
                double mean = featureStats[i][j][0] / classCounts[i];
                double variance = featureStats[i][j][1] / classCounts[i] - mean * mean;
                featureStats[i][j][0] = mean;
                featureStats[i][j][1] = variance;
            }
        }

        // Pick the class maximizing P(c) * prod_j N(x_j; mean, variance).
        double maxProbability = 0;
        int predictedClass = -1;
        for (int i = 0; i < NUM_CLASSES; i++) {
            double probability = classPriors[i];
            for (int j = 0; j < NUM_FEATURES; j++) {
                probability *= calculateGaussian(testPoint.getFeatures()[j],
                                                 featureStats[i][j][0],
                                                 featureStats[i][j][1]);
            }
            if (probability > maxProbability) {
                maxProbability = probability;
                predictedClass = i;
            }
        }
        return predictedClass;
    }
}
```
4. **Python example:**
```python
import numpy as np

class NaiveBayesClassifier:
    """Categorical naive Bayes with Laplace smoothing.

    Unlike the Gaussian versions above, this variant treats every
    feature as a discrete (categorical) variable.
    """

    def __init__(self):
        self.classes = None
        self.class_priors = None
        self.feature_probs = None

    def fit(self, X, y):
        self.classes = np.unique(y)
        self.class_priors = np.zeros(len(self.classes))
        self.feature_probs = []
        for i, c in enumerate(self.classes):
            X_c = X[y == c]  # training rows belonging to class c
            self.class_priors[i] = len(X_c) / len(X)
            feature_probs_c = []
            for j in range(X.shape[1]):
                feature_values = np.unique(X[:, j])
                # Laplace-smoothed P(x_j = v | c), keyed by the feature value
                # itself so values need not be consecutive integers.
                feature_probs_j = {
                    v: (np.sum(X_c[:, j] == v) + 1) / (len(X_c) + len(feature_values))
                    for v in feature_values
                }
                feature_probs_c.append(feature_probs_j)
            self.feature_probs.append(feature_probs_c)

    def predict(self, X):
        y_pred = []
        for x in X:
            class_probs = []
            for i, c in enumerate(self.classes):
                # Sum log-probabilities to avoid floating-point underflow;
                # assumes every test-time value was also seen during training.
                class_prob = np.log(self.class_priors[i])
                for j, feature_value in enumerate(x):
                    class_prob += np.log(self.feature_probs[i][j][feature_value])
                class_probs.append(class_prob)
            y_pred.append(self.classes[np.argmax(class_probs)])
        return np.array(y_pred)
```
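Example usage, as a minimal sketch with a made-up integer-coded toy dataset:

```python
# Toy dataset: 3 categorical features encoded as small integers (made-up data).
X_train = np.array([
    [0, 1, 1],
    [1, 1, 0],
    [0, 0, 1],
    [1, 0, 0],
])
y_train = np.array([0, 1, 0, 1])

clf = NaiveBayesClassifier()
clf.fit(X_train, y_train)
print(clf.predict(np.array([[0, 1, 1]])))  # -> [0]
```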
This example implements a simple naive Bayes classifier. The `fit` method trains the model, taking a feature matrix `X` and a target vector `y` as input. The `predict` method then classifies new data points, taking a feature matrix `X` as input and returning the vector of predicted labels `y_pred`. Note that the C, C++, and Java versions above are Gaussian naive Bayes (each feature is modeled by a per-class normal distribution), whereas this Python version treats features as categorical and applies Laplace smoothing.
Please note that these are basic examples intended to illustrate how the naive Bayes algorithm works. In real applications you will likely need to adjust and extend them for your specific requirements and data.
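For production use, a library implementation is usually the better choice. As a minimal sketch (the data here is made up), scikit-learn's GaussianNB covers the Gaussian case:

```python
import numpy as np
from sklearn.naive_bayes import GaussianNB

# Made-up 2-feature training data for two classes.
X = np.array([[1.0, 2.1], [0.9, 1.8], [3.2, 4.0], [3.0, 4.2]])
y = np.array([0, 0, 1, 1])

clf = GaussianNB()
clf.fit(X, y)
print(clf.predict([[1.0, 2.0]]))  # -> [0]
```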