跪求C4.5算法,C语言的……

谁给个有程序也有数据集的,还要有运行方法的。。

具体算法步骤如下;   1创建节点N   2如果训练集为空,在返回节点N标记为Failure   3如果训练集中的所有记录都属于同一个类别,则以该类别标记节点N   4如果候选属性为空,则返回N作为叶节点,标记为训练集中最普通的类;   5for each 候选属性 attribute_list   6if 候选属性是联系的then   7对该属性进行离散化   8选择候选属性attribute_list中具有最高信息增益的属性D   9标记节点N为属性D   10for each 属性D的一致值d   11由节点N长出一个条件为D=d的分支   12设s是训练集中D=d的训练样本的集合   13if s为空   14加上一个树叶,标记为训练集中最普通的类   15else加上一个有C4.5(R - {D},C,s)返回的点

C++代码你可以参考下

C4.5算法源代码(C++)
// C4.5_test.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#include <stdio.h>
#include <math.h>
#include "malloc.h"
#include <stdlib.h>
const int MAX = 10;
int** iInput;
int i = 0;//列数
int j = 0;//行数
void build_tree(FILE *fp, int* iSamples, int* iAttribute,int ilevel);//输出规则
int choose_attribute(int* iSamples, int* iAttribute);//通过计算信息增益率选出test_attribute
double info(double dTrue,double dFalse);//计算期望信息
double entropy(double dTrue, double dFalse, double dAll);//求熵
double splitinfo(int* list,double dAll);
int check_samples(int *iSamples);//检查samples是否都在同一个类里
int check_ordinary(int *iSamples);//检查最普通的类
int check_attribute_null(int *iAttribute);//检查attribute是否为空
void get_attributes(int *iSamples,int *iAttributeValue,int iAttribute);
int _tmain(int argc, _TCHAR* argv[])
{
FILE *fp;
FILE *fp1;
char iGet;
int a = 0;
int b = 0;//a,b是循环变量
int* iSamples;
int* iAttribute;
fp = fopen("c:\\input.txt","r");
if (NULL == fp)
{
printf("error\n");
return 0;
}
iGet = getc(fp);

while (('\n' != iGet)&&(EOF != iGet))
{
if (',' == iGet)
{
i++;
}
iGet = getc(fp);
}
i++;

iAttribute = (int *)malloc(sizeof(int)*i);
for (int k = 0; k<i; k++)
{
iAttribute[k] = (int)malloc(sizeof(int));
iAttribute[k] = 1;
}
while (EOF != iGet)
{
if ('\n' == iGet)
{
j++;
}
iGet = getc(fp);
}
j++;

iInput = (int **)malloc(sizeof(int*)*j);
iSamples = (int *)malloc(sizeof(int)*j);
for (a = 0;a < j;a++)
{
iInput[a] = (int *)malloc(sizeof(int)*i);
iSamples[a] = (int)malloc(sizeof(int));
iSamples[a] = a;
}
a = 0;
fclose(fp);
fp=fopen("c:\\input.txt","r");
iGet = getc(fp);
while(EOF != iGet)
{
if ((',' != iGet)&&('\n' != iGet))
{
iInput[a][b] = iGet - 48;
b++;
}
if (b == i)
{
a++;
b = 0;
}
iGet = getc(fp);
}

fp1 = fopen("d:\\output.txt","w");
build_tree(fp1,iSamples,iAttribute,0);
fclose(fp);
return 0;
}

void build_tree(FILE * fp, int* iSamples, int* iAttribute,int level)//
{
int iTest_Attribute = 0;
int iAttributeValue[MAX];
int k = 0;
int l = 0;
int m = 0;
int *iSamples1;
for (k = 0; k<MAX; k++)
{
iAttributeValue[k] = -1;
}
if (0 == check_samples(iSamples))
{
fprintf(fp,"result: %d\n",iInput[iSamples[0]][i-1]);
return;
}
if (1 == check_attribute_null(iAttribute))
{
fprintf(fp,"result: %d\n",check_ordinary(iSamples));
return;
}
iTest_Attribute = choose_attribute(iSamples,iAttribute);
iAttribute[iTest_Attribute] = -1;
get_attributes(iSamples,iAttributeValue,iTest_Attribute);
k = 0;
while ((-1 != iAttributeValue[k])&&(k < MAX))
{
l = 0;
m = 0;
while ((-1 != iSamples[l])&&(l < j))
{
if (iInput[iSamples[l]][iTest_Attribute] == iAttributeValue[k])
{
m++;
}
l++;
}
iSamples1 = (int *)malloc(sizeof(int)*(m+1));
l = 0;
m = 0;
while ((-1 != iSamples[l])&&(l < j))
{
if (iInput[iSamples[l]][iTest_Attribute] == iAttributeValue[k])
{
iSamples1[m] = iSamples[l];
m++;
}
l++;
}
iSamples1[m] = -1;
if (-1 == iSamples1[0])
{
fprintf(fp,"result: %d\n",check_ordinary(iSamples));
return;
}
fprintf(fp,"level%d: %d = %d\n",level,iTest_Attribute,iAttributeValue[k]);
build_tree(fp,iSamples1,iAttribute,level+1);
k++;
}
}
int choose_attribute(int* iSamples, int* iAttribute)
{
int iTestAttribute = -1;
int k = 0;
int l = 0;
int m = 0;
int n = 0;
int iTrue = 0;
int iFalse = 0;
int iTrue1 = 0;
int iFalse1 = 0;
int iDepart[MAX];
int iRecord[MAX];
double dEntropy = 0.0;
double dGainratio = 0.0;
double test = 0.0;

for (k = 0;k<MAX;k++)
{
iDepart[k] = -1;
iRecord[k] = 0;
}
k = 0;
while ((l!=2)&&(k<(i - 1)))
{
if (iAttribute[k] == -1)
{
l++;
}
k++;
}
if (l == 1)
{
for (k = 0;k<(k-1);k++)
{
if (iAttribute[k] == -1)
{
return iAttribute[k];
}
}
}
for (k = 0;k < (i-1);k++)
{
l = 0;
iTrue = 0;
iFalse = 0;
if (iAttribute[k] != -1)
{
while ((-1 != iSamples[l])&&(l < j))
{
if (0 == iInput[iSamples[l]][i-1])
{
iFalse++;
}
if (1 == iInput[iSamples[l]][i-1])
{
iTrue++;
}
l++;
}
for (n = 0;n<l;n++)//计算该属性有多少不同的值并记录
{
m = 0;
while((iDepart[m]!=-1)&&(m!=MAX))
{
if (iInput[iSamples[n]][iAttribute[k]] == iDepart[m])
{
break;
}
m++;
}
if (-1 == iDepart[m])
{
iDepart[m] = iInput[iSamples[n]][iAttribute[k]];
}
}
while ((iDepart[m] != -1)&&(m!=MAX))
{
for (n = 0;n<l;n++)
{
if (iInput[iSamples[n]][iAttribute[k]] == iDepart[m])
{
if (1 == iInput[iSamples[n]][i-1])
{
iTrue1++;
}
if (0 == iInput[iSamples[n]][i-1])
{
iFalse1++;
}
iRecord[m]++;
}
}

dEntropy += entropy((double)iTrue1,(double)iFalse1,(double)l);
iTrue1 = 0;
iFalse1 = 0;
m++;
}
double dSplitinfo = splitinfo(iRecord,(double)l);
if (-1 == iTestAttribute)
{
iTestAttribute = k;
dGainratio = (info((double)iTrue,(double)iFalse)-dEntropy)/dSplitinfo;
}
else
{
test = (info((double)iTrue,(double)iFalse)-dEntropy)/dSplitinfo;
if (dGainratio < test)
{
iTestAttribute = k;
dGainratio = test;
}
}
}
}
return iTestAttribute;
}
double info(double dTrue,double dFalse)
{
double dInfo = 0.0;
dInfo = ((dTrue/(dTrue+dFalse))*(log(dTrue/(dTrue+dFalse))/log(2.0))+(dFalse/(dTrue+dFalse))*(log(dFalse/(dTrue+dFalse))/log(2.0)))*(-1);
return dInfo;
}
double entropy(double dTrue, double dFalse, double dAll)
{
double dEntropy = 0.0;
dEntropy = (dTrue + dFalse)*info(dTrue,dFalse)/dAll;
return dEntropy;
}
double splitinfo(int* list,double dAll)
{
int k = 0;
double dSplitinfo = 0.0;
while (0!=list[k])
{
dSplitinfo -= ((double)list[k]/(double)dAll)*(log((double)list[k]/(double)dAll));
k++;
}
return dSplitinfo;
}
int check_samples(int *iSamples)
{
int k = 0;
int b = 0;
while ((-1 != iSamples[k])&&(k < j-1))
{
if (iInput[k][i-1] != iInput[k+1][i-1])
{
b = 1;
break;
}
k++;
}
return b;
}
int check_ordinary(int *iSamples)
{
int k = 0;
int iTrue = 0;
int iFalse = 0;
while ((-1 != iSamples[k])&&(k < i))
{
if (0 == iInput[iSamples[k]][i-1])
{
iFalse++;
}
else
{
iTrue++;
}
k++;
}
if (iTrue >= iFalse)
{
return 1;
}
else
{
return 0;
}
}
int check_attribute_null(int *iAttribute)
{
int k = 0;
while (k < (i-1))
{
if (-1 != iAttribute[k])
{
return 0;
}
k++;
}
return 1;
}
void get_attributes(int *iSamples,int *iAttributeValue,int iAttribute)
{
int k = 0;
int l = 0;
while ((-1 != iSamples[k])&&(k < j))
{
l = 0;
while (-1 != iAttributeValue[l])
{
if (iInput[iSamples[k]][iAttribute] == iAttributeValue[l])
{
break;
}
l++;
}
if (-1 == iAttributeValue[l])
{
iAttributeValue[l] = iInput[iSamples[k]][iAttribute];
}
k++;
}
}追问

数据集你有吗,就是那个input.txt。发给我吧[email protected]

追答

不好意思没有呢,你可以搜一下,网上应该有你想要的

温馨提示:答案为网友推荐,仅供参考
第1个回答  2012-05-04
没法上传附件,给我邮箱发个邮件吧 [email protected],我发给你
第2个回答  2012-05-04
楼上写的好长,收藏了.顺便问一句,这是什么书上的