// Origin (download-site watermark): www.gusucode.com > 一个相对很完善的数据挖掘系统源码程序 > 一个相对很完善的数据挖掘系统源码程序/Discover/TreeViewDMfunction.cpp
// TreeViewDMfunction.cpp : implementation file // #include "stdafx.h" #include "Discover.h" #include "colledoc.h" #include "TreeViewDMfunction.h" #include "MainFrm.h" #include "DummyList.h" #include "ClassTree.h" #include "AprioriDM.h" #include <math.h> #ifdef _DEBUG #define new DEBUG_NEW #undef THIS_FILE static char THIS_FILE[] = __FILE__; #endif //extern CCollectDoc::IsStrItemInclude(CString strSmall,CString strBig); ///////////////////////////////////////////////////////////////////////////// // CTreeViewDMfunction IMPLEMENT_DYNCREATE(CTreeViewDMfunction, CTreeView) CTreeViewDMfunction::CTreeViewDMfunction() { } CTreeViewDMfunction::~CTreeViewDMfunction() { } BEGIN_MESSAGE_MAP(CTreeViewDMfunction, CTreeView) //{{AFX_MSG_MAP(CTreeViewDMfunction) ON_NOTIFY_REFLECT(TVN_ITEMEXPANDING, OnItemexpanding) ON_NOTIFY_REFLECT(TVN_SELCHANGED, OnSelchanged) ON_WM_CREATE() ON_WM_LBUTTONDOWN() //}}AFX_MSG_MAP END_MESSAGE_MAP() ///////////////////////////////////////////////////////////////////////////// // CTreeViewDMfunction drawing CCollectDoc* CTreeViewDMfunction::GetDocument() // non-debug version is inline { return STATIC_DOWNCAST(CCollectDoc, m_pDocument); //ASSERT(m_pDocument->IsKindOf(RUNTIME_CLASS(CCollectDoc))); // return (CCollectDoc*)m_pDocument; } void CTreeViewDMfunction::OnDraw(CDC* pDC) { // CDocument* pDoc = GetDocument(); CCollectDoc* pDoc = GetDocument(); } ///////////////////////////////////////////////////////////////////////////// // CTreeViewDMfunction diagnostics #ifdef _DEBUG void CTreeViewDMfunction::AssertValid() const { CTreeView::AssertValid(); } void CTreeViewDMfunction::Dump(CDumpContext& dc) const { CTreeView::Dump(dc); } #endif //_DEBUG ///////////////////////////////////////////////////////////////////////////// // CTreeViewDMfunction message handlers void CTreeViewDMfunction::OnItemexpanding(NMHDR* pNMHDR, LRESULT* pResult) { NM_TREEVIEW* pNMTreeView = (NM_TREEVIEW*)pNMHDR; // TODO: Add your control notification handler code here *pResult = 
0; } void CTreeViewDMfunction::OnSelchanged(NMHDR* pNMHDR, LRESULT* pResult) { NM_TREEVIEW* pNMTreeView = (NM_TREEVIEW*)pNMHDR; // TODO: Add your control notification handler code here *pResult = 0; } static CString csTree[] = { _T("Dataming functions"), _T("Association"),//关联规则 1 _T("Apriori"), _T("C4.5"), _T("FTP-GrowTree"), _T("Other"), _T("Classification"),//分类 6 _T("ID3"), _T("C4.5"), _T("K-Nearest Neighbor"), _T("Neural Network"), _T("Clustering"),//聚类 11 _T("Apriori"), _T("C4.5"), _T("Other"), _T("Regression"),//回归 15 _T("Apriori"), _T("C4.5"), _T("Other"), }; int CTreeViewDMfunction::OnCreate(LPCREATESTRUCT lpCreateStruct) { if (CTreeView::OnCreate(lpCreateStruct) == -1) return -1; m_TreeCtrl = &GetTreeCtrl(); // create the image list for the tree control m_ImageList.Create (IDB_IL_FILE, 16, 1, RGB(0,255,0)); m_TreeCtrl->SetImageList (&m_ImageList, TVSIL_NORMAL); // add the parent item, make it bold HTREEITEM htiParent = m_TreeCtrl->InsertItem (csTree[0]); HTREEITEM htiChild; // child item htiChild = m_TreeCtrl->InsertItem (csTree[1], 1, 1, htiParent, TVI_LAST); m_TreeCtrl->SetItemState (htiChild, TVIS_BOLD, TVIS_BOLD); // add the children of the parent item for (int i = 2; i < 6; i++) { m_TreeCtrl->InsertItem (csTree[i], 2, 3, htiChild, TVI_LAST); } //_T("Classification"),//分类 6 htiChild = m_TreeCtrl->InsertItem (csTree[6], 1, 1, htiParent, TVI_LAST); m_TreeCtrl->SetItemState (htiChild, TVIS_BOLD, TVIS_BOLD); for (i = 7; i < 11; i++) { m_TreeCtrl->InsertItem (csTree[i], 2, 3, htiChild, TVI_LAST); } //_T("Clustering"),//聚类 11 htiChild = m_TreeCtrl->InsertItem (csTree[11], 1, 1, htiParent, TVI_LAST); m_TreeCtrl->SetItemState (htiChild, TVIS_BOLD, TVIS_BOLD); for (i = 12; i < 15; i++) { m_TreeCtrl->InsertItem (csTree[i], 2, 3, htiChild, TVI_LAST); } //_T("Regression"),//回归 15 htiChild = m_TreeCtrl->InsertItem (csTree[15], 1, 1, htiParent, TVI_LAST); m_TreeCtrl->SetItemState (htiChild, TVIS_BOLD, TVIS_BOLD); for (i = 16; i < 19; i++) { m_TreeCtrl->InsertItem 
(csTree[i], 2, 3, htiChild, TVI_LAST); } m_TreeCtrl->Expand (htiParent, TVE_EXPAND); m_TreeCtrl->Expand (htiChild, TVE_EXPAND); return 0; } BOOL CTreeViewDMfunction::PreCreateWindow(CREATESTRUCT& cs) { // TODO: Add your specialized code here and/or call the base class cs.style |= TVS_HASBUTTONS | TVS_HASLINES | TVS_LINESATROOT; return CTreeView::PreCreateWindow(cs); } void CTreeViewDMfunction::OnLButtonDown(UINT nFlags, CPoint point) { UINT m_uFlags; m_TreeCtrl=&(this->GetTreeCtrl()); HTREEITEM hItem =m_TreeCtrl->HitTest(point, &m_uFlags); if ((hItem != NULL) && (m_uFlags&TVHT_ONITEM)) { m_TreeCtrl->Select(hItem,TVGN_CARET);//上面HitTest语句定位到TreeItem,本句选中该Item CString sTreeLabel=m_TreeCtrl->GetItemText(hItem); _bstr_t sConnection; CStringArray* SingleItemArray;//项目集中的单项组成的字符串数组 CStringArray* DoubleItemArray;//项目集中的单项组成的字符串数组 CStringArray* TriItemArray;//项目集中的单项组成的字符串数组 POSITION pos; //HRESULT hr;//数据库操作返回值 _variant_t RecordsAffected; _variant_t vField1,vField2,vField3,vField4;//数据库表各个字段对应的变量 try { BeginWaitCursor(); m_pConnection=theApp.m_pConnection; m_pRecordset=theApp.m_pRecordset; FieldsPtr myFieldsPtr; myFieldsPtr=m_pRecordset->GetFields( ); FieldPtr myFieldPtr; _bstr_t FiledName; long RecordCount;//总记录个数 //m_pRecordset->get_RecordCount(RecordCount); RecordCount=m_pRecordset->GetRecordCount(); int FiledsCount=myFieldsPtr->Count;//字段个数 //_variant_t FiledValue[15]; CStringArray FiledValue; FiledValue.SetSize(FiledsCount); long ml; for(ml=0;ml<FiledsCount;ml++) { myFieldPtr=myFieldsPtr->GetItem(ml); FiledName=myFieldPtr->GetName(); } POSITION DCPpos=AfxGetApp()->GetFirstDocTemplatePosition( ) ;//获得第一个文档模板位置 CDocTemplate *pDCP=AfxGetApp()->GetNextDocTemplate(DCPpos ) ;//获得第一个文档模板的指针 POSITION DOCpos=pDCP->GetFirstDocPosition( ) ; //获得第一个文档模板指向的第一个文档的位置 CCollectDoc *m_pDoc=(CCollectDoc *)pDCP->GetNextDoc(DOCpos) ;//获得第一个文档模板指向的第一个文档的指针 //Apriori if( m_TreeCtrl->GetItemText(m_TreeCtrl->GetParentItem(hItem))=="Association" ) { if(sTreeLabel=="Apriori") { CAprioriStruct* 
pAprioriStruct=new CAprioriStruct;//由每条记录内容得到的用于将来插入m_AprioristructList中的结构 CAprioriStruct* pTempAprioriStruct=new CAprioriStruct;//临时结构,用于排序等操作 CMainFrame* appMainWnd=(CMainFrame*)AfxGetMainWnd();//得到主框架指针 appMainWnd->m_wndSplitter3.DeleteView(0, 0); appMainWnd->m_wndSplitter3.CreateView(0,0,RUNTIME_CLASS(CAprioriDM),CSize(380,100),NULL); //appMainWnd->m_wndSplitter3.SetRowInfo( 0,500, 200 ); appMainWnd->m_wndSplitter3.RecalcLayout(); CAprioriDM *m_Apriorilist= (CAprioriDM *)appMainWnd->m_wndSplitter3.GetPane(0,0);//得到 CAprioriDM FormView对象的指针 CListBox* pApriorilistBox1 =(CListBox *)m_Apriorilist->GetDlgItem(IDC_LIST1);//获得List Box列表控件 CListBox* pApriorilistBox2 =(CListBox *)m_Apriorilist->GetDlgItem(IDC_LIST2);//获得List Box列表控件 CListBox* pApriorilistBox3 =(CListBox *)m_Apriorilist->GetDlgItem(IDC_LIST3);//获得List Box列表控件 m_pDoc->m_AprioristructList1.RemoveAll();//delete pAprioriStruct m_pDoc->m_AprioristructList2.RemoveAll(); m_pDoc->m_AprioristructList3.RemoveAll(); /*验证 CCollectDoc::GenAssoRuleStr 函数*/ /*验证 CCollectDoc::GenAssoRuleStr 函数*/ m_pRecordset->MoveFirst(); long i=0; int nSingleItemArray;//项目集中的单项组成的字符串数组中各单项元素的个数 int nDoubleItemArray;//项目集中的两项组成的字符串数组中各两项元素的个数 int nTriItemArray;//项目集中的三项组成的字符串数组中各三项元素的个数 int strcount;//循环次数 int FlagSmaller=0;//标记记录中的内容单项小于列表中的单项内容,比如I2<I3 while(!m_pRecordset->adoEOF)//循环处理所有记录 { //vField1 = m_pRecordset->GetCollect(_variant_t((long)0));///取得第1列的值,从0开始计数,也可以直接给出列的名称,如下一行 // vField2 = m_pRecordset->GetCollect("ZN");///取得ZN字段的值 //lc.InsertItem(i,(LPCTSTR)(_bstr_t)vField1); //pAprioriStruct->m_nFrequentItemCount=(long)vField1; for(ml=0;ml<FiledsCount-1;ml++) { //FiledValue[ml+1]=m_pRecordset->GetCollect(_variant_t((long)(ml+1))); FiledValue.InsertAt(ml,(LPCTSTR)(_bstr_t)(m_pRecordset->GetCollect(_variant_t((long)(ml+1))))); if(FiledValue.GetAt(ml) != VT_NULL) { //pAprioriStruct->m_strFrequentItem=(LPCTSTR)(_bstr_t)FiledValue[ml]; SingleItemArray=m_pDoc->DevideStr((LPCTSTR)(_bstr_t)FiledValue[ml],','); 
DoubleItemArray=m_pDoc->DevideDoubleStr((LPCTSTR)(_bstr_t)FiledValue[ml],','); TriItemArray=m_pDoc->DevideTriStr((LPCTSTR)(_bstr_t)FiledValue[ml],','); } } nSingleItemArray=SingleItemArray->GetSize( ); nDoubleItemArray=DoubleItemArray->GetSize( ); nTriItemArray=TriItemArray->GetSize( ); for(strcount=0;strcount<nSingleItemArray;strcount++)//循环用于将每条记录项目分解后的每个单项压入m_AprioristructList1 { pos = m_pDoc->m_AprioristructList1.GetHeadPosition(); pAprioriStruct=new CAprioriStruct; pAprioriStruct->m_nFrequentItemCount=1; pAprioriStruct->m_strFrequentItem=SingleItemArray->GetAt(strcount); while (1)//循环判断如果列表中已经存在同样的单项,则将其个数累加;如果小于某个列表中单项,则插入;如果大于列表中最后一个单项,则在最后add一个单项 { if (pos==NULL) break; pTempAprioriStruct = m_pDoc->m_AprioristructList1.GetNext(pos); if ((pTempAprioriStruct->m_strFrequentItem)>(pAprioriStruct->m_strFrequentItem)) { if (pos==NULL) pos=m_pDoc->m_AprioristructList1.GetTailPosition();//如果前面 GetNext(pos)已经到达末尾之后,将pos设置为末尾 else m_pDoc->m_AprioristructList1.GetPrev(pos); m_pDoc->m_AprioristructList1.InsertBefore(pos,pAprioriStruct); FlagSmaller=1;//标记记录中的内容单项小于列表中的单项内容 break; } if ((pTempAprioriStruct->m_strFrequentItem)<(pAprioriStruct->m_strFrequentItem)) { FlagSmaller=0;//标记记录中的内容单项不小于列表中的单项内容 continue; } if ((pTempAprioriStruct->m_strFrequentItem)==(pAprioriStruct->m_strFrequentItem)) { pTempAprioriStruct->m_nFrequentItemCount++; if (pos==NULL) pos=m_pDoc->m_AprioristructList1.GetTailPosition();//如果前面 GetNext(pos)已经到达末尾之后,将pos设置为末尾 else m_pDoc->m_AprioristructList1.GetPrev(pos);//仅仅为了将pos提前 pAprioriStruct=pTempAprioriStruct; m_pDoc->m_AprioristructList1.SetAt( pos,pAprioriStruct);//如果m_AprioristructList1中已经有此单项,则将pAprioriStruct->m_nFrequentItemCount累加 FlagSmaller=0;//标记记录中的内容单项不小于列表中的单项内容 break; } } if(pAprioriStruct->m_nFrequentItemCount==1 && FlagSmaller==0)//这里相当于一个标记,当上面循环中没有找到m_AprioristructList2中同样的单项 m_pDoc->m_AprioristructList1.AddTail(pAprioriStruct);//如果m_AprioristructList1中没有此单项,则在尾部加入该单项 } 
for(strcount=0;strcount<nDoubleItemArray;strcount++)//循环用于将每条记录项目分解后的每个两项压入m_AprioristructList2 { pos = m_pDoc->m_AprioristructList2.GetHeadPosition(); pAprioriStruct=new CAprioriStruct; pAprioriStruct->m_nFrequentItemCount=1; pAprioriStruct->m_strFrequentItem=DoubleItemArray->GetAt(strcount); while (1)//循环判断如果列表中已经存在同样的两项,则将其个数累加;如果小于某个列表中两项,则插入;如果大于列表中最后一个两项,则在最后add一个两项 { if (pos==NULL) break; pTempAprioriStruct = m_pDoc->m_AprioristructList2.GetNext(pos); if ((pTempAprioriStruct->m_strFrequentItem)>(pAprioriStruct->m_strFrequentItem)) { if (pos==NULL) pos=m_pDoc->m_AprioristructList2.GetTailPosition();//如果前面 GetNext(pos)已经到达末尾之后,将pos设置为末尾 else m_pDoc->m_AprioristructList2.GetPrev(pos); m_pDoc->m_AprioristructList2.InsertBefore(pos,pAprioriStruct); FlagSmaller=1;//标记记录中的内容两项小于列表中的两项内容 break; } if ((pTempAprioriStruct->m_strFrequentItem)<(pAprioriStruct->m_strFrequentItem)) { FlagSmaller=0;//标记记录中的内容两项不小于列表中的两项内容 continue; } if ((pTempAprioriStruct->m_strFrequentItem)==(pAprioriStruct->m_strFrequentItem)) { pTempAprioriStruct->m_nFrequentItemCount++; if (pos==NULL) pos=m_pDoc->m_AprioristructList2.GetTailPosition();//如果前面 GetNext(pos)已经到达末尾之后,将pos设置为末尾 else m_pDoc->m_AprioristructList2.GetPrev(pos);//仅仅为了将pos提前 pAprioriStruct=pTempAprioriStruct; m_pDoc->m_AprioristructList2.SetAt( pos,pAprioriStruct);//如果m_AprioristructList2中已经有此两项,则将pAprioriStruct->m_nFrequentItemCount累加 FlagSmaller=0;//标记记录中的内容两项不小于列表中的两项内容 break; } } if(pAprioriStruct->m_nFrequentItemCount==1 && FlagSmaller==0)//这里相当于一个标记,当上面循环中没有找到m_AprioristructList2中同样的两项 m_pDoc->m_AprioristructList2.AddTail(pAprioriStruct);//如果m_AprioristructList2中没有此两项,则在尾部加入该两项 } for(strcount=0;strcount<nTriItemArray;strcount++)//循环用于将每条记录项目分解后的每个三项压入m_AprioristructList3 { pos = m_pDoc->m_AprioristructList3.GetHeadPosition(); pAprioriStruct=new CAprioriStruct; pAprioriStruct->m_nFrequentItemCount=1; pAprioriStruct->m_strFrequentItem=TriItemArray->GetAt(strcount); while 
(1)//循环判断如果列表中已经存在同样的三项,则将其个数累加;如果小于某个列表中三项,则插入;如果大于列表中最后一个三项,则在最后add一个三项 { if (pos==NULL) break; pTempAprioriStruct = m_pDoc->m_AprioristructList3.GetNext(pos); if ((pTempAprioriStruct->m_strFrequentItem)>(pAprioriStruct->m_strFrequentItem)) { if (pos==NULL) pos=m_pDoc->m_AprioristructList3.GetTailPosition();//如果前面 GetNext(pos)已经到达末尾之后,将pos设置为末尾 else m_pDoc->m_AprioristructList3.GetPrev(pos); m_pDoc->m_AprioristructList3.InsertBefore(pos,pAprioriStruct); FlagSmaller=1;//标记记录中的内容三项小于列表中的三项内容 break; } if ((pTempAprioriStruct->m_strFrequentItem)<(pAprioriStruct->m_strFrequentItem)) { FlagSmaller=0;//标记记录中的内容三项不小于列表中的三项内容 continue; } if ((pTempAprioriStruct->m_strFrequentItem)==(pAprioriStruct->m_strFrequentItem)) { pTempAprioriStruct->m_nFrequentItemCount++; if (pos==NULL) pos=m_pDoc->m_AprioristructList3.GetTailPosition();//如果前面 GetNext(pos)已经到达末尾之后,将pos设置为末尾 else m_pDoc->m_AprioristructList3.GetPrev(pos);//仅仅为了将pos提前 pAprioriStruct=pTempAprioriStruct; m_pDoc->m_AprioristructList3.SetAt( pos,pAprioriStruct);//如果m_AprioristructList3中已经有此三项,则将pAprioriStruct->m_nFrequentItemCount累加 FlagSmaller=0;//标记记录中的内容三项不小于列表中的三项内容 break; } } if(pAprioriStruct->m_nFrequentItemCount==1 && FlagSmaller==0)//这里相当于一个标记,当上面循环中没有找到m_AprioristructList3中同样的三项 m_pDoc->m_AprioristructList3.AddTail(pAprioriStruct);//如果m_AprioristructList3中没有此三项,则在尾部加入该三项 } m_pRecordset->MoveNext();//移到下一条记录 i++; } //显示到频繁项 listbox 中 char buffer[20]; pos = m_pDoc->m_AprioristructList1.GetHeadPosition(); while (pos != NULL) { pAprioriStruct=m_pDoc->m_AprioristructList1.GetNext(pos); pApriorilistBox1->AddString(pAprioriStruct->m_strFrequentItem+"->"+ltoa(pAprioriStruct->m_nFrequentItemCount,buffer,10)); } pos = m_pDoc->m_AprioristructList2.GetHeadPosition(); while (pos != NULL) { pAprioriStruct=m_pDoc->m_AprioristructList2.GetNext(pos); pApriorilistBox2->AddString(pAprioriStruct->m_strFrequentItem+"->"+ltoa(pAprioriStruct->m_nFrequentItemCount,buffer,10)); } pos = m_pDoc->m_AprioristructList3.GetHeadPosition(); 
while (pos != NULL) { pAprioriStruct=m_pDoc->m_AprioristructList3.GetNext(pos); pApriorilistBox3->AddString(pAprioriStruct->m_strFrequentItem+"->"+ltoa(pAprioriStruct->m_nFrequentItemCount,buffer,10)); } //显示到频繁项 listbox 中 } } if( m_TreeCtrl->GetItemText(m_TreeCtrl->GetParentItem(hItem))=="Classification" ) { if(sTreeLabel=="ID3") { int nAttribute=FiledsCount-2;//特征数=字段数-2 除去 序号、类别 2个字段 //CFieldInfoArray FieldInfoArray; //FieldInfoArray.SetSize( nAttribute);//特征信息的数组的元素个数-即特征个数 //得到树控件指针 CMainFrame* appMainWnd=(CMainFrame*)AfxGetMainWnd();//得到主框架指针 appMainWnd->m_wndSplitter3.DeleteView(0, 0); appMainWnd->m_wndSplitter3.CreateView(0,0,RUNTIME_CLASS(CClassTree),CSize(380,100),NULL); appMainWnd->m_wndSplitter3.RecalcLayout(); CClassTree* listInPane=(CClassTree*)appMainWnd->m_wndSplitter3.GetPane(0,0);//得到CClassTree对象的指针 CTreeCtrl& lt=listInPane-> GetTreeCtrl();//得到树控件CTreeCtrl的指针 lt.DeleteAllItems(); // add the parent item, make it bold HTREEITEM htiParent = lt.InsertItem ("ID3 Tree"); //HTREEITEM htiChild,htiGrandson; // child,Grandson item sTableName=theApp.sTableName;//间接取得CTreeViewDataManage类中的表名 sSQL=theApp.sSQL;////间接取得CTreeViewDataManage类中的SQL语句 //调用递归建树函数调用 RecursionTree(htiParent,m_pRecordset,nAttribute,sSQL); } } else return; } catch(_com_error e)///捕捉异常 { CString errormessage; errormessage.Format("连接数据库失败!\r\n错误信息:%s",e.ErrorMessage()); AfxMessageBox(errormessage);///显示错误信息 if (m_pRecordset->State!=adStateClosed) m_pRecordset->Close(); if (m_pConnection->State!=adStateClosed) m_pConnection->Close(); } EndWaitCursor(); } CMainFrame* appMainWnd=(CMainFrame*)AfxGetMainWnd();//得到主框架指针 //appMainWnd->m_wndStatusBar.SetPaneText(0,"This is an apple"); appMainWnd->m_wndStatusBar.UpdateWindow(); CTreeView::OnLButtonDown(nFlags, point); } void CTreeViewDMfunction::RecordTree(_RecordsetPtr m_pRecordset,CFieldInfoArray& FieldInfoArray,int nAttribute) //ID3算法中,循环处理所有记录的各个特征,返回记录特征信息,用于计算互信息及建树 //m_pRecordset-传递进来的最新的包含特征值条件的记录子集,但RecursionTree最早调用时,是对全部的记录集 
//FieldInfoArray- 对应新记录子集m_pRecordset的各个特征的CFiledInfo对象的数组 //nAttribute-特征个数 { POSITION DCPpos=AfxGetApp()->GetFirstDocTemplatePosition( ) ;//获得第一个文档模板位置 CDocTemplate *pDCP=AfxGetApp()->GetNextDocTemplate(DCPpos ) ;//获得第一个文档模板的指针 POSITION DOCpos=pDCP->GetFirstDocPosition( ) ; //获得第一个文档模板指向的第一个文档的位置 CCollectDoc *m_pDoc=(CCollectDoc *)pDCP->GetNextDoc(DOCpos) ;//获得第一个文档模板指向的第一个文档的指针 FieldsPtr myFieldsPtr; myFieldsPtr=m_pRecordset->GetFields( ); FieldPtr myFieldPtr; _bstr_t FiledName; int FiledsCount=myFieldsPtr->Count;//字段个数 for(long ml=0;ml<FiledsCount;ml++) { myFieldPtr=myFieldsPtr->GetItem(ml); FiledName=myFieldPtr->GetName(); } CString sAttributeValue;//特征值,统一按字符串处理,后期再调整 //下面循环用于给各个特征的sFieldName赋值,提前填满集合中的所有元素 for(ml=1;ml<=nAttribute;ml++)//1表示第一个有效特征是在第一个非特征字段“序号”后的第二个 { CFiledInfo *TempFiledInfo=new CFiledInfo;//(nAttribute); myFieldPtr=myFieldsPtr->GetItem((ml)); TempFiledInfo->sFieldName=(LPCTSTR)(_bstr_t)myFieldPtr->GetName(); TempFiledInfo->nFieldValueTypeCount=0; FieldInfoArray.Add(TempFiledInfo); } //上面循环用于给各个特征的sFieldName赋值 m_pRecordset->MoveFirst(); while(!m_pRecordset->adoEOF)//循环处理所有记录 { //vField2 = m_pRecordset->GetCollect(_variant_t((long)1));///取得第2列的值-即第一个特征值 CString stempType=(LPCTSTR)(_bstr_t)(m_pRecordset->GetCollect("类别"));// 临时存放类别记录值 for(ml=0;ml<nAttribute;ml++) { CFiledInfo *TempFiledInfo=FieldInfoArray.GetAt(ml); sAttributeValue=(LPCTSTR)(_bstr_t)(m_pRecordset->GetCollect(_variant_t((long)(ml+1))));// +1 表示从第二个字段开始作为特征值 if (!sAttributeValue.IsEmpty() ) { int InPosition=m_pDoc->IsInStringArray(sAttributeValue,&TempFiledInfo->FieldValueList); if(!InPosition)//特征值没有出现在字符串数组中 { TempFiledInfo->nFieldValueTypeCount++; TempFiledInfo->FieldValueList.Add(sAttributeValue); TempFiledInfo->FieldValueCount.Add(1); if(stempType=="P" || stempType=="1") { TempFiledInfo->PositiveClassCount.Add(1); TempFiledInfo->NegativeClassCount.Add(0); } else { TempFiledInfo->NegativeClassCount.Add(1); TempFiledInfo->PositiveClassCount.Add(0); } } else { 
TempFiledInfo->FieldValueCount[InPosition-1]++; if(stempType=="P" || stempType=="1") TempFiledInfo->PositiveClassCount[InPosition-1]++; else TempFiledInfo->NegativeClassCount[InPosition-1]++; } FieldInfoArray.SetAt(ml,TempFiledInfo); } } m_pRecordset->MoveNext();//移到下一条记录 } // for(ml=1;ml<=nAttribute;ml++)//验证是否加入新元素 // { // CFiledInfo *TempFiledInfo=FieldInfoArray.GetAt(ml-1); // CString display; // int nFieldValueList=TempFiledInfo->FieldValueList.GetSize(); // int nFieldValueCount=TempFiledInfo->FieldValueCount.GetSize(); // int nPositiveClassCount=TempFiledInfo->PositiveClassCount.GetSize(); // int nNegativeClassCount=TempFiledInfo->NegativeClassCount.GetSize(); // // MessageBox(TempFiledInfo->sFieldName); // for(int zer=0;zer<TempFiledInfo->nFieldValueTypeCount;zer++) // { // display.Format("特征值:%s 一共%d个,P类%d个,N类%d个", // TempFiledInfo->FieldValueList.GetAt(zer), // TempFiledInfo->FieldValueCount.GetAt(zer), // TempFiledInfo->PositiveClassCount.GetAt(zer), // TempFiledInfo->NegativeClassCount.GetAt(zer)); // //MessageBox(TempFiledInfo->FieldValueList.GetAt(zer)); // MessageBox(display); // } // }//验证是否加入新元素 //return FieldInfoArray; } int CTreeViewDMfunction::IuvCaculate(CFieldInfoArray& FieldInfoArray,int nAttribute) //计算互信息,返回互信息最大的属性对应的Filed序号 //FieldInfoArray- 对应新记录子集m_pRecordset的各个特征的CFiledInfo对象的数组 //nAttribute-特征个数 { //计算互信息 /* 信息熵: H(U)=-∑P(Ui)Log(P(Ui)) i 条件熵:H(U/V)=-∑P(Vi)∑(P(Ui/Vj))Log(P(Ui/Vj)) j i, 互信息: I(U,V)=H(U)-H(U/V) */ double Hu;//信息熵 CArray<double,double>Huv,Iuv;//条件熵,互信息 double Pu1,Pu2; double Pcount=0,NCount=0; double IuvMax=0;//存放临时的最大互信息值 int AttributeIuvMax;//取得最大互信息值对应的属性序号 //计算信息熵 CFiledInfo *TempFiledInfo=FieldInfoArray.GetAt(0); for(int zer=0;zer<TempFiledInfo->nFieldValueTypeCount;zer++) { Pcount+=TempFiledInfo->PositiveClassCount.GetAt(zer); NCount+=TempFiledInfo->NegativeClassCount.GetAt(zer); } Pu1=Pcount/(Pcount+NCount); Pu2=NCount/(Pcount+NCount); Hu=-Pu1*log(Pu1)/log(2.0)-Pu2*log(Pu2)/log(2.0); //计算信息熵end for(long 
ml=0;ml<nAttribute;ml++) { CFiledInfo *TempFiledInfo=FieldInfoArray.GetAt(ml); for(int zer=0;zer<TempFiledInfo->nFieldValueTypeCount;zer++) { TempFiledInfo->FieldValueList.GetAt(zer); double nFieldValueCount=TempFiledInfo->FieldValueCount.GetAt(zer); double nPositiveClassCount=TempFiledInfo->PositiveClassCount.GetAt(zer); double nNegativeClassCount=TempFiledInfo->NegativeClassCount.GetAt(zer); double HuvAdded;//每个特征取不同值的Huv值,用于累加到该特征值的Huv计算 if(Huv.GetSize()==ml) //如果 Huv 数组中只存放了第ml-1个特征的条件熵的值,则需要将当前的第ml个特征的条件熵的值append到数组中 { if (nPositiveClassCount==0 || nNegativeClassCount==0) Huv.Add(0.0); else Huv.Add(-nFieldValueCount/(Pcount+NCount) *(nPositiveClassCount/(nPositiveClassCount+nNegativeClassCount)*log(nPositiveClassCount/(nPositiveClassCount+nNegativeClassCount))/log(2) +nNegativeClassCount/(nPositiveClassCount+nNegativeClassCount)*log(nNegativeClassCount/(nPositiveClassCount+nNegativeClassCount))/log(2))); } else { if (nPositiveClassCount==0 || nNegativeClassCount==0) HuvAdded=0; else HuvAdded=-nFieldValueCount/(Pcount+NCount) *(nPositiveClassCount/(nPositiveClassCount+nNegativeClassCount)*log(nPositiveClassCount/(nPositiveClassCount+nNegativeClassCount))/log(2) +nNegativeClassCount/(nPositiveClassCount+nNegativeClassCount)*log(nNegativeClassCount/(nPositiveClassCount+nNegativeClassCount))/log(2)); Huv.SetAt(ml,Huv.GetAt(ml)+HuvAdded); } } Iuv.Add(Hu-Huv[ml]);//计算互信息 if(IuvMax<=Iuv[ml]) //将最大的互信息值赋予 IuvMax,然后将序号赋予AttributeIuvMax { IuvMax=Iuv[ml]; AttributeIuvMax=ml; } } //计算互信息end return AttributeIuvMax;//返回互信息最大的特征对应的Filed序号 } void CTreeViewDMfunction::RecursionTree(HTREEITEM &htiParent,_RecordsetPtr m_pRecordset, int nAttribute, CString sSQL) //递归建树函数 //htiParent-插入特征值对应的CTreeCtrl的 item //m_pRecordset指向增加了where条件的SQL对应的记录集; //nAttribute 特征数=字段数-2 除去 序号、类别 2个字段,主要是为了调用RecordTree时传递参数用 //sSQL-存放对应当前记录集的SQL语句 { HTREEITEM htiChild; int nMarkFieldValueTypeAsPure;//某个特征属性中某个取值时全为P类或N类,标记该取值在中的序号,当下面重新构造SQL语句时略过 CMainFrame* 
appMainWnd=(CMainFrame*)AfxGetMainWnd();//得到主框架指针 CClassTree* listInPane=(CClassTree*)appMainWnd->m_wndSplitter3.GetPane(0,0);//得到CClassTree对象的指针 CTreeCtrl& lt=listInPane->GetTreeCtrl();//得到树控件CTreeCtrl的指针 CFieldInfoArray FieldInfoArray;//建立一个新的对象 RecordTree( m_pRecordset,FieldInfoArray,nAttribute);//ID3算法中,循环处理所有记录的各个特征,返回记录特征信息,用于计算互信息及建树 int AttributeIuvMax=IuvCaculate(FieldInfoArray,nAttribute);//计算互信息,返回互信息最大的属性对应的Filed序号 //下面开始在控件上建树 CFiledInfo *TempFiledInfo=FieldInfoArray.GetAt(AttributeIuvMax); if (lt.GetItemText( htiParent ) == "")//第一次调用时,直接建立在树根上 htiParent = lt.GetRootItem( ); htiParent = lt.InsertItem (TempFiledInfo->sFieldName,1, 1, htiParent, TVI_LAST); lt.SetItemState (htiParent, TVIS_BOLD, TVIS_BOLD); int FieldClassCount=0;//存放全部为P类或N类的特征值的个数 TempFiledInfo->m_AttributeValueTree.RemoveAll( ); for(long ml=0;ml<TempFiledInfo->nFieldValueTypeCount;ml++) { htiChild=lt.InsertItem (TempFiledInfo->FieldValueList[ml], 2, 3, htiParent, TVI_LAST); TempFiledInfo->m_AttributeValueTree.Add(htiChild); //m_AttributeValueTree-为树控件的Item项的数组,用来存放某个特征的各个作为树控件item的属性值的item值,便于后续的树分支可以挂靠其上 if(TempFiledInfo->PositiveClassCount[ml]==TempFiledInfo->FieldValueCount[ml]) { lt.InsertItem ("P类", 1, 1, htiChild, TVI_LAST); FieldClassCount++; nMarkFieldValueTypeAsPure=ml;//某个特征属性中某个取值时全为P类或N类,标记该取值在nFieldValueTypeCount中的序号,当下面重新构造SQL语句时略过 } if(TempFiledInfo->NegativeClassCount[ml]==TempFiledInfo->FieldValueCount[ml]) { lt.InsertItem ("N类", 1, 1, htiChild, TVI_LAST); FieldClassCount++; nMarkFieldValueTypeAsPure=ml; } } if (FieldClassCount==TempFiledInfo->nFieldValueTypeCount) return;//如果特征值全为P类或N类,停止递归返回调用处 if (sSQL.Find("where")!=-1)//如果SQL语句中包含 where字符串 sSQL+=" and "; else sSQL+=" where "; for(long nNewRst=0;nNewRst<TempFiledInfo->nFieldValueTypeCount;nNewRst++) { //int nMarkFieldValueTypeAsPure;//某个特征属性中某个取值时全为P类或N类,标记该取值在中的序号,当下面重新构造SQL语句时略过 if(nMarkFieldValueTypeAsPure==nNewRst) continue; CString 
InterSQL=sSQL+TempFiledInfo->sFieldName+"="+"'"+TempFiledInfo->FieldValueList[nNewRst]+"'"; m_pRecordset->Close(); m_pRecordset->Open((_variant_t)(InterSQL),_variant_t((IDispatch *)m_pConnection,true),adOpenStatic,adLockOptimistic,adCmdText); htiParent=TempFiledInfo->m_AttributeValueTree[nNewRst]; RecursionTree(htiParent,m_pRecordset,nAttribute,InterSQL); } lt.Expand (htiParent, TVE_EXPAND); lt.Expand (htiChild, TVE_EXPAND); }