www.gusucode.com > 一个相对很完善的数据挖掘系统源码程序 > 一个相对很完善的数据挖掘系统源码程序/Discover/colledoc.cpp
#include "stdafx.h"
//#include "collect.h"
#include "colledoc.h"
#include "resource.h"

#ifdef _DEBUG
#undef THIS_FILE
static char BASED_CODE THIS_FILE[] = __FILE__;
#endif

/////////////////////////////////////////////////////////////////////////////
// CAprioriStruct

// Writes this entry into str in the form "{<frequent item>,<count>}".
void CAprioriStruct::FormatAprioriStruct(CString& str)
{
    str.Format(_T("{%s,%i}"), (LPCTSTR)m_strFrequentItem, m_nFrequentItemCount);
}

/////////////////////////////////////////////////////////////////////////////
// CFiledInfo

// Default constructor: empty field name and zero value types.
// The four per-value arrays keep their default (empty) size.
CFiledInfo::CFiledInfo()
{
    sFieldName = "";
    nFieldValueTypeCount = 0;
}

// Copy constructor: delegates to operator= so both paths copy the same state.
CFiledInfo::CFiledInfo(const CFiledInfo& OtherFiledInfo)
{
    *this = OtherFiledInfo;
}

// Creates a field description whose four per-value arrays each hold
// nAttribute slots, initialised to an empty string / zero counters.
// nFieldValueTypeCount itself starts at 0.
CFiledInfo::CFiledInfo(int nAttribute)
{
    sFieldName = "";
    nFieldValueTypeCount = 0;

    FieldValueList.SetSize(nAttribute);
    FieldValueCount.SetSize(nAttribute);
    PositiveClassCount.SetSize(nAttribute);
    NegativeClassCount.SetSize(nAttribute);

    for (int i = 0; i < nAttribute; i++)
    {
        FieldValueList.SetAt(i, "");
        FieldValueCount.SetAt(i, 0);
        PositiveClassCount.SetAt(i, 0);
        NegativeClassCount.SetAt(i, 0);
    }
}

// Assignment: copies the field name, the value-type count and the complete
// contents of the four per-value arrays from oFileInfo.
//
// The arrays are copied as a whole with Copy(), which also resizes the
// destination. Copying only the first nFieldValueTypeCount elements would
// leave stale destination data when the source count is 0 and would read
// past the end of the source arrays when the count exceeds their size.
void CFiledInfo::operator=(const CFiledInfo& oFileInfo)
{
    // MFC's array Copy() must not be called with the array itself as source.
    if (this == &oFileInfo)
        return;

    sFieldName = oFileInfo.sFieldName;
    nFieldValueTypeCount = oFileInfo.nFieldValueTypeCount;

    FieldValueList.Copy(oFileInfo.FieldValueList);
    FieldValueCount.Copy(oFileInfo.FieldValueCount);
    PositiveClassCount.Copy(oFileInfo.PositiveClassCount);
    NegativeClassCount.Copy(oFileInfo.NegativeClassCount);
}
//CFiledInfo* CFiledInfo::operator=( CFiledInfo* oFileInfo) //{ // sFieldName=oFileInfo->sFieldName; // nFieldValueTypeCount=oFileInfo->nFieldValueTypeCount; // // int i; // for (i=0;i<nFieldValueTypeCount;i++) // { // FieldValueList.SetSize(oFileInfo->FieldValueList.GetSize()); // FieldValueCount.SetSize(oFileInfo->FieldValueCount.GetSize()); // PositiveClassCount.SetSize(oFileInfo->PositiveClassCount.GetSize()); // NegativeClassCount.SetSize(oFileInfo->NegativeClassCount.GetSize()); // // FieldValueList.SetAt(i,oFileInfo->FieldValueList.GetAt(i)); // FieldValueCount.SetAt(i,oFileInfo->FieldValueCount.GetAt(i)); // PositiveClassCount.SetAt(i,oFileInfo->PositiveClassCount.GetAt(i)); // NegativeClassCount.SetAt(i,oFileInfo->NegativeClassCount.GetAt(i)); // } // return this; // // } //CFiledInfo& CFiledInfo::operator=(const CFiledInfo& oFileInfo) //{ // sFieldName=oFileInfo.sFieldName; // nFieldValueTypeCount=oFileInfo.nFieldValueTypeCount; // // int i; // for (i=0;i<nFieldValueTypeCount;i++) // { //// FieldValueList[i]=oFileInfo.FieldValueList[i]; //// FieldValueCount[i]=oFileInfo.FieldValueCount[i]; //// PositiveClassCount[i]=oFileInfo.PositiveClassCount[i]; //// NegativeClassCount[i]=oFileInfo.NegativeClassCount[i]; // FieldValueList.SetSize(oFileInfo.FieldValueList.GetSize()); // FieldValueCount.SetSize(oFileInfo.FieldValueCount.GetSize()); // PositiveClassCount.SetSize(oFileInfo.PositiveClassCount.GetSize()); // NegativeClassCount.SetSize(oFileInfo.NegativeClassCount.GetSize()); // // FieldValueList.SetAt(i,oFileInfo.FieldValueList.GetAt(i)); // FieldValueCount.SetAt(i,oFileInfo.FieldValueCount.GetAt(i)); // PositiveClassCount.SetAt(i,oFileInfo.PositiveClassCount.GetAt(i)); // NegativeClassCount.SetAt(i,oFileInfo.NegativeClassCount.GetAt(i)); // } // return *this; //}; CFiledInfo::~CFiledInfo() {} #if _MSC_VER > 1020 template <> void AFXAPI SerializeElements<CAprioriStruct*>(CArchive& ar, CAprioriStruct** ppElements, int nCount) #else void 
SerializeElements(CArchive& ar, CAprioriStruct** ppElements, int nCount) #endif { // Since SerializeElements is always called by the framework with nCount=1 // for a CMap<>, it is a good idea to implement SerializeElement to handle // nCount>1, in case you decide to reuse it for a CArray<> with the same // element type. if (ar.IsStoring()) { for (int i = 0; i < nCount; i++) { CAprioriStruct* pMyStruct = *(ppElements + i); DWORD w = (DWORD)pMyStruct->m_nFrequentItemCount; ar << pMyStruct->m_strFrequentItem; ar << w; nCount--; } } else { for (int i = 0; i < nCount; i++) { CAprioriStruct* pMyStruct = new CAprioriStruct; *(ppElements + i) = pMyStruct; DWORD w; pMyStruct->m_nFrequentItemCount = w; ar >> pMyStruct->m_strFrequentItem; ar >> w; } } } ///////////////////////////////////////////////////////////////////////////// // CMyObject IMPLEMENT_SERIAL(CMyObject, CObject, 0) CMyObject::CMyObject() { } CMyObject::~CMyObject() { } void CMyObject::FormatMyObject(CString& str) { str.Format(_T("{%i, %.4f, %s}"), m_int, m_float, (LPCTSTR)m_str); } void CMyObject::Serialize(CArchive& ar) { WORD w; if (ar.IsStoring()) { w = (WORD)m_int; ar << w; ar << m_float; ar << m_str; } else { ar >> w; m_int = w; ar >> m_float; ar >> m_str; } } ///////////////////////////////////////////////////////////////////////////// // CCollectDoc IMPLEMENT_DYNCREATE(CCollectDoc, CDocument) BEGIN_MESSAGE_MAP(CCollectDoc, CDocument) //{{AFX_MSG_MAP(CCollectDoc) //}}AFX_MSG_MAP END_MESSAGE_MAP() ///////////////////////////////////////////////////////////////////////////// // CCollectDoc construction/destruction CCollectDoc::CCollectDoc() { } CCollectDoc::~CCollectDoc() { } BOOL CCollectDoc::OnNewDocument() { if (!CDocument::OnNewDocument()) return FALSE; // CString strFirst; // //strFirst.LoadString(IDS_INITIAL_STRING); // strFirst="OK"; // //m_stringList.AddTail(strFirst); // // CAprioriStruct* pMyStruct = new CAprioriStruct(); // /*pMyStruct->m_strFrequentItem.LoadString(IDS_INITIAL_STRING);*/ // 
pMyStruct->m_strFrequentItem="Wonderful"; // pMyStruct->m_nFrequentItemCount= 1234; // // m_AprioristructList1.AddTail(pMyStruct); //m_intList.AddTail(100); //m_dwArray.Add(100000); // CMyObject* pMyObject = new CMyObject(); // pMyObject->m_int = 5678; // pMyObject->m_float = 56.78f; // pMyObject->m_str.LoadString(IDS_INITIAL_STRING); // m_myobArray.Add(pMyObject); // // CPoint pt(10,10); // m_ptArray.Add(pt); // // CString strKey, strValue; // strKey.LoadString(IDS_INITIAL_KEY); // strValue.LoadString(IDS_INITIAL_VALUE); // m_mapStringToString[strKey] = strValue; // // // CMyObject* pMyObject2 = new CMyObject(); // pMyObject2->m_int = 1357; // pMyObject2->m_float = 13.57f; // pMyObject2->m_str.LoadString(IDS_INITIAL_STRING); // m_mapStringToMyObject[strKey] = pMyObject2; // // CAprioriStruct* pMyStruct2 = new CAprioriStruct(); // pMyStruct2->m_int = 2468; // pMyStruct2->m_float = 24.68f; // pMyStruct2->m_str.LoadString(IDS_INITIAL_STRING); // m_mapDWordToMyStruct[100] = pMyStruct2; return TRUE; } void CCollectDoc::DeleteContents() { // m_stringList.RemoveAll(); POSITION pos = m_AprioristructList1.GetHeadPosition(); while (pos != NULL) { delete m_AprioristructList1.GetNext(pos); } m_AprioristructList1.RemoveAll(); // m_intList.RemoveAll(); // // m_dwArray.RemoveAll(); // // for (int n = 0; n < m_myobArray.GetSize(); n++) // { // delete m_myobArray[n]; // } // m_myobArray.RemoveAll(); // // m_mapStringToString.RemoveAll(); // // m_ptArray.RemoveAll(); // // pos = m_mapStringToMyObject.GetStartPosition(); // while (pos != NULL) // { // CString str; // CMyObject* pMyObject; // m_mapStringToMyObject.GetNextAssoc(pos, str, pMyObject); // delete pMyObject; // } // m_mapStringToMyObject.RemoveAll(); // // pos = m_mapDWordToMyStruct.GetStartPosition(); // while (pos != NULL) // { // DWORD dwKey; // CAprioriStruct* pMyStruct; // m_mapDWordToMyStruct.GetNextAssoc(pos, dwKey, pMyStruct); // delete pMyStruct; // } // m_mapDWordToMyStruct.RemoveAll(); } 
///////////////////////////////////////////////////////////////////////////// // CCollectDoc serialization void CCollectDoc::Serialize(CArchive& ar) { POSITION pos; WORD nCount; DWORD w; // m_stringList.Serialize(ar); if (ar.IsStoring()) { nCount = (WORD)m_AprioristructList1.GetCount(); ar << nCount; pos = m_AprioristructList1.GetHeadPosition(); while (pos != NULL) { CAprioriStruct* pMyStruct = m_AprioristructList1.GetNext(pos); w = (DWORD)pMyStruct->m_nFrequentItemCount; ar << pMyStruct->m_strFrequentItem; ar << w; nCount--; } ASSERT(nCount == 0); } else { ar >> nCount; while (nCount-- > 0) { CAprioriStruct* pMyStruct = new CAprioriStruct; ar >> pMyStruct->m_strFrequentItem; ar >> w; pMyStruct->m_nFrequentItemCount = w; m_AprioristructList1.AddTail(pMyStruct); } } // m_intList.Serialize(ar); // // m_dwArray.Serialize(ar); // // m_myobArray.Serialize(ar); // // m_ptArray.Serialize(ar); // // m_mapStringToString.Serialize(ar); // // m_mapStringToMyObject.Serialize(ar); // // m_mapDWordToMyStruct.Serialize(ar); } ///////////////////////////////////////////////////////////////////////////// // CCollectDoc diagnostics #ifdef _DEBUG void CCollectDoc::AssertValid() const { CDocument::AssertValid(); } void CCollectDoc::Dump(CDumpContext& dc) const { CDocument::Dump(dc); } #endif //_DEBUG ///////////////////////////////////////////////////////////////////////////// // CCollectDoc commands BOOL CCollectDoc::IsStrItemInclude(CString strSmall,CString strBig) //一个频繁项目集中的多个项目组成的strSmall,是否都包含于strBig, //strSmall={I2,I3,I1} strBig={I4,I2,I5,I7,I3,I1} 则IsStrItemInclude返回TRUE { int strFind; CString strSingleItem; while(strSmall.Find(',')>0) { strFind=strSmall.Find(','); strSingleItem=strSmall.Left(strFind); strSingleItem.TrimLeft(); strSingleItem.TrimRight(); strSmall=strSmall.Right(strSmall.GetLength()-strFind-1); if (strBig.Find(strSingleItem)<0) return FALSE; } return TRUE; /*验证IsStrItemInclude函数 CString strSmall="I1,I2,I3,I4, I1,I5 ", strBig="I4,I2,I5,I7,I3,I1"; BOOL 
tempb=m_pDoc->IsStrItemInclude(strSmall,strBig); if(tempb) MessageBox("IsStrItemInclude!"); */ } void CCollectDoc::QuickSortStrArray(CStringArray *pArray, int left,int right) { int i,j; CString strMiddle,iTemp; i = left; j = right; //strMiddle = pArray[(left+right)/2]; //求中间值 strMiddle = pArray->GetAt((left+right)/2); do{ while((pArray->GetAt(i)<strMiddle) && (i<right))//从左扫描大于中值的数 i++; while((pArray->GetAt(j)>strMiddle) && (j>left))//从右扫描大于中值的数 j--; if(i<=j)//找到了一对值 { //交换 iTemp = pArray->GetAt(i); pArray->SetAt(i,pArray->GetAt(j)); pArray->SetAt(j,iTemp); i++; j--; } }while(i<=j);//如果两边扫描的下标交错,就停止(完成一次) if(left<j) //当左边部分有值(left<j),递归左半边 QuickSortStrArray(pArray,left,j); if(right>i) //当右边部分有值(right>i),递归右半边 QuickSortStrArray(pArray,i,right); } CStringArray* CCollectDoc::DevideStr(CString ItemFieldStr,char divider) /*将一个有多个项目组成的字符串分解为单独的项目 比如来自一条记录的内容 ItemFieldStr="I1,I2,I4";每个项目间的分割字符 divider 为',' 则函数最终得到的是由单独的项目"I1" "I2" "I4"组成的字符串数组CStringArray */ { CStringArray *SingleItemArray=new CStringArray ;//由每个单独项目组成的数组 //CStringArray *tempArray=new CStringArray ;//临时字符串数组 CString SingleItem;//临时存放每个单独项目 //寻找分割字符(串)divider 的总个数 int dividerPos,dividerSum=0; dividerPos=ItemFieldStr.Find(divider,0); while(dividerPos>=0) { dividerSum++; //dividerPos=ItemFieldStr.Right(ItemFieldStr.GetLength()-dividerPos-1).Find(divider,0); dividerPos=ItemFieldStr.Find(divider,dividerPos+1); } //寻找分割字符(串)divider 的总个数 for (int i=0;i<=dividerSum;i++) { AfxExtractSubString(SingleItem, ItemFieldStr, i, divider);//析出第i个子串 SingleItemArray->Add(SingleItem);//将每个项目字符串压入字符串数组 } // 将 SingleItemArray 升序排序 int nSingleItemArraySize=SingleItemArray->GetSize(); QuickSortStrArray(SingleItemArray,0,nSingleItemArraySize-1); //将SingleItemArray升序排序 return SingleItemArray; /*验证 CCollectDoc::DevideStr 函数 //CStringArray* CCollectDoc::DevideStr(CString ItemFieldStr,char divider) CStringArray* tempStringArray; CString ItemFieldStr="I4,I2,I5,I1"; char divider=','; 
tempStringArray=m_pDoc->DevideStr(ItemFieldStr,divider); MessageBox(tempStringArray->GetAt(0)); MessageBox(tempStringArray->GetAt(1)); MessageBox(tempStringArray->GetAt(2)); MessageBox(tempStringArray->GetAt(3)); *///验证 CCollectDoc::DevideStr 函数 } CStringArray* CCollectDoc::DevideDoubleStr(CString ItemFieldStr, char divider) /*将一个有多个项目组成的字符串分解为单独的项目后,再组成两项集 比如来自一条记录的内容 ItemFieldStr="I1,I2,I4";每个项目间的分割字符 divider 为',' 则函数最终得到的是包含单独的项目"I1" "I2" "I4"形成的"I1,I2","I1,I4","I2,I4" 组成的字符串数组CStringArray */ { CStringArray *SingleItemArray=DevideStr(ItemFieldStr,divider);//由每个单独项目组成的数组 CStringArray *DoubleItemArray=new CStringArray ;//由两个单独项目一组组成的数组,比如(I1,I4) CString DoubleItem;//临时存放每个两项项目 int nSingleItemArraySize=SingleItemArray->GetSize ();//得到所有单项的个数 for (int i=0;i<nSingleItemArraySize-1;i++) { for(int j=i+1;j<nSingleItemArraySize;j++) { DoubleItem=SingleItemArray->GetAt(i)+','+SingleItemArray->GetAt(j); DoubleItemArray->Add(DoubleItem);//将每个项目字符串压入字符串数组 } } // 将 DoubleItemArray 升序排序 int nDoubleItemArraySize=DoubleItemArray->GetSize(); QuickSortStrArray(DoubleItemArray,0,nDoubleItemArraySize-1); //将 DoubleItemArray 升序排序 return DoubleItemArray; /*验证 CCollectDoc::DevideDoubleStr 函数 //CStringArray* CCollectDoc::DevideDoubleStr(CString ItemFieldStr,char divider) CStringArray* tempStringArray; CString ItemFieldStr="I4,I2,I5,I1"; char divider=','; tempStringArray=m_pDoc->DevideDoubleStr(ItemFieldStr,divider); MessageBox(tempStringArray->GetAt(0)); MessageBox(tempStringArray->GetAt(1)); MessageBox(tempStringArray->GetAt(2)); MessageBox(tempStringArray->GetAt(3)); *///验证 CCollectDoc::DevideStr 函数 } CStringArray* CCollectDoc::DevideTriStr(CString ItemFieldStr, char divider) /*将一个有多个项目组成的字符串分解为单独的项目后,再组成三项集 比如来自一条记录的内容 ItemFieldStr="I1,I2,I4,I5";每个项目间的分割字符 divider 为',' 则函数最终得到的是包含单独的项目"I1" "I2" "I4" "I5" 形成的"I1,I2,I4","I1,I4,I5","I2,I4,I5" 组成的字符串数组CStringArray */ { CStringArray *SingleItemArray=DevideStr(ItemFieldStr,divider);//由每个单独项目组成的数组 CStringArray *TriItemArray=new CStringArray 
;//由两个三项项目一组组成的数组,比如(I1,I2,I4) CString TriItem;//临时存放每个三项项目 int nSingleItemArraySize=SingleItemArray->GetSize ();//得到所有单项的个数 for (int i=0;i<nSingleItemArraySize-1;i++) { for(int j=i+1;j<nSingleItemArraySize;j++) { for(int k=j+1;k<nSingleItemArraySize;k++) { TriItem=SingleItemArray->GetAt(i)+','+SingleItemArray->GetAt(j)+','+SingleItemArray->GetAt(k); TriItemArray->Add(TriItem);//将每个项目字符串压入字符串数组 } } } // 将 TriItemArray 升序排序 int nTriItemArraySize=TriItemArray->GetSize(); QuickSortStrArray(TriItemArray,0,nTriItemArraySize-1); //将 TriItemArray 升序排序 return TriItemArray; /*验证 CCollectDoc::DevideTriStr 函数 CStringArray* tempStringArray; CString ItemFieldStr="I4,I2,I5,I1"; char divider=','; tempStringArray=m_pDoc->DevideTriStr(ItemFieldStr,divider); MessageBox(tempStringArray->GetAt(0)); MessageBox(tempStringArray->GetAt(1)); MessageBox(tempStringArray->GetAt(2)); MessageBox(tempStringArray->GetAt(3)); *///验证 CCollectDoc::DevideStr 函数 } CStringArray* CCollectDoc::GenAssoRuleStr(CString Frequent3Str, char divider) /*将一个有三个单项组成的字符串生成关联规则 比如来自一条字符串的内容 Frequent3Str="I1,I2,I3";每个项目间的分割字符 divider 为',' 则函数最终得到的是包含 "I1,I2=>I3","I3=>I1,I2","I1,I3=>I2"等6种关联规则组成的字符串数组CStringArray */ { CStringArray *SingleItemArray=DevideStr(Frequent3Str,divider);//由每个单独项目组成的数组 CStringArray *AssoRuleArray=new CStringArray ;//由两个单独项目一组组成的数组,比如(I1,I4) CString AssoRuleItem;//临时存放每个关联规则项目 int nSingleItemArraySize=SingleItemArray->GetSize ();//得到所有单项的个数 int k; for (int i=0;i<nSingleItemArraySize-1;i++) { for(int j=i+1;j<nSingleItemArraySize;j++) { if((j+1)%3!=i &&(j+1)%3!=j) k=(j+1)%3; if((j-1)%3!=i &&(j-1)%3!=j) k=(j-1)%3; AssoRuleItem=SingleItemArray->GetAt(i%3)+','+SingleItemArray->GetAt(j%3)+"=>"+SingleItemArray->GetAt(k%3); AssoRuleArray->Add(AssoRuleItem);//将每个项目字符串压入字符串数组 AssoRuleItem=SingleItemArray->GetAt(k%3)+"=>"+SingleItemArray->GetAt(i%3)+','+SingleItemArray->GetAt(j%3); AssoRuleArray->Add(AssoRuleItem);//将每个项目字符串压入字符串数组 } } // 将 AssoRuleArray 升序排序 // int 
nAssoRuleArraySize=AssoRuleArray->GetSize(); // QuickSortStrArray(AssoRuleArray,0,nAssoRuleArraySize-1); //将 AssoRuleArray 升序排序 return AssoRuleArray; /*验证 CCollectDoc::GenAssoRuleStr 函数 CStringArray* tempStringArray; CString ItemFieldStr="I1,I2,I5"; char divider=','; tempStringArray=m_pDoc->GenAssoRuleStr(ItemFieldStr,divider); MessageBox(tempStringArray->GetAt(0)); MessageBox(tempStringArray->GetAt(1)); MessageBox(tempStringArray->GetAt(2)); MessageBox(tempStringArray->GetAt(3)); MessageBox(tempStringArray->GetAt(4)); MessageBox(tempStringArray->GetAt(5)); *///验证 CCollectDoc::GenAssoRuleStr 函数 } CString CCollectDoc::FloatToString(double source, int bit) { int decimal, sign; CString strSource; strSource=_fcvt(source,bit,&decimal, &sign ); if (decimal==0) strSource="0."+strSource; if (decimal<0) { strSource="0"+strSource; for (int i=decimal+2;i<0;i++) strSource="0"+strSource; strSource="0."+strSource; } if (decimal>0) { strSource=strSource.Left(decimal)+'.'+strSource.Right(strSource.GetLength()-decimal); } if(sign!=0) strSource="-"+strSource; return strSource; } int CCollectDoc::IsInStringArray(CString str, CStringArray* strArray) //检查一个字符串str 是否在字符串数组中出现,如果出现则返回字符串数组的下标-1 //如果没有出现则返回 0 { int nsize=strArray->GetSize(); for (int i=0;i<nsize;i++) if(str==strArray->GetAt(i)) return i+1; return 0; /*验证 CCollectDoc::IsInStringArray 函数 CStringArray *tempStringArray=new CStringArray ; tempStringArray->Add("I1"); tempStringArray->Add("I2"); tempStringArray->Add("I3"); tempStringArray->Add("I4"); CString ItemFieldStr="I5"; BOOL bTemp=m_pDoc->IsInStringArray(ItemFieldStr, tempStringArray); if (bTemp) MessageBox("OK include"); else MessageBox("NOT include"); 验证 CCollectDoc::IsInStringArray 函数*/ }