// Origin (download-site path): www.gusucode.com > 网页爬虫VC++源码下载-源码程序 > 网页爬虫VC++源码下载-源码程序\code\webpageloader_SourceCode\ThreadFunctions.cpp

    //Download by http://www.NewXing.com
// ThreadFunctions.cpp: implementation of the ThreadFunctions class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "WebPageLoader.h"
#include "ThreadFunctions.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif


//////////////////////////////////////////////////////////////////////
// Helper functions
//////////////////////////////////////////////////////////////////////

// Tests whether a file name or URL ends in one of a list of extensions.
//
//   pstrFilename   - file name or URL to inspect.
//   pstrExtensions - list of extensions separated by ';'. Each token is
//                    compared with the text that follows the last '.', so
//                    tokens are expected without a leading dot (assuming
//                    BfxRemoveToken hands back the token unchanged).
//
// Both strings are lower-cased before comparing, so the match ignores case.
// Returns TRUE on a match; FALSE when there is no extension or no token
// matches.
BOOL BfxHasValidExtension(LPCTSTR pstrFilename, LPCTSTR pstrExtensions)
{
   ASSERT(AfxIsValidString(pstrFilename));
   ASSERT(AfxIsValidString(pstrExtensions));

   CString sFilename( pstrFilename );
   sFilename.MakeLower();

   // Cut off the query string and the fragment BEFORE searching for the
   // extension. Otherwise a '.' inside them (e.g. "pic.jpg#v1.2") would be
   // mistaken for the extension separator.
   int pos = sFilename.Find(_T('?'));
   if( pos>=0 ) sFilename = sFilename.Left(pos);
   pos = sFilename.Find(_T('#'));
   if( pos>=0 ) sFilename = sFilename.Left(pos);

   // The extension is whatever follows the last dot
   pos = sFilename.ReverseFind(_T('.'));
   if( pos<0 ) return FALSE;
   CString sExt = sFilename.Mid(pos+1);

   // A '/' after the last dot means the dot belongs to a host or directory
   // name (e.g. "http://www.site.com/images/pic"), so the final path segment
   // has no extension at all.
   if( sExt.Find(_T('/'))>=0 ) return FALSE;

   // Anything left? (name ended in a dot)
   if( sExt.IsEmpty() ) return FALSE;

   // Walk the ';'-separated list; BfxRemoveToken is expected to strip the
   // leading token from sFilter on every call, which is what ends the loop.
   CString sFilter( pstrExtensions );
   sFilter.MakeLower();
   while( !sFilter.IsEmpty() ) {
      CString tok = BfxRemoveToken(sFilter, _T(';'));
      if( sExt==tok ) return TRUE;
   }
   return FALSE;
}

// Sanity check for a session pointer.
//
// Returns FALSE when pSession is NULL or when any of the Win32 pointer
// probes below reports the memory as inaccessible; TRUE otherwise.
// In debug builds ASSERT_VALID runs first, before any of these checks.
BOOL BfxIsValidSession(CSession *pSession)
{
   ASSERT_VALID(pSession);

   // NOTE: The probes are a rough copy of the CObject::IsKindOf() method.
   // They are chained with && so they run strictly in this order and stop at
   // the first failure: the object memory and its first pointer-sized slot
   // are probed before GetRuntimeClass() is called on the object.
   if( pSession!=NULL
      && !::IsBadCodePtr((FARPROC)pSession)
      && !::IsBadReadPtr(pSession,sizeof(CSession))
      && !::IsBadReadPtr(*(void**)pSession, sizeof(void*))
      && !::IsBadReadPtr(pSession, pSession->GetRuntimeClass()->m_nObjectSize) )
   {
      return TRUE;
   }
   return FALSE;
}


//////////////////////////////////////////////////////////////////////
//
// DownloadSessionThread - worker thread for image-scan and HTML-scan sessions
//
//////////////////////////////////////////////////////////////////////

// Thread entry point that runs one download session.
//
//   pParam - the CSession to run, passed as an untyped pointer.
//
// Steps:
//   1. Validate the pointer, call Start() and check the session parameters.
//   2. Open a CInternetSession (through the configured proxy if enabled).
//   3. Download every scheduled file, rescheduling broken downloads, until
//      nothing is left or a sleep/stop/kill request is seen.
//   4. Close the internet session and call Done() on the session.
//
// Returns 1 when the pointer is rejected by BfxIsValidSession, when the
// session parameters are invalid, or when an MFC exception is caught during
// start-up. Returns 0 in all other cases, including the emergency exit taken
// when the session pointer stops validating in the middle of the download
// loop.
UINT DownloadSessionThread( LPVOID pParam )
{
   CSession *pSession = (CSession *) pParam;
   if( !BfxIsValidSession(pSession) ) return 1;
   // NOTE(review): %d assumes m_hThread is int-sized - confirm its type.
   TRACE(_T("Thread %d starts.\n"), pSession->m_hThread);

   TRY
   {
      pSession->Start();

      // First we want to validate the session...
      bool bOk = true;
      {
         // The inner braces limit the lifetime of the lock to the validation
         CSingleLock lock( *pSession, TRUE );
         switch( pSession->m_Type ) {
         case TYPE_IMAGESCAN:
            // An image scan needs a format string and a non-inverted range
            if( pSession->m_sFormat.IsEmpty() ) bOk = false;
            if( pSession->m_nStartIndex > pSession->m_nStopIndex ) bOk = false;
            break;
         case TYPE_HTMLSCAN:
            // Nothing to validate
            break;
         default:
            // Not a valid type
            bOk = false;
            break;
         }
      }
      if( !bOk ) {
         pSession->Done();
         return 1;
      }
   }
   CATCH_ALL( e )
   {
      // oops...
      TRACE(_T("WebPageLoader: Thread exception caught!\n"));
      pSession->Done();
      return 1;
   }
   END_CATCH_ALL

   // Build the proxy URL. Arguments handed to a variadic function are not
   // converted automatically, so the string is cast to LPCTSTR and the port
   // to long to match the %s / %ld specifiers exactly.
   CString sProxy;
   sProxy.Format(_T("http://%s:%ld"),
      (LPCTSTR) pSession->m_Preferences->m_sProxy,
      (long) pSession->m_Preferences->m_iPort);

   CInternetSession inet( pSession->m_Preferences->m_sAgentName, 
      pSession->m_iUniqueID, 
      pSession->m_Preferences->m_bUseProxy ? INTERNET_OPEN_TYPE_PROXY : PRE_CONFIG_INTERNET_ACCESS, 
      pSession->m_Preferences->m_bUseProxy ? (LPCTSTR) sProxy : NULL );
   TRY
   {
      pSession->SetState(STATE_RUNNING);

      CDownloadFile *pFile = NULL;
      int nItem = 0;   // running 1-based counter passed to each Download() call
      while( TRUE ) 
      {         
         // Download all scheduled files
         while( (pFile = pSession->m_Files.GetNextDownload() ) != NULL ) {
            pFile->Download(pSession, &inet, ++nItem);
            // Do a little checking: if the session no longer validates we
            // must not touch it again, so leave without calling Done()
            if( !BfxIsValidSession(pSession) ) return 0; // EMERGENCY
            // Do we need to stop
            if( pSession->m_bSleepRequest || pSession->m_bStopRequest || pSession->m_bKillRequest ) {
               pSession->SafeLog(LOGTYPE_WARNING, IDS_LOG_INTERRUPT);
               break;
            }
         }
         
         // Reschedule all broken files
         BOOL res;
         res = pSession->m_Files.RescheduleBrokenDownloads(pSession->m_Settings.m_nDownloadRetries);    
         if( res ) pSession->Log(LOGTYPE_LOG, IDS_LOG_RESCHEDULED);
         
         // No files to download anymore?
         // NOTE(review): the result of this GetNextDownload() call is only
         // compared with NULL and then dropped - confirm that the call does
         // not consume or advance the queue entry it returns.
         if( pSession->m_Files.GetNextDownload()==NULL ) {
            break;
         }
         // Do we need to stop
         if( pSession->m_bSleepRequest || pSession->m_bStopRequest || pSession->m_bKillRequest ) {
            break;
         }
      }

      inet.Close();
   }
   CATCH_ALL( e )
   {
      // oops... still close the connection; the session is finished below
      TRACE(_T("WebPageLoader: Thread download exception caught!\n"));
      inet.Close();
   }
   END_CATCH_ALL

   // Done
   TRACE(_T("Thread %d is done.\n"), pSession->m_hThread);
   pSession->Done();

   return 0;
}