//========= Copyright Valve Corporation, All rights reserved. ============// // // Purpose: // //=============================================================================// #include #include #include "vmpi.h" #include "cmdlib.h" #include "vmpi_tools_shared.h" #include "tier1/strtools.h" #include "mpi_stats.h" #include "iphelpers.h" #include "tier0/minidump.h" // ----------------------------------------------------------------------------- // // Globals. // ----------------------------------------------------------------------------- // static bool g_bReceivedDirectoryInfo = false; // Have we gotten the qdir info yet? static bool g_bReceivedDBInfo = false; static CDBInfo g_DBInfo; static unsigned long g_JobPrimaryID; static int g_nDisconnects = 0; // Tracks how many remote processes have disconnected ungracefully. // ----------------------------------------------------------------------------- // // Shared dispatch code. // ----------------------------------------------------------------------------- // bool SharedDispatch( MessageBuffer *pBuf, int iSource, int iPacketID ) { char *pInPos = &pBuf->data[2]; switch ( pBuf->data[1] ) { case VMPI_SUBPACKETID_DIRECTORIES: { Q_strncpy( gamedir, pInPos, sizeof( gamedir ) ); pInPos += strlen( pInPos ) + 1; Q_strncpy( qdir, pInPos, sizeof( qdir ) ); g_bReceivedDirectoryInfo = true; } return true; case VMPI_SUBPACKETID_DBINFO: { g_DBInfo = *((CDBInfo*)pInPos); pInPos += sizeof( CDBInfo ); g_JobPrimaryID = *((unsigned long*)pInPos); g_bReceivedDBInfo = true; } return true; case VMPI_SUBPACKETID_CRASH: { char const chCrashInfoType = *pInPos; pInPos += 2; switch ( chCrashInfoType ) { case 't': Warning( "\nWorker '%s' dead: %s\n", VMPI_GetMachineName( iSource ), pInPos ); break; case 'f': { int iFileSize = * reinterpret_cast< int const * >( pInPos ); pInPos += sizeof( iFileSize ); // Temp folder char const *szFolder = NULL; if ( !szFolder ) szFolder = getenv( "TEMP" ); if ( !szFolder ) szFolder = getenv( "TMP" ); if ( !szFolder ) szFolder = "c:"; // Base module name char chModuleName[_MAX_PATH], *pModuleName = chModuleName; ::GetModuleFileName( NULL, chModuleName, sizeof( chModuleName ) / sizeof( chModuleName[0] ) ); if ( char *pch = strrchr( chModuleName, '.' ) ) *pch = 0; if ( char *pch = strrchr( chModuleName, '\\' ) ) *pch = 0, pModuleName = pch + 1; // Current time time_t currTime = ::time( NULL ); struct tm * pTime = ::localtime( &currTime ); // Number of minidumps this run static int s_numMiniDumps = 0; ++ s_numMiniDumps; // Prepare the filename char chSaveFileName[ 2 * _MAX_PATH ] = { 0 }; sprintf( chSaveFileName, "%s\\vmpi_%s_on_%s_%d%.2d%2d%.2d%.2d%.2d_%d.mdmp", szFolder, pModuleName, VMPI_GetMachineName( iSource ), pTime->tm_year + 1900, /* Year less 2000 */ pTime->tm_mon + 1, /* month (0 - 11 : 0 = January) */ pTime->tm_mday, /* day of month (1 - 31) */ pTime->tm_hour, /* hour (0 - 23) */ pTime->tm_min, /* minutes (0 - 59) */ pTime->tm_sec, /* seconds (0 - 59) */ s_numMiniDumps ); if ( FILE *fDump = fopen( chSaveFileName, "wb" ) ) { fwrite( pInPos, 1, iFileSize, fDump ); fclose( fDump ); Warning( "\nSaved worker crash minidump '%s', size %d byte(s).\n", chSaveFileName, iFileSize ); } else { Warning( "\nReceived worker crash minidump size %d byte(s), failed to save.\n", iFileSize ); } } break; } } return true; } return false; } CDispatchReg g_SharedDispatchReg( VMPI_SHARED_PACKET_ID, SharedDispatch ); // ----------------------------------------------------------------------------- // // Module interfaces. // ----------------------------------------------------------------------------- // void SendQDirInfo() { char cPacketID[2] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_DIRECTORIES }; MessageBuffer mb; mb.write( cPacketID, 2 ); mb.write( gamedir, strlen( gamedir ) + 1 ); mb.write( qdir, strlen( qdir ) + 1 ); VMPI_SendData( mb.data, mb.getLen(), VMPI_PERSISTENT ); } void RecvQDirInfo() { while ( !g_bReceivedDirectoryInfo ) VMPI_DispatchNextMessage(); } void SendDBInfo( const CDBInfo *pInfo, unsigned long jobPrimaryID ) { char cPacketInfo[2] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_DBINFO }; const void *pChunks[] = { cPacketInfo, pInfo, &jobPrimaryID }; int chunkLengths[] = { 2, sizeof( CDBInfo ), sizeof( jobPrimaryID ) }; VMPI_SendChunks( pChunks, chunkLengths, ARRAYSIZE( pChunks ), VMPI_PERSISTENT ); } void RecvDBInfo( CDBInfo *pInfo, unsigned long *pJobPrimaryID ) { while ( !g_bReceivedDBInfo ) VMPI_DispatchNextMessage(); *pInfo = g_DBInfo; *pJobPrimaryID = g_JobPrimaryID; } // If the file is successfully opened, read and sent returns the size of the file in bytes // otherwise returns 0 and nothing is sent int VMPI_SendFileChunk( const void *pvChunkPrefix, int lenPrefix, tchar const *ptchFileName ) { HANDLE hFile = NULL; HANDLE hMapping = NULL; void const *pvMappedData = NULL; int iResult = 0; hFile = ::CreateFile( ptchFileName, GENERIC_READ, 0, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL ); if ( !hFile || ( hFile == INVALID_HANDLE_VALUE ) ) goto done; hMapping = ::CreateFileMapping( hFile, NULL, PAGE_READONLY, 0, 0, NULL ); if ( !hMapping || ( hMapping == INVALID_HANDLE_VALUE ) ) goto done; pvMappedData = ::MapViewOfFile( hMapping, FILE_MAP_READ, 0, 0, 0 ); if ( !pvMappedData ) goto done; int iMappedFileSize = ::GetFileSize( hFile, NULL ); if ( INVALID_FILE_SIZE == iMappedFileSize ) goto done; // Send the data over VMPI if ( VMPI_Send3Chunks( pvChunkPrefix, lenPrefix, &iMappedFileSize, sizeof( iMappedFileSize ), pvMappedData, iMappedFileSize, VMPI_MASTER_ID ) ) iResult = iMappedFileSize; // Fall-through for cleanup code to execute done: if ( pvMappedData ) ::UnmapViewOfFile( pvMappedData ); if ( hMapping && ( hMapping != INVALID_HANDLE_VALUE ) ) ::CloseHandle( hMapping ); if ( hFile && ( hFile != INVALID_HANDLE_VALUE ) ) ::CloseHandle( hFile ); return iResult; } void VMPI_HandleCrash( const char *pMessage, void *pvExceptionInfo, bool bAssert ) { static LONG crashHandlerCount = 0; if ( InterlockedIncrement( &crashHandlerCount ) == 1 ) { Msg( "\nFAILURE: '%s' (assert: %d)\n", pMessage, bAssert ); // Send a message to the master. char crashMsg[4] = { VMPI_SHARED_PACKET_ID, VMPI_SUBPACKETID_CRASH, 't', ':' }; VMPI_Send2Chunks( crashMsg, sizeof( crashMsg ), pMessage, strlen( pMessage ) + 1, VMPI_MASTER_ID ); // Now attempt to create a minidump with the given exception information if ( pvExceptionInfo ) { struct _EXCEPTION_POINTERS *pvExPointers = ( struct _EXCEPTION_POINTERS * ) pvExceptionInfo; tchar tchMinidumpFileName[_MAX_PATH] = { 0 }; bool bSucceededWritingMinidump = WriteMiniDumpUsingExceptionInfo( pvExPointers->ExceptionRecord->ExceptionCode, pvExPointers, ( MINIDUMP_TYPE )( MiniDumpWithDataSegs | MiniDumpWithIndirectlyReferencedMemory | MiniDumpWithProcessThreadData ), // ( MINIDUMP_TYPE )( MiniDumpWithDataSegs | MiniDumpWithFullMemory | MiniDumpWithHandleData | MiniDumpWithUnloadedModules | MiniDumpWithIndirectlyReferencedMemory | MiniDumpWithProcessThreadData | MiniDumpWithPrivateReadWriteMemory ), // ( MINIDUMP_TYPE )( MiniDumpNormal ), tchMinidumpFileName ); if ( bSucceededWritingMinidump ) { crashMsg[2] = 'f'; VMPI_SendFileChunk( crashMsg, sizeof( crashMsg ), tchMinidumpFileName ); ::DeleteFile( tchMinidumpFileName ); } } // Let the messages go out. Sleep( 500 ); } InterlockedDecrement( &crashHandlerCount ); } // This is called if we crash inside our crash handler. It just terminates the process immediately. LONG __stdcall VMPI_SecondExceptionFilter( struct _EXCEPTION_POINTERS *ExceptionInfo ) { TerminateProcess( GetCurrentProcess(), 2 ); return EXCEPTION_EXECUTE_HANDLER; // (never gets here anyway) } void VMPI_ExceptionFilter( unsigned long uCode, void *pvExceptionInfo ) { // This is called if we crash inside our crash handler. It just terminates the process immediately. SetUnhandledExceptionFilter( VMPI_SecondExceptionFilter ); //DWORD code = ExceptionInfo->ExceptionRecord->ExceptionCode; #define ERR_RECORD( name ) { name, #name } struct { int code; char *pReason; } errors[] = { ERR_RECORD( EXCEPTION_ACCESS_VIOLATION ), ERR_RECORD( EXCEPTION_ARRAY_BOUNDS_EXCEEDED ), ERR_RECORD( EXCEPTION_BREAKPOINT ), ERR_RECORD( EXCEPTION_DATATYPE_MISALIGNMENT ), ERR_RECORD( EXCEPTION_FLT_DENORMAL_OPERAND ), ERR_RECORD( EXCEPTION_FLT_DIVIDE_BY_ZERO ), ERR_RECORD( EXCEPTION_FLT_INEXACT_RESULT ), ERR_RECORD( EXCEPTION_FLT_INVALID_OPERATION ), ERR_RECORD( EXCEPTION_FLT_OVERFLOW ), ERR_RECORD( EXCEPTION_FLT_STACK_CHECK ), ERR_RECORD( EXCEPTION_FLT_UNDERFLOW ), ERR_RECORD( EXCEPTION_ILLEGAL_INSTRUCTION ), ERR_RECORD( EXCEPTION_IN_PAGE_ERROR ), ERR_RECORD( EXCEPTION_INT_DIVIDE_BY_ZERO ), ERR_RECORD( EXCEPTION_INT_OVERFLOW ), ERR_RECORD( EXCEPTION_INVALID_DISPOSITION ), ERR_RECORD( EXCEPTION_NONCONTINUABLE_EXCEPTION ), ERR_RECORD( EXCEPTION_PRIV_INSTRUCTION ), ERR_RECORD( EXCEPTION_SINGLE_STEP ), ERR_RECORD( EXCEPTION_STACK_OVERFLOW ), ERR_RECORD( EXCEPTION_ACCESS_VIOLATION ), }; int nErrors = sizeof( errors ) / sizeof( errors[0] ); int i=0; char *pchReason = NULL; char chUnknownBuffer[32]; for ( i; ( i < nErrors ) && !pchReason; i++ ) { if ( errors[i].code == uCode ) pchReason = errors[i].pReason; } if ( i == nErrors ) { sprintf( chUnknownBuffer, "Error code 0x%08X", uCode ); pchReason = chUnknownBuffer; } VMPI_HandleCrash( pchReason, pvExceptionInfo, true ); TerminateProcess( GetCurrentProcess(), 1 ); } void HandleMPIDisconnect( int procID, const char *pReason ) { int nLiveWorkers = VMPI_GetCurrentNumberOfConnections() - g_nDisconnects - 1; // We ran into the size limit before and it wasn't readily apparent that the size limit had // been breached, so make sure to show errors about invalid packet sizes.. bool bOldSuppress = g_bSuppressPrintfOutput; g_bSuppressPrintfOutput = ( Q_stristr( pReason, "invalid packet size" ) == 0 ); Warning( "\n\n--- WARNING: lost connection to '%s' (%s).\n", VMPI_GetMachineName( procID ), pReason ); if ( g_bMPIMaster ) { Warning( "%d workers remain.\n\n", nLiveWorkers ); ++g_nDisconnects; /* if ( VMPI_GetCurrentNumberOfConnections() - g_nDisconnects <= 1 ) { Error( "All machines disconnected!" ); } */ } else { VMPI_HandleAutoRestart(); Error( "Worker quitting." ); } g_bSuppressPrintfOutput = bOldSuppress; }