//========= Copyright Valve Corporation, All rights reserved. ============// // // Purpose: // //============================================================================= #include "pch_materialsystem.h" #define MATSYS_INTERNAL #include "cmatlightmaps.h" #include "colorspace.h" #include "IHardwareConfigInternal.h" #include "cmaterialsystem.h" // NOTE: This must be the last file included!!! #include "tier0/memdbgon.h" #include "bitmap/float_bm.h" static ConVar mat_lightmap_pfms( "mat_lightmap_pfms", "0", FCVAR_MATERIAL_SYSTEM_THREAD, "Outputs .pfm files containing lightmap data for each lightmap page when a level exits." ); // Write PFM files for each lightmap page in the game directory when exiting a level #define USE_32BIT_LIGHTMAPS_ON_360 //uncomment to use 32bit lightmaps, be sure to keep this in sync with the same #define in stdshaders/lightmappedgeneric_ps2_3_x.h #ifdef _X360 #define X360_USE_SIMD_LIGHTMAP #endif //----------------------------------------------------------------------------- inline IMaterialInternal* CMatLightmaps::GetCurrentMaterialInternal() const { return GetMaterialSystem()->GetRenderContextInternal()->GetCurrentMaterialInternal(); } inline void CMatLightmaps::SetCurrentMaterialInternal(IMaterialInternal* pCurrentMaterial) { return GetMaterialSystem()->GetRenderContextInternal()->SetCurrentMaterialInternal( pCurrentMaterial ); } inline IMaterialInternal *CMatLightmaps::GetMaterialInternal( MaterialHandle_t idx ) const { return GetMaterialSystem()->GetMaterialInternal( idx ); } inline const IMatRenderContextInternal *CMatLightmaps::GetRenderContextInternal() const { return GetMaterialSystem()->GetRenderContextInternal(); } inline IMatRenderContextInternal *CMatLightmaps::GetRenderContextInternal() { return GetMaterialSystem()->GetRenderContextInternal(); } inline const CMaterialDict *CMatLightmaps::GetMaterialDict() const { return GetMaterialSystem()->GetMaterialDict(); } inline CMaterialDict *CMatLightmaps::GetMaterialDict() { return 
GetMaterialSystem()->GetMaterialDict(); } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- CMatLightmaps::CMatLightmaps() { m_currentWhiteLightmapMaterial = NULL; m_pLightmapPages = NULL; m_NumLightmapPages = 0; m_numSortIDs = 0; m_nUpdatingLightmapsStackDepth = 0; m_nLockedLightmap = -1; m_pLightmapDataPtrArray = NULL; m_eLightmapsState = STATE_DEFAULT; } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- void CMatLightmaps::Shutdown( ) { // Clean up all lightmaps CleanupLightmaps(); } //----------------------------------------------------------------------------- // Assign enumeration IDs to all materials //----------------------------------------------------------------------------- void CMatLightmaps::EnumerateMaterials( void ) { // iterate in sorted order int id = 0; for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) ) { GetMaterialInternal(i)->SetEnumerationID( id ); ++id; } } //----------------------------------------------------------------------------- // Gets the maximum lightmap page size... //----------------------------------------------------------------------------- int CMatLightmaps::GetMaxLightmapPageWidth() const { // FIXME: It's unclear which we want here. // It doesn't drastically increase primitives per DrawIndexedPrimitive // call at the moment to increase it, so let's not for now. // If we're using dynamic textures though, we want bigger that's for sure. // The tradeoff here is how much memory we waste if we don't fill the lightmap // We need to go to 512x256 textures because that's the only way bumped // lighting on displacements can work given the 128x128 allowance.. 
int nWidth = 512; if ( nWidth > HardwareConfig()->MaxTextureWidth() ) nWidth = HardwareConfig()->MaxTextureWidth(); return nWidth; } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- int CMatLightmaps::GetMaxLightmapPageHeight() const { int nHeight = 256; if ( nHeight > HardwareConfig()->MaxTextureHeight() ) nHeight = HardwareConfig()->MaxTextureHeight(); return nHeight; } //----------------------------------------------------------------------------- // Returns the lightmap page size //----------------------------------------------------------------------------- void CMatLightmaps::GetLightmapPageSize( int lightmapPageID, int *pWidth, int *pHeight ) const { switch( lightmapPageID ) { default: Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() ); *pWidth = m_pLightmapPages[lightmapPageID].m_Width; *pHeight = m_pLightmapPages[lightmapPageID].m_Height; break; case MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED: *pWidth = *pHeight = 1; AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" ); break; case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE: case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP: *pWidth = *pHeight = 1; break; } } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- int CMatLightmaps::GetLightmapWidth( int lightmapPageID ) const { switch( lightmapPageID ) { default: Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() ); return m_pLightmapPages[lightmapPageID].m_Width; case MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED: AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" ); return 1; case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE: case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP: return 1; } } 
//----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- int CMatLightmaps::GetLightmapHeight( int lightmapPageID ) const { switch( lightmapPageID ) { default: Assert( lightmapPageID >= 0 && lightmapPageID < GetNumLightmapPages() ); return m_pLightmapPages[lightmapPageID].m_Height; case MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED: AssertOnce( !"Can't use CMatLightmaps to get properties of MATERIAL_SYSTEM_LIGHTMAP_PAGE_USER_DEFINED" ); return 1; case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE: case MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP: return 1; } } //----------------------------------------------------------------------------- // Clean up lightmap pages. //----------------------------------------------------------------------------- void CMatLightmaps::CleanupLightmaps() { if ( mat_lightmap_pfms.GetBool()) { // Write PFM files containing lightmap data for this page for (int lightmap = 0; lightmap < GetNumLightmapPages(); lightmap++) { if ((NULL != m_pLightmapDataPtrArray) && (NULL != m_pLightmapDataPtrArray[lightmap])) { char szPFMFileName[MAX_PATH]; sprintf(szPFMFileName, "Lightmap-Page-%d.pfm", lightmap); m_pLightmapDataPtrArray[lightmap]->WritePFM(szPFMFileName); } } } // Remove the lightmap data bitmap representations if (m_pLightmapDataPtrArray) { int i; for( i = 0; i < GetNumLightmapPages(); i++ ) { delete m_pLightmapDataPtrArray[i]; } delete [] m_pLightmapDataPtrArray; m_pLightmapDataPtrArray = NULL; } // delete old lightmap pages if( m_pLightmapPages ) { int i; for( i = 0; i < GetNumLightmapPages(); i++ ) { g_pShaderAPI->DeleteTexture( m_LightmapPageTextureHandles[i] ); } delete [] m_pLightmapPages; m_pLightmapPages = 0; } m_NumLightmapPages = 0; } //----------------------------------------------------------------------------- // Resets the lightmap page info for each material //----------------------------------------------------------------------------- 
void CMatLightmaps::ResetMaterialLightmapPageInfo( void ) { for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) ) { IMaterialInternal *pMaterial = GetMaterialInternal(i); pMaterial->SetMinLightmapPageID( 9999 ); pMaterial->SetMaxLightmapPageID( -9999 ); pMaterial->SetNeedsWhiteLightmap( false ); } } //----------------------------------------------------------------------------- // This is called before any lightmap allocations take place //----------------------------------------------------------------------------- void CMatLightmaps::BeginLightmapAllocation() { // delete old lightmap pages CleanupLightmaps(); m_ImagePackers.RemoveAll(); int i = m_ImagePackers.AddToTail(); m_ImagePackers[i].Reset( 0, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() ); SetCurrentMaterialInternal(0); m_currentWhiteLightmapMaterial = 0; m_numSortIDs = 0; // need to set the min and max sorting id number for each material to // a default value that basically means that it hasn't been used yet. 
ResetMaterialLightmapPageInfo(); EnumerateMaterials(); } //----------------------------------------------------------------------------- // Allocates space in the lightmaps; must be called after BeginLightmapAllocation //----------------------------------------------------------------------------- int CMatLightmaps::AllocateLightmap( int width, int height, int offsetIntoLightmapPage[2], IMaterial *iMaterial ) { IMaterialInternal *pMaterial = static_cast( iMaterial ); if ( !pMaterial ) { Warning( "Programming error: CMatRenderContext::AllocateLightmap: NULL material\n" ); return m_numSortIDs; } pMaterial = pMaterial->GetRealTimeVersion(); //always work with the real time versions of materials internally // material change int i; int nPackCount = m_ImagePackers.Count(); if ( GetCurrentMaterialInternal() != pMaterial ) { // If this happens, then we need to close out all image packers other than // the last one so as to produce as few sort IDs as possible for ( i = nPackCount - 1; --i >= 0; ) { // NOTE: We *must* use the order preserving one here so the remaining one // is the last lightmap m_ImagePackers.Remove( i ); --nPackCount; } // If it's not the first material, increment the sort id if (GetCurrentMaterialInternal()) { m_ImagePackers[0].IncrementSortId( ); ++m_numSortIDs; } SetCurrentMaterialInternal(pMaterial); // This assertion guarantees we don't see the same material twice in this loop. Assert( pMaterial->GetMinLightmapPageID( ) > pMaterial->GetMaxLightmapPageID() ); // NOTE: We may not use this lightmap page, but we might // we won't know for sure until the next material is passed in. // So, for now, we're going to forcibly add the current lightmap // page to this material so the sort IDs work out correctly. GetCurrentMaterialInternal()->SetMinLightmapPageID( GetNumLightmapPages() ); GetCurrentMaterialInternal()->SetMaxLightmapPageID( GetNumLightmapPages() ); } // Try to add it to any of the current images... 
bool bAdded = false; for ( i = 0; i < nPackCount; ++i ) { bAdded = m_ImagePackers[i].AddBlock( width, height, &offsetIntoLightmapPage[0], &offsetIntoLightmapPage[1] ); if ( bAdded ) break; } if ( !bAdded ) { ++m_numSortIDs; i = m_ImagePackers.AddToTail(); m_ImagePackers[i].Reset( m_numSortIDs, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() ); ++m_NumLightmapPages; if ( !m_ImagePackers[i].AddBlock( width, height, &offsetIntoLightmapPage[0], &offsetIntoLightmapPage[1] ) ) { Error( "MaterialSystem_Interface_t::AllocateLightmap: lightmap (%dx%d) too big to fit in page (%dx%d)\n", width, height, GetMaxLightmapPageWidth(), GetMaxLightmapPageHeight() ); } // Add this lightmap to the material... GetCurrentMaterialInternal()->SetMaxLightmapPageID( GetNumLightmapPages() ); } return m_ImagePackers[i].GetSortId(); } // UNDONE: This needs testing, but it appears as though creating these textures managed // results in huge stalls whenever they are locked for modify. // That makes sense given the d3d docs, but these have been flagged as managed for quite some time. 
#define DYNAMIC_TEXTURES_NO_BACKING 1 void CMatLightmaps::EndLightmapAllocation() { // count the last page that we were on.if it wasn't // and count the last sortID that we were on m_NumLightmapPages++; m_numSortIDs++; m_firstDynamicLightmap = m_NumLightmapPages; // UNDONE: Until we start using the separate dynamic lighting textures don't allocate them // NOTE: Enable this if we want to stop locking the base lightmaps and instead only lock update // these completely dynamic pages // m_NumLightmapPages += COUNT_DYNAMIC_LIGHTMAP_PAGES; m_dynamic.Init(); // Compute the dimensions of the last lightmap int lastLightmapPageWidth, lastLightmapPageHeight; int nLastIdx = m_ImagePackers.Count(); m_ImagePackers[nLastIdx - 1].GetMinimumDimensions( &lastLightmapPageWidth, &lastLightmapPageHeight ); m_ImagePackers.Purge(); m_pLightmapPages = new LightmapPageInfo_t[GetNumLightmapPages()]; Assert( m_pLightmapPages ); if ( mat_lightmap_pfms.GetBool()) { // This array will be used to write PFM files full of lightmap data m_pLightmapDataPtrArray = new FloatBitMap_t*[GetNumLightmapPages()]; } int i; m_LightmapPageTextureHandles.EnsureCapacity( GetNumLightmapPages() ); for ( i = 0; i < GetNumLightmapPages(); i++ ) { // Compute lightmap dimensions bool lastStaticLightmap = ( i == (m_firstDynamicLightmap-1)); m_pLightmapPages[i].m_Width = (unsigned short)(lastStaticLightmap ? lastLightmapPageWidth : GetMaxLightmapPageWidth()); m_pLightmapPages[i].m_Height = (unsigned short)(lastStaticLightmap ? 
lastLightmapPageHeight : GetMaxLightmapPageHeight()); m_pLightmapPages[i].m_Flags = 0; AllocateLightmapTexture( i ); if ( mat_lightmap_pfms.GetBool()) { // Initialize the pointers to lightmap data m_pLightmapDataPtrArray[i] = NULL; } } } //----------------------------------------------------------------------------- // Allocate lightmap textures //----------------------------------------------------------------------------- void CMatLightmaps::AllocateLightmapTexture( int lightmap ) { bool bUseDynamicTextures = HardwareConfig()->PreferDynamicTextures(); int flags = bUseDynamicTextures ? TEXTURE_CREATE_DYNAMIC : TEXTURE_CREATE_MANAGED; m_LightmapPageTextureHandles.EnsureCount( lightmap + 1 ); char debugName[256]; Q_snprintf( debugName, sizeof( debugName ), "[lightmap %d]", lightmap ); ImageFormat imageFormat; switch ( HardwareConfig()->GetHDRType() ) { default: Assert( 0 ); // fall through. case HDR_TYPE_NONE: #if !defined( _X360 ) imageFormat = IMAGE_FORMAT_RGBA8888; flags |= TEXTURE_CREATE_SRGB; #else imageFormat = IMAGE_FORMAT_LINEAR_RGBA8888; #endif break; case HDR_TYPE_INTEGER: #if !defined( _X360 ) imageFormat = IMAGE_FORMAT_RGBA16161616; #else # if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) ) imageFormat = IMAGE_FORMAT_LINEAR_RGBA8888; # else imageFormat = IMAGE_FORMAT_LINEAR_RGBA16161616; # endif #endif break; case HDR_TYPE_FLOAT: imageFormat = IMAGE_FORMAT_RGBA16161616F; break; } switch ( m_eLightmapsState ) { case STATE_DEFAULT: // Allow allocations in default state { m_LightmapPageTextureHandles[lightmap] = g_pShaderAPI->CreateTexture( GetLightmapWidth(lightmap), GetLightmapHeight(lightmap), 1, imageFormat, 1, 1, flags, debugName, TEXTURE_GROUP_LIGHTMAP ); // don't mipmap lightmaps // Load up the texture data g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] ); g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_LINEAR ); g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_LINEAR ); if ( !bUseDynamicTextures ) { g_pShaderAPI->TexSetPriority( 1 ); 
} // Blat out the lightmap bits InitLightmapBits( lightmap ); } break; case STATE_RELEASED: // Not assigned m_LightmapPageTextureHandles[lightmap]; DevMsg( "AllocateLightmapTexture(%d) in released lightmap state (STATE_RELEASED), delayed till \"Restore\".\n", lightmap ); return; default: // Not assigned m_LightmapPageTextureHandles[lightmap]; Warning( "AllocateLightmapTexture(%d) in unknown lightmap state (%d), skipped.\n", lightmap, m_eLightmapsState ); Assert( !"AllocateLightmapTexture(?) in unknown lightmap state (?)" ); return; } } int CMatLightmaps::AllocateWhiteLightmap( IMaterial *iMaterial ) { IMaterialInternal *pMaterial = static_cast( iMaterial ); if( !pMaterial ) { Warning( "Programming error: CMatRenderContext::AllocateWhiteLightmap: NULL material\n" ); return m_numSortIDs; } pMaterial = pMaterial->GetRealTimeVersion(); //always work with the real time versions of materials internally if ( !m_currentWhiteLightmapMaterial || ( m_currentWhiteLightmapMaterial != pMaterial ) ) { if ( !GetCurrentMaterialInternal() && !m_currentWhiteLightmapMaterial ) { // don't increment if this is the very first material (ie. 
no lightmaps // allocated with AllocateLightmap // Assert( 0 ); } else { // material change m_numSortIDs++; #if 0 char buf[128]; Q_snprintf( buf, sizeof( buf ), "AllocateWhiteLightmap: m_numSortIDs = %d %s\n", m_numSortIDs, pMaterial->GetName() ); OutputDebugString( buf ); #endif } // Warning( "%d material: \"%s\" lightmapPageID: -1\n", m_numSortIDs, pMaterial->GetName() ); m_currentWhiteLightmapMaterial = pMaterial; pMaterial->SetNeedsWhiteLightmap( true ); } return m_numSortIDs; } //----------------------------------------------------------------------------- // Releases/restores lightmap pages //----------------------------------------------------------------------------- void CMatLightmaps::ReleaseLightmapPages() { switch ( m_eLightmapsState ) { case STATE_DEFAULT: // Allow release in default state only break; default: Warning( "ReleaseLightmapPages is expected in STATE_DEFAULT, current state = %d, discarded.\n", m_eLightmapsState ); Assert( !"ReleaseLightmapPages is expected in STATE_DEFAULT" ); return; } for( int i = 0; i < GetNumLightmapPages(); i++ ) { g_pShaderAPI->DeleteTexture( m_LightmapPageTextureHandles[i] ); } // We are now in released state m_eLightmapsState = STATE_RELEASED; } void CMatLightmaps::RestoreLightmapPages() { switch ( m_eLightmapsState ) { case STATE_RELEASED: // Allow restore in released state only break; default: Warning( "RestoreLightmapPages is expected in STATE_RELEASED, current state = %d, discarded.\n", m_eLightmapsState ); Assert( !"RestoreLightmapPages is expected in STATE_RELEASED" ); return; } // Switch to default state to allow allocations m_eLightmapsState = STATE_DEFAULT; for( int i = 0; i < GetNumLightmapPages(); i++ ) { AllocateLightmapTexture( i ); } } //----------------------------------------------------------------------------- // This initializes the lightmap bits //----------------------------------------------------------------------------- void CMatLightmaps::InitLightmapBits( int lightmap ) { VPROF_( 
"CMatLightmaps::InitLightmapBits", 1, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); int width = GetLightmapWidth(lightmap); int height = GetLightmapHeight(lightmap); CPixelWriter writer; g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] ); if ( !g_pShaderAPI->TexLock( 0, 0, 0, 0, width, height, writer ) ) return; // Debug mode, make em green checkerboard if ( writer.IsUsingFloatFormat() ) { for ( int j = 0; j < height; ++j ) { writer.Seek( 0, j ); for ( int k = 0; k < width; ++k ) { #ifndef _DEBUG writer.WritePixel( 1.0f, 1.0f, 1.0f ); #else // _DEBUG if( ( j + k ) & 1 ) { writer.WritePixelF( 0.0f, 1.0f, 0.0f ); } else { writer.WritePixelF( 0.0f, 0.0f, 0.0f ); } #endif // _DEBUG } } } else { for ( int j = 0; j < height; ++j ) { writer.Seek( 0, j ); for ( int k = 0; k < width; ++k ) { #ifndef _DEBUG // note: make this white to find multisample centroid sampling problems. // writer.WritePixel( 255, 255, 255 ); writer.WritePixel( 0, 0, 0 ); #else // _DEBUG if ( ( j + k ) & 1 ) { writer.WritePixel( 0, 255, 0 ); } else { writer.WritePixel( 0, 0, 0 ); } #endif // _DEBUG } } } g_pShaderAPI->TexUnlock(); } bool CMatLightmaps::LockLightmap( int lightmap ) { // Warning( "locking lightmap page: %d\n", lightmap ); VPROF_INCREMENT_COUNTER( "lightmap fullpage texlock", 1 ); if( m_nLockedLightmap != -1 ) { g_pShaderAPI->TexUnlock(); } g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmap] ); int pageWidth = m_pLightmapPages[lightmap].m_Width; int pageHeight = m_pLightmapPages[lightmap].m_Height; if (!g_pShaderAPI->TexLock( 0, 0, 0, 0, pageWidth, pageHeight, m_LightmapPixelWriter )) { Assert( 0 ); return false; } m_nLockedLightmap = lightmap; return true; } Vector4D ConvertLightmapColorToRGBScale( const float *lightmapColor ) { Vector4D result; float fScale = lightmapColor[0]; for( int i = 1; i != 3; ++i ) { if( lightmapColor[i] > fScale ) fScale = lightmapColor[i]; } fScale = ceil( fScale * (255.0f/16.0f) ) * (16.0f/255.0f); fScale = min( fScale, 
16.0f ); float fInvScale = 1.0f / fScale; for( int i = 0; i != 3; ++i ) { result[i] = lightmapColor[i] * fInvScale; result[i] = ceil( result[i] * 255.0f ) * (1.0f/255.0f); result[i] = min( result[i], 1.0f ); } fScale /= 16.0f; result.w = fScale; return result; } #ifdef _X360 // SIMD version of above // input numbers from pSrc are on the domain [0..16] // output is RGBA // ignores contents of w channel of input // the shader does this: rOut = Rin * Ain * 16.0f // where Rin is [0..1], a float computed from a byte value [0..255] // Ain is therefore the brightest channel (say R) divided by 16 and quantized // Rin is computed from pSrc->r by dividing by Ain // this outputs RGBa where RGB are [0..255] and a is the shader's scaling factor (also 0..255) // // WARNING - this code appears to be vulnerable to a compiler bug. Be very careful modifying and be // sure to test fltx4 ConvertLightmapColorToRGBScale( FLTX4 lightmapColor ) { static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f}; static const fltx4 FourPoint1s = { 0.1, 0.1, 0.1, 0.1 }; static const fltx4 vTwoFiftyFiveOverSixteen = {255.0f / 16.0f, 255.0f / 16.0f, 255.0f / 16.0f, 255.0f / 16.0f}; // static const fltx4 vSixteenOverTwoFiftyFive = { 16.0f / 255.0f, 16.0f / 255.0f, 16.0f / 255.0f, 16.0f / 255.0f }; // find the highest color value in lightmapColor and replicate it fltx4 scale = FindHighestSIMD3( lightmapColor ); fltx4 minscale = FindLowestSIMD3( lightmapColor ); fltx4 fl4OutofRange = OrSIMD( CmpGeSIMD( scale, Four_Ones ), CmpLeSIMD( scale, FourPoint1s ) ); fl4OutofRange = OrSIMD( fl4OutofRange, CmpGtSIMD( minscale, MulSIMD( Four_PointFives, scale ) ) ); // scale needs to be divided by 16 (because the shader multiplies it by 16) // then mapped to 0..255 and quantized. 
scale = __vrfip(MulSIMD(scale, vTwoFiftyFiveOverSixteen)); // scale = ceil(scale * 255/16) fltx4 result = MulSIMD(vTwoFiftyFive, lightmapColor); // start the scale cooking on the final result fltx4 invScale = ReciprocalEstSIMD(scale); // invScale = (16/255)(1/scale). may be +inf invScale = MulSIMD(invScale, vTwoFiftyFiveOverSixteen); // take the quantizing factor back out // of the inverse scale (one less // dependent op if you do it this way) // scale the input channels // compute so the numbers are all 0..255 ints. (if one happens to // be 256 due to numerical error in the reciprocation, the unsigned-saturate // store we'll use later on will bake it back down to 255) result = MulSIMD(result, invScale); // now, output -- // if the input color was nonzero, slip the scale into return value's w // component and return. If the input was zero, return zero. result = MaskedAssign( fl4OutofRange, SetWSIMD( result, scale ), SetWSIMD( MulSIMD( lightmapColor, vTwoFiftyFive ), vTwoFiftyFiveOverSixteen ) ); return result; } #endif // write bumped lightmap update to LDR 8-bit lightmap void CMatLightmaps::BumpedLightmapBitsToPixelWriter_LDR( float* pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2, float *pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) { const int nLightmapSize0 = pLightmapSize[0]; const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize(); const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() ); for( int t = 0; t < pLightmapSize[1]; t++ ) { int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 ); m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for( int s = 0; s < nLightmapSize0; s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float))) { unsigned char color[4][3]; 
ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] ); unsigned char alpha = RoundFloatToByte( pFloatImage[srcTexelOffset+3] * 255.0f ); m_LightmapPixelWriter.WritePixelNoAdvance( color[0][0], color[0][1], color[0][2], alpha ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[1][0], color[1][1], color[1][2], alpha ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[2][0], color[2][1], color[2][2], alpha ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[3][0], color[3][1], color[3][2], alpha ); } } if ( pfmOut ) { for( int t = 0; t < pLightmapSize[1]; t++ ) { int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 ); for( int s = 0; s < nLightmapSize0; s++,srcTexelOffset += (sizeof(Vector4D)/sizeof(float))) { unsigned char color[4][3]; ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] ); unsigned char alpha = RoundFloatToByte( pFloatImage[srcTexelOffset+3] * 255.0f ); // Write data to the bitmapped represenations so that PFM files can be written PixRGBAF pixelData; pixelData.Red = color[0][0]; pixelData.Green = color[0][1]; pixelData.Blue = color[0][2]; pixelData.Alpha = alpha; pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData); } } } } // write bumped lightmap update to HDR float lightmap void CMatLightmaps::BumpedLightmapBitsToPixelWriter_HDRF( float* pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2, float *pFloatImageBump3, int pLightmapSize[2], int 
pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) { if ( IsX360() ) { // 360 does not support HDR float mode Assert( 0 ); return; } Assert( !pfmOut ); // unsupported in this mode const int nLightmapSize0 = pLightmapSize[0]; const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize(); const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() ); for( int t = 0; t < pLightmapSize[1]; t++ ) { int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 ); m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for( int s = 0; s < nLightmapSize0; s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float))) { m_LightmapPixelWriter.WritePixelNoAdvanceF( pFloatImage[srcTexelOffset], pFloatImage[srcTexelOffset+1], pFloatImage[srcTexelOffset+2], pFloatImage[srcTexelOffset+3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( pFloatImageBump1[srcTexelOffset], pFloatImageBump1[srcTexelOffset+1], pFloatImageBump1[srcTexelOffset+2], pFloatImage[srcTexelOffset+3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( pFloatImageBump2[srcTexelOffset], pFloatImageBump2[srcTexelOffset+1], pFloatImageBump2[srcTexelOffset+2], pFloatImage[srcTexelOffset+3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( pFloatImageBump3[srcTexelOffset], pFloatImageBump3[srcTexelOffset+1], pFloatImageBump3[srcTexelOffset+2], pFloatImage[srcTexelOffset+3] ); } } } #ifdef _X360 #pragma optimize("u", on) #endif #ifdef _X360 namespace { // pack a pixel into BGRA8888 and return it with the data packed into the w component FORCEINLINE fltx4 PackPixel_BGRA8888( FLTX4 rgba ) { // this happens to be in an order such that we can use the handy builtin 
packing op // clamp to 0..255 (coz it might have leaked over) static const fltx4 vTwoFiftyFive = {255.0f, 255.0f, 255.0f, 255.0f}; // the magic number such that when mul-accummulated against rbga, // gets us a representation 3.0 + (r)*2^-22 -- puts the bits at // the bottom of the float static const XMVECTOR PackScale = { (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22)), (1.0f / (FLOAT)(1 << 22))}; // 255.0f / (FLOAT)(1 << 22) static const XMVECTOR Three = {3.0f, 3.0f, 3.0f, 3.0f}; fltx4 N = MinSIMD(vTwoFiftyFive, rgba); N = __vmaddfp(N, PackScale, Three); N = __vpkd3d(N, N, VPACK_D3DCOLOR, VPACK_32, 0); // pack into w word return N; } // A small store-gather buffer used in the // BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360(). // The store-gather buffers. Hopefully these will live in the L1 // cache, which will make writing to them, then to memory, faster // than just using __stvewx to write directly into WC memory // one noncontiguous float at a time. (If there weren't a huge // compiler bug with __stvewx in the Apr07 XDK, that might not // be the case.) struct ALIGN128 CPixelWriterStoreGather { enum { kRows = 4, kWordsPerRow = 32, }; ALIGN128 uint32 m_data[kRows][kWordsPerRow]; // four rows of bgra data, aligned to 4 cache lines. dwords so memcpy works better. int m_wordsGathered; int m_bytesBetweenWriterRows; // the number of bytes spacing the maps inside the writer from each other // if we weren't gathering, we'd SkipBytes this many between the base map, bump1, etc. 
// write four rows, as SIMD registers, into the buffers inline void write( CPixelWriter * RESTRICT pLightmapPixelWriter, FLTX4 row0, FLTX4 row1, FLTX4 row2, FLTX4 row3 ) RESTRICT { // if full, commit Assert(m_wordsGathered <= kWordsPerRow); AssertMsg((m_wordsGathered & 3) == 0, "Don't call CPixelWriterStoreGather::write after ::writeJustX"); // single-word writes have misaligned me if (m_wordsGathered >= kWordsPerRow) { commitWhenFull(pLightmapPixelWriter); } XMStoreVector4A( &m_data[0][m_wordsGathered], row0 ); XMStoreVector4A( &m_data[1][m_wordsGathered], row1 ); XMStoreVector4A( &m_data[2][m_wordsGathered], row2 ); XMStoreVector4A( &m_data[3][m_wordsGathered], row3 ); m_wordsGathered += 4 ; // four words per simd vec } // pluck the w component out of each of the rows, and store it into the gather buffer. Don't // call the other write function after calling this. inline void writeJustW( CPixelWriter * RESTRICT pLightmapPixelWriter, FLTX4 row0, FLTX4 row1, FLTX4 row2, FLTX4 row3 ) RESTRICT { // if full, commit Assert(m_wordsGathered <= kWordsPerRow); if (m_wordsGathered >= kWordsPerRow) { commitWhenFull(pLightmapPixelWriter); } // for each fltx4, splat out x and then use the __stvewx to store // whichever word happens to align with the float pointer through // that pointer. __stvewx(__vspltw(row0, 3), &m_data[0][m_wordsGathered], 0 ); __stvewx(__vspltw(row1, 3), &m_data[1][m_wordsGathered], 0 ); __stvewx(__vspltw(row2, 3), &m_data[2][m_wordsGathered], 0 ); __stvewx(__vspltw(row3, 3), &m_data[3][m_wordsGathered], 0 ); m_wordsGathered += 1 ; // only stored one word } // Commit my buffers to the pixelwriter's memory, and advance its // pointer. 
// Flushes however many words have been gathered (possibly a partial row).
// Each of the four gather rows is copied to the writer's current position
// plus 0, 1, 2 and 3 times m_bytesBetweenWriterRows; the writer is then
// advanced by the number of bytes copied per row and the buffer is reset.
// Does nothing when the buffer is empty.
void commit(CPixelWriter * RESTRICT pLightmapPixelWriter) RESTRICT
{
	if (m_wordsGathered > 0)
	{
		unsigned char* RESTRICT pWriteInto = pLightmapPixelWriter->GetCurrentPixel();
		// we have to use memcpy because we're writing to non-cacheable memory,
		// but we can't even assume that the addresses we're writing to are
		// vector-aligned.
		// NOTE(review): "#pragma error" is not a standard directive; confirm the
		// target compiler actually diagnoses it, otherwise this guard is inert.
#ifdef memcpy // if someone's overridden the intrinsic, complain
#pragma error("You have overridden memcpy(), which is an XBOX360 intrinsic. This function will not behave optimally.")
#endif
		memcpy(pWriteInto, m_data[0], m_wordsGathered * sizeof(uint32));
		pWriteInto += m_bytesBetweenWriterRows;
		memcpy(pWriteInto, m_data[1], m_wordsGathered * sizeof(uint32));
		pWriteInto += m_bytesBetweenWriterRows;
		memcpy(pWriteInto, m_data[2], m_wordsGathered * sizeof(uint32));
		pWriteInto += m_bytesBetweenWriterRows;
		memcpy(pWriteInto, m_data[3], m_wordsGathered * sizeof(uint32));

		pLightmapPixelWriter->SkipBytes(m_wordsGathered * sizeof(uint32));
		m_wordsGathered = 0;
	}
}

// like commit, but the version we use when we know we're full.
// Takes advantage of better compile-time generation for
// memcpy: the copy length is the constant kWordsPerRow rather than
// the runtime fill count.
void commitWhenFull(CPixelWriter * RESTRICT pLightmapPixelWriter) RESTRICT
{
	unsigned char* RESTRICT pWriteInto = pLightmapPixelWriter->GetCurrentPixel();
	// we have to use memcpy because we're writing to non-cacheable memory,
	// but we can't even assume that the addresses we're writing to are
	// vector-aligned.
#ifdef memcpy // if someone's overridden the intrinsic, complain
#pragma error("You have overridden memcpy(), which is an XBOX360 intrinsic. This function will not behave optimally.")
#endif
	// if we're full, use compile-time known version of
	// memcpy to take advantage of its ability to generate
	// inline code. In fact, use the dword-aligned
	// version so that we use the 64-bit writing funcs.
	Assert( m_wordsGathered == kWordsPerRow );
	COMPILE_TIME_ASSERT((kWordsPerRow & 3) == 0); // the number of words per row has to be a multiple of four

	// NOTE(review): the reinterpret_cast calls below carry no template argument
	// in this copy of the file; the target type (presumably a 64-bit pointer,
	// per the comment above) looks to have been lost -- restore before building.
	memcpy(pWriteInto, reinterpret_cast(m_data[0]), kWordsPerRow * sizeof(uint32));
	pWriteInto += m_bytesBetweenWriterRows;
	memcpy(pWriteInto, reinterpret_cast(m_data[1]), kWordsPerRow * sizeof(uint32));
	pWriteInto += m_bytesBetweenWriterRows;
	memcpy(pWriteInto, reinterpret_cast(m_data[2]), kWordsPerRow * sizeof(uint32));
	pWriteInto += m_bytesBetweenWriterRows;
	memcpy(pWriteInto, reinterpret_cast(m_data[3]), kWordsPerRow * sizeof(uint32));

	pLightmapPixelWriter->SkipBytes(m_wordsGathered * sizeof(uint32));
	m_wordsGathered = 0;
}

// Starts with an empty buffer.
// parameter: space between bump pages in the pixelwriter, in bytes
CPixelWriterStoreGather(int writerSizeBytes) : m_wordsGathered(0), m_bytesBetweenWriterRows(writerSizeBytes) {};
};

// NOTE(review): this brace closes a scope that was opened before this part of the file.
}

// this is a function for specifically writing bumped BGRA lightmaps -- in order for it
// to be properly scheduled, I needed to break out the inline functions. Also,
// to make the write-combined memory more efficient (and work around a bug in the
// April 2007 XDK), we need to store-gather our writes on the cache before blasting
// them out to write-combined memory. We can't simply write from the SIMD registers
// into the pixelwriter's data, because the difference between the output rows,
// eg nLightmap0WriterSizeBytes[0], might not be a multiple of 16. Unaligned stores
// to non-cacheable memory cause an alignment exception.
//
// Inputs:
//   pFloatImage, pFloatImageBump1..3 - source luxels, four floats per luxel, read
//                                      row by row with a stride of pLightmapSize[0] luxels
//   pLightmapSize                    - [0] = luxels per row, [1] = number of rows
//   pOffsetIntoLightmapPage          - x/y the writer is seeked to for row 0
//   pfmOut                           - must be NULL (asserted); file output is not done here
//   m_LightmapPixelWriter            - destination; must use a four-byte pixel (asserted)
// For each row the luxels are processed four at a time through
// CPixelWriterStoreGather::write, and the remaining 0..3 luxels singly
// through writeJustW.
static void BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360( float* RESTRICT pFloatImage, float * RESTRICT pFloatImageBump1,
	float * RESTRICT pFloatImageBump2, float * RESTRICT pFloatImageBump3,
	int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut,
	CPixelWriter * RESTRICT m_LightmapPixelWriter)
{
	AssertMsg(m_LightmapPixelWriter->GetPixelSize() == 4, "BGRA format is no longer four bytes long? This is unsupported on 360, and probably immoral as well.");
	// byte distance between the base map and each successive bump map in the writer
	const int nLightmap0WriterSizeBytes = pLightmapSize[0] * 4 /*m_LightmapPixelWriter->GetPixelSize()*/;
	// const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - 4 );

	// assert that 1 * 4 = 4
	COMPILE_TIME_ASSERT(sizeof( Vector4D ) == sizeof(float) * 4);

	AssertMsg(!pfmOut, "Runtime conversion of lightmaps to files is no longer supported on 360.\n");

	// The store-gather buffers. Hopefully these will live in the L1
	// cache, which will make writing to them, then to memory, faster
	// than just using __stvewx to write directly into WC memory
	// one noncontiguous float at a time. (If there weren't a huge
	// compiler bug with __stvewx in the Apr07 XDK, that might not
	// be the case.)
	CPixelWriterStoreGather storeGather(nLightmap0WriterSizeBytes);

	for( int t = 0; t < pLightmapSize[1]; t++ )
	{
#define FOUR (sizeof( Vector4D ) / sizeof( float )) // make explicit when we're incrementing by length of a 4dvec
		int srcTexelOffset = ( FOUR ) * ( 0 + t * pLightmapSize[0] );
		m_LightmapPixelWriter->Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t );

		// Our code works best when we can process luxels in groups of four. So,
		// figure out how many four-luxel groups we can process,
		// then do them in groups, then process the remainder.
		unsigned int groupsOfFourLimit = (((unsigned int)pLightmapSize[0]) & ~3);
		// we want to hang on to this index when we're done with groups so we can do the remainder.
		unsigned int s; // counts the number of luxels processed
		for( s = 0; s < groupsOfFourLimit; s += 4, srcTexelOffset += 4 * ( FOUR ))
		{
			static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};
			// the store-gather simds
			fltx4 outBaseMap = Four_Zeros, outBump1 = Four_Zeros, outBump2 = Four_Zeros, outBump3 = Four_Zeros;
			// we'll read four at a time
			fltx4 vFloatImage[4], vFloatImageBump1[4], vFloatImageBump2[4], vFloatImageBump3[4];

			// stripe these loads to cause less ERAT thrashing
			vFloatImage[0] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset );
			vFloatImage[1] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 4 );
			vFloatImage[2] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 8 );
			vFloatImage[3] = LoadUnalignedSIMD(pFloatImage + srcTexelOffset + 12 );

			vFloatImageBump1[0] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset );
			vFloatImageBump1[1] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 4 );
			vFloatImageBump1[2] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 8 );
			vFloatImageBump1[3] = LoadUnalignedSIMD(pFloatImageBump1 + srcTexelOffset + 12 );

			vFloatImageBump2[0] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset );
			vFloatImageBump2[1] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 4 );
			vFloatImageBump2[2] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 8 );
			vFloatImageBump2[3] = LoadUnalignedSIMD(pFloatImageBump2 + srcTexelOffset + 12 );

			vFloatImageBump3[0] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset );
			vFloatImageBump3[1] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 4 );
			vFloatImageBump3[2] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 8 );
			vFloatImageBump3[3] = LoadUnalignedSIMD(pFloatImageBump3 + srcTexelOffset + 12 );

			// perform an arcane averaging operation upon the bump map values
			// (todo: make this not an inline so it will schedule better -- inlining is
			// done by the linker, which is too late for operation scheduling)
			ColorSpace::LinearToBumpedLightmap( vFloatImage[0], vFloatImageBump1[0], vFloatImageBump2[0], vFloatImageBump3[0],
				// transform "in place":
				vFloatImage[0], vFloatImageBump1[0], vFloatImageBump2[0], vFloatImageBump3[0] );
			ColorSpace::LinearToBumpedLightmap( vFloatImage[1], vFloatImageBump1[1], vFloatImageBump2[1], vFloatImageBump3[1],
				// transform "in place":
				vFloatImage[1], vFloatImageBump1[1], vFloatImageBump2[1], vFloatImageBump3[1] );
			ColorSpace::LinearToBumpedLightmap( vFloatImage[2], vFloatImageBump1[2], vFloatImageBump2[2], vFloatImageBump3[2],
				// transform "in place":
				vFloatImage[2], vFloatImageBump1[2], vFloatImageBump2[2], vFloatImageBump3[2] );
			ColorSpace::LinearToBumpedLightmap( vFloatImage[3], vFloatImageBump1[3], vFloatImageBump2[3], vFloatImageBump3[3],
				// transform "in place":
				vFloatImage[3], vFloatImageBump1[3], vFloatImageBump2[3], vFloatImageBump3[3] );

			// convert each color to RGB scaled.
			// DO NOT! make this into a for loop. The (April07 XDK) compiler
			// in fact DOES NOT unroll them, and will perform very naive
			// scheduling if you try.

			// clamp to 0..16 float
			vFloatImage[0] = MinSIMD(vFloatImage[0], vSixteen);
			vFloatImageBump1[0] = MinSIMD(vFloatImageBump1[0], vSixteen);
			vFloatImageBump2[0] = MinSIMD(vFloatImageBump2[0], vSixteen);
			vFloatImageBump3[0] = MinSIMD(vFloatImageBump3[0], vSixteen);

			vFloatImage[1] = MinSIMD(vFloatImage[1], vSixteen);
			vFloatImageBump1[1] = MinSIMD(vFloatImageBump1[1], vSixteen);
			vFloatImageBump2[1] = MinSIMD(vFloatImageBump2[1], vSixteen);
			vFloatImageBump3[1] = MinSIMD(vFloatImageBump3[1], vSixteen);

			vFloatImage[2] = MinSIMD(vFloatImage[2], vSixteen);
			vFloatImageBump1[2] = MinSIMD(vFloatImageBump1[2], vSixteen);
			vFloatImageBump2[2] = MinSIMD(vFloatImageBump2[2], vSixteen);
			vFloatImageBump3[2] = MinSIMD(vFloatImageBump3[2], vSixteen);

			vFloatImage[3] = MinSIMD(vFloatImage[3], vSixteen);
			vFloatImageBump1[3] = MinSIMD(vFloatImageBump1[3], vSixteen);
			vFloatImageBump2[3] = MinSIMD(vFloatImageBump2[3], vSixteen);
			vFloatImageBump3[3] = MinSIMD(vFloatImageBump3[3], vSixteen);

			// compute the scaling factor, place it in w, and
			// scale the rest by it. Obliterates whatever was
			// already in alpha.
			// This code is why it is important to not use a for
			// loop: you need to let the compiler keep the value
			// on registers (which it can't do if you use a
			// variable indexed array) and interleave the
			// inlined instructions.
			vFloatImage[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[0]) );
			vFloatImageBump1[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[0]) );
			vFloatImageBump2[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[0]) );
			vFloatImageBump3[0] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[0]) );

			vFloatImage[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[1]) );
			vFloatImageBump1[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[1]) );
			vFloatImageBump2[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[1]) );
			vFloatImageBump3[1] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[1]) );

			vFloatImage[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[2]) );
			vFloatImageBump1[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[2]) );
			vFloatImageBump2[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[2]) );
			vFloatImageBump3[2] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[2]) );

			vFloatImage[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImage[3]) );
			vFloatImageBump1[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump1[3]) );
			vFloatImageBump2[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump2[3]) );
			vFloatImageBump3[3] = PackPixel_BGRA8888( ConvertLightmapColorToRGBScale(vFloatImageBump3[3]) );

			// Each of the registers above contains one RGBA 32-bit struct
			// in their w word. So, combine them such that each of the assignees
			// below contains four RGBAs, in xyzw order (big-endian).
			outBaseMap = __vrlimi(outBaseMap, vFloatImage[0], 8, 3 ); // insert into x
			outBump1 = __vrlimi(outBump1, vFloatImageBump1[0], 8, 3 ); // insert into x
			outBump2 = __vrlimi(outBump2, vFloatImageBump2[0], 8, 3 ); // insert into x
			outBump3 = __vrlimi(outBump3, vFloatImageBump3[0], 8, 3 ); // insert into x

			outBaseMap = __vrlimi(outBaseMap, vFloatImage[1], 4, 2 ); // insert into y
			outBump1 = __vrlimi(outBump1, vFloatImageBump1[1], 4, 2 ); // insert into y
			outBump2 = __vrlimi(outBump2, vFloatImageBump2[1], 4, 2 ); // insert into y
			outBump3 = __vrlimi(outBump3, vFloatImageBump3[1], 4, 2 ); // insert into y

			outBaseMap = __vrlimi(outBaseMap, vFloatImage[2], 2, 1 ); // insert into z
			outBump1 = __vrlimi(outBump1, vFloatImageBump1[2], 2, 1 ); // insert into z
			outBump2 = __vrlimi(outBump2, vFloatImageBump2[2], 2, 1 ); // insert into z
			outBump3 = __vrlimi(outBump3, vFloatImageBump3[2], 2, 1 ); // insert into z

			outBaseMap = __vrlimi(outBaseMap, vFloatImage[3], 1, 0 ); // insert into w
			outBump1 = __vrlimi(outBump1, vFloatImageBump1[3], 1, 0 ); // insert into w
			outBump2 = __vrlimi(outBump2, vFloatImageBump2[3], 1, 0 ); // insert into w
			outBump3 = __vrlimi(outBump3, vFloatImageBump3[3], 1, 0 ); // insert into w

			// push the data through the store-gather buffer.
			storeGather.write(m_LightmapPixelWriter, outBaseMap, outBump1, outBump2, outBump3);
		}

		// Once here, make sure we've committed any leftover changes, then process
		// the remainders singly.
		storeGather.commit(m_LightmapPixelWriter);
		for( ; // s is where it should be from the loop above
			s < (unsigned int) pLightmapSize[0];
			s++,
			// m_LightmapPixelWriter->SkipBytes(nRewindToNextPixel), // now handled by store-gather
			srcTexelOffset += ( FOUR ))
		{
			static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f};

			fltx4 vColor[4];

			fltx4 vFloatImage = LoadUnalignedSIMD(&pFloatImage[srcTexelOffset]);
			fltx4 vFloatImageBump1 = LoadUnalignedSIMD(&pFloatImageBump1[srcTexelOffset]);
			fltx4 vFloatImageBump2 = LoadUnalignedSIMD(&pFloatImageBump2[srcTexelOffset]);
			fltx4 vFloatImageBump3 = LoadUnalignedSIMD(&pFloatImageBump3[srcTexelOffset]);

			// perform an arcane averaging operation upon the bump map values
			ColorSpace::LinearToBumpedLightmap( vFloatImage, vFloatImageBump1, vFloatImageBump2, vFloatImageBump3,
				vColor[0], vColor[1], vColor[2], vColor[3] );

			// convert each color to RGB scaled.
			// DO NOT! make this into a for loop. The (April07 XDK) compiler
			// in fact DOES NOT unroll them, and will perform very naive
			// scheduling if you try.

			// clamp to 0..16 float
			vColor[0] = MinSIMD(vColor[0], vSixteen);
			vColor[1] = MinSIMD(vColor[1], vSixteen);
			vColor[2] = MinSIMD(vColor[2], vSixteen);
			vColor[3] = MinSIMD(vColor[3], vSixteen);

			// compute the scaling factor, place it in w, and
			// scale the rest by it. Obliterates whatever was
			// already in alpha.
			// This code is why it is important to not use a for
			// loop: you need to let the compiler interleave the
			// inlined instructions.
			vColor[0] = ConvertLightmapColorToRGBScale( vColor[0] );
			vColor[1] = ConvertLightmapColorToRGBScale( vColor[1] );
			vColor[2] = ConvertLightmapColorToRGBScale( vColor[2] );
			vColor[3] = ConvertLightmapColorToRGBScale( vColor[3] );

			// Optional scalar recomputation of the same luxel, kept for
			// cross-checking the SIMD result (the comparison itself is commented out).
#ifdef X360_DOUBLECHECK_LIGHTMAPS
			unsigned short color[4][4];

			ColorSpace::LinearToBumpedLightmap(
				&pFloatImage[srcTexelOffset],
				&pFloatImageBump1[srcTexelOffset],
				&pFloatImageBump2[srcTexelOffset],
				&pFloatImageBump3[srcTexelOffset],
				color[0], color[1], color[2], color[3] );

			unsigned short alpha = ColorSpace::LinearToUnsignedShort( pFloatImage[srcTexelOffset+3], 16 );
			color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha;

			if( IsX360() )
			{
				for( int i = 0; i != 4; ++i )
				{
					Vector4D vRGBScale;

					vRGBScale.x = color[i][0] * (16.0f / 65535.0f);
					vRGBScale.y = color[i][1] * (16.0f / 65535.0f);
					vRGBScale.z = color[i][2] * (16.0f / 65535.0f);
					vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x );
					color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f );
					color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f );
					color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f );
					color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f );
				}
			}

			/*
			for (int ii = 0; ii < 4; ++ii)
			{
				uint32 pack = (PackPixel_BGRA8888( vColor[ii] ).u[3]);
				if (color[ii][3] != 0)
				Assert( color[ii][0] == (pack & 0xFF0000) >> 16 &&
					color[ii][1] == (pack & 0xFF00) >> 8 &&
					color[ii][2] == (pack & 0xFF) &&
					color[ii][3] == (pack & 0xFF000000) >> 24 );
			}
			*/
#endif

			vColor[0] = PackPixel_BGRA8888( vColor[0] );
			vColor[1] = PackPixel_BGRA8888( vColor[1] );
			vColor[2] = PackPixel_BGRA8888( vColor[2] );
			vColor[3] = PackPixel_BGRA8888( vColor[3] );

			// one packed pixel per map goes into the gather buffer
			storeGather.writeJustW(m_LightmapPixelWriter, vColor[0], vColor[1], vColor[2], vColor[3] );

			/* // here is the old way of writing pixels:
			// now we store-gather this
			m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[0] );
			Assert(*reinterpret_cast(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[0] ).u[3] );
			void * RESTRICT pBits
= m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[1], pBits ); Assert(*reinterpret_cast(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[1] ).u[3] ); pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[2], pBits ); Assert(*reinterpret_cast(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[2] ).u[3] ); pBits = m_LightmapPixelWriter->SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter->WritePixelNoAdvance_BGRA8888( vColor[3], pBits ); Assert(*reinterpret_cast(m_LightmapPixelWriter->GetCurrentPixel()) == PackPixel_BGRA8888( vColor[3] ).u[3] ); m_LightmapPixelWriter->SkipBytes(nRewindToNextPixel); */ } storeGather.commit(m_LightmapPixelWriter); } } #endif // _X360 // write bumped lightmap update to HDR integer lightmap void CMatLightmaps::BumpedLightmapBitsToPixelWriter_HDRI( float* RESTRICT pFloatImage, float * RESTRICT pFloatImageBump1, float * RESTRICT pFloatImageBump2, float * RESTRICT pFloatImageBump3, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) RESTRICT { const int nLightmapSize0 = pLightmapSize[0]; const int nLightmap0WriterSizeBytes = nLightmapSize0 * m_LightmapPixelWriter.GetPixelSize(); const int nRewindToNextPixel = -( ( nLightmap0WriterSizeBytes * 3 ) - m_LightmapPixelWriter.GetPixelSize() ); if( m_LightmapPixelWriter.IsUsingFloatFormat() ) { AssertMsg(!IsX360(), "Tried to use a floating-point pixel format for lightmaps on 360, which is not supported."); if (!IsX360()) { for( int t = 0; t < pLightmapSize[1]; t++ ) { int srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 ); m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for( int s = 0; s < nLightmapSize0; s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += 
(sizeof(Vector4D)/sizeof(float))) { unsigned short color[4][4]; ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] ); float alpha = pFloatImage[srcTexelOffset+3]; Assert( alpha >= 0.0f && alpha <= 1.0f ); color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha; float toFloat = ( 1.0f / ( float )( 1 << 16 ) ); /* // This code is now a can't-happen, because we do not allow float formats on 360. #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) ) if( IsX360() ) { for( int i = 0; i != 4; ++i ) { Vector4D vRGBScale; vRGBScale.x = color[i][0] * (16.0f / 65535.0f); vRGBScale.y = color[i][1] * (16.0f / 65535.0f); vRGBScale.z = color[i][2] * (16.0f / 65535.0f); vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x ); color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f ); color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f ); color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f ); color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f ); } toFloat = ( 1.0f / ( float )( 1 << 8 ) ); } #endif */ m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[0][0], toFloat * color[0][1], toFloat * color[0][2], toFloat * color[0][3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[1][0], toFloat * color[1][1], toFloat * color[1][2], toFloat * color[1][3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[2][0], toFloat * color[2][1], toFloat * color[2][2], toFloat * color[2][3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvanceF( toFloat * color[3][0], toFloat * color[3][1], toFloat * color[3][2], toFloat * color[3][3] ); } } } } else { #ifndef X360_USE_SIMD_LIGHTMAP for( int t = 0; t < pLightmapSize[1]; t++ ) { int 
srcTexelOffset = ( sizeof( Vector4D ) / sizeof( float ) ) * ( 0 + t * nLightmapSize0 ); m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for( int s = 0; s < nLightmapSize0; s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += (sizeof(Vector4D)/sizeof(float))) { unsigned short color[4][4]; ColorSpace::LinearToBumpedLightmap( &pFloatImage[srcTexelOffset], &pFloatImageBump1[srcTexelOffset], &pFloatImageBump2[srcTexelOffset], &pFloatImageBump3[srcTexelOffset], color[0], color[1], color[2], color[3] ); unsigned short alpha = ColorSpace::LinearToUnsignedShort( pFloatImage[srcTexelOffset+3], 16 ); color[0][3] = color[1][3] = color[2][3] = color[3][3] = alpha; #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) ) if( IsX360() ) { for( int i = 0; i != 4; ++i ) { Vector4D vRGBScale; vRGBScale.x = color[i][0] * (16.0f / 65535.0f); vRGBScale.y = color[i][1] * (16.0f / 65535.0f); vRGBScale.z = color[i][2] * (16.0f / 65535.0f); vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x ); color[i][0] = RoundFloatToByte( vRGBScale.x * 255.0f ); color[i][1] = RoundFloatToByte( vRGBScale.y * 255.0f ); color[i][2] = RoundFloatToByte( vRGBScale.z * 255.0f ); color[i][3] = RoundFloatToByte( vRGBScale.w * 255.0f ); } } #endif m_LightmapPixelWriter.WritePixelNoAdvance( color[0][0], color[0][1], color[0][2], color[0][3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[1][0], color[1][1], color[1][2], color[1][3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[2][0], color[2][1], color[2][2], color[2][3] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( color[3][0], color[3][1], color[3][2], color[3][3] ); // Write data to the bitmapped represenations so that PFM files can be written if ( pfmOut ) { PixRGBAF pixelData; pixelData.Red = color[0][0]; 
pixelData.Green = color[0][1]; pixelData.Blue = color[0][2]; pixelData.Alpha = alpha; pfmOut->WritePixelRGBAF(pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData); } } } #else // this is an optimized XBOX implementation. For a clearer // presentation of the algorithm, see the PC implementation // above. // First check for the most common case, using an efficient // branch rather than a switch: if (m_LightmapPixelWriter.GetFormat() == IMAGE_FORMAT_LINEAR_BGRA8888) { // broken out into a static to make things more readable // and be nicer to the instruction cache BumpedLightmapBitsToPixelWriter_HDRI_BGRA_X360( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3, pLightmapSize, pOffsetIntoLightmapPage, pfmOut, &m_LightmapPixelWriter ); } else { // This case should actually never be hit -- we do not use RGBA. for( int t = 0; t < pLightmapSize[1]; t++ ) { // assert that 1 * 4 = 4 COMPILE_TIME_ASSERT(sizeof( Vector4D ) == sizeof(float) * 4); #define FOUR (sizeof( Vector4D ) / sizeof( float )) // in case this ever changes int srcTexelOffset = ( FOUR ) * ( 0 + t * nLightmapSize0 ); m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for( int s = 0; s < nLightmapSize0; s++, m_LightmapPixelWriter.SkipBytes(nRewindToNextPixel),srcTexelOffset += ( FOUR )) { static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f}; fltx4 vColor[4]; fltx4 vFloatImage = LoadUnalignedSIMD(&pFloatImage[srcTexelOffset]); fltx4 vFloatImageBump1 = LoadUnalignedSIMD(&pFloatImageBump1[srcTexelOffset]); fltx4 vFloatImageBump2 = LoadUnalignedSIMD(&pFloatImageBump2[srcTexelOffset]); fltx4 vFloatImageBump3 = LoadUnalignedSIMD(&pFloatImageBump3[srcTexelOffset]); // perform an arcane averaging operation upon the bump map values ColorSpace::LinearToBumpedLightmap( vFloatImage, vFloatImageBump1, vFloatImageBump2, vFloatImageBump3, vColor[0], vColor[1], vColor[2], vColor[3] ); // convert each color to RGB scaled. // DO NOT! 
make this into a for loop. The (April07 XDK) compiler // in fact DOES NOT unroll them, and will perform very naive // scheduling if you try. // clamp to 0..16 float vColor[0] = MinSIMD(vColor[0], vSixteen); vColor[1] = MinSIMD(vColor[1], vSixteen); vColor[2] = MinSIMD(vColor[2], vSixteen); vColor[3] = MinSIMD(vColor[3], vSixteen); // compute the scaling factor, transform the RGB, // and place the scale in w. Obliterates whatever was // already in alpha. // This code is why it is important to not use a for // loop: you need to let the compiler interleave the // inlined instructions. vColor[0] = ConvertLightmapColorToRGBScale( vColor[0] ); vColor[1] = ConvertLightmapColorToRGBScale( vColor[1] ); vColor[2] = ConvertLightmapColorToRGBScale( vColor[2] ); vColor[3] = ConvertLightmapColorToRGBScale( vColor[3] ); m_LightmapPixelWriter.WritePixelNoAdvance( vColor[0] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( vColor[1] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( vColor[2] ); m_LightmapPixelWriter.SkipBytes( nLightmap0WriterSizeBytes ); m_LightmapPixelWriter.WritePixelNoAdvance( vColor[3] ); AssertMsg(!pfmOut, "Runtime conversion of lightmaps to files is no longer supported on 360.\n"); // Write data to the bitmapped represenations so that PFM files can be written if ( pfmOut ) { Warning("**************************************************\n" "Lightmap output to files on 360 HAS BEEN DISABLED.\n" "A grave error has just occurred.\n" "**************************************************\n"); DebuggerBreakIfDebugging(); /* PixRGBAF pixelData; pixelData.Red = color[0][0]; pixelData.Green = color[0][1]; pixelData.Blue = color[0][2]; pixelData.Alpha = alpha; pfmOut->WritePixelRGBAF(pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData); */ } } } } #endif } } void CMatLightmaps::LightmapBitsToPixelWriter_LDR( float* pFloatImage, int 
pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) { // non-HDR lightmap processing float *pSrc = pFloatImage; for( int t = 0; t < pLightmapSize[1]; ++t ) { m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) ) { unsigned char color[4]; ColorSpace::LinearToLightmap( color, pSrc ); color[3] = RoundFloatToByte( pSrc[3] * 255.0f ); m_LightmapPixelWriter.WritePixel( color[0], color[1], color[2], color[3] ); if ( pfmOut ) { // Write data to the bitmapped represenations so that PFM files can be written PixRGBAF pixelData; pixelData.Red = color[0]; pixelData.Green = color[1]; pixelData.Blue = color[2]; pixelData.Alpha = color[3]; pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData ); } } } } void CMatLightmaps::LightmapBitsToPixelWriter_HDRF( float* pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t *pfmOut ) { if ( IsX360() ) { // 360 does not support HDR float Assert( 0 ); return; } // float HDR lightmap processing float *pSrc = pFloatImage; for ( int t = 0; t < pLightmapSize[1]; ++t ) { m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) ) { m_LightmapPixelWriter.WritePixelF( pSrc[0], pSrc[1], pSrc[2], pSrc[3] ); } } } // numbers come in on the domain [0..16] void CMatLightmaps::LightmapBitsToPixelWriter_HDRI( float* RESTRICT pFloatImage, int pLightmapSize[2], int pOffsetIntoLightmapPage[2], FloatBitMap_t * RESTRICT pfmOut ) { #ifndef X360_USE_SIMD_LIGHTMAP // PC code (and old, pre-SIMD xbox version -- unshippably slow) if ( m_LightmapPixelWriter.IsUsingFloatFormat() ) { // integer HDR lightmap processing float *pSrc = pFloatImage; for ( int t = 0; t < pLightmapSize[1]; ++t ) { m_LightmapPixelWriter.Seek( 
pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) ) { int r, g, b, a; r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] ); g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] ); b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] ); a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 ); float toFloat = ( 1.0f / ( float )( 1 << 16 ) ); #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) ) if( IsX360() ) { Vector4D vRGBScale; vRGBScale.x = r * (16.0f / 65535.0f); vRGBScale.y = g * (16.0f / 65535.0f); vRGBScale.z = b * (16.0f / 65535.0f); vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x ); r = RoundFloatToByte( vRGBScale.x * 255.0f ); g = RoundFloatToByte( vRGBScale.y * 255.0f ); b = RoundFloatToByte( vRGBScale.z * 255.0f ); a = RoundFloatToByte( vRGBScale.w * 255.0f ); toFloat = ( 1.0f / ( float )( 1 << 8 ) ); } #endif Assert( pSrc[3] >= 0.0f && pSrc[3] <= 1.0f ); m_LightmapPixelWriter.WritePixelF( r * toFloat, g * toFloat, b * toFloat, pSrc[3] ); } } } else { // integer HDR lightmap processing float *pSrc = pFloatImage; for ( int t = 0; t < pLightmapSize[1]; ++t ) { m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) ) { int r, g, b, a; r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] ); g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] ); b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] ); a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 ); #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) ) if( IsX360() ) { Vector4D vRGBScale; vRGBScale.x = r * (16.0f / 65535.0f); vRGBScale.y = g * (16.0f / 65535.0f); vRGBScale.z = b * (16.0f / 65535.0f); vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x ); r = RoundFloatToByte( vRGBScale.x * 255.0f ); g = RoundFloatToByte( vRGBScale.y * 255.0f ); b = RoundFloatToByte( vRGBScale.z * 255.0f ); a = 
RoundFloatToByte( vRGBScale.w * 255.0f ); } #endif m_LightmapPixelWriter.WritePixel( r, g, b, a ); if ( pfmOut ) { // Write data to the bitmapped represenations so that PFM files can be written PixRGBAF pixelData; pixelData.Red = pSrc[0]; pixelData.Green = pSrc[1]; pixelData.Blue = pSrc[2]; pixelData.Alpha = pSrc[3]; pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData ); } } } } #else // XBOX360 code if ( m_LightmapPixelWriter.IsUsingFloatFormat() ) { if( IsX360() ) { AssertMsg( false, "Float-format pixel writers do not exist on x360." ); } else { // This code is here as an example only, in case floating point // format is restored to 360. // integer HDR lightmap processing float * RESTRICT pSrc = pFloatImage; for ( int t = 0; t < pLightmapSize[1]; ++t ) { m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += (sizeof(Vector4D)/sizeof(*pSrc)) ) { int r, g, b, a; r = ColorSpace::LinearFloatToCorrectedShort( pSrc[0] ); g = ColorSpace::LinearFloatToCorrectedShort( pSrc[1] ); b = ColorSpace::LinearFloatToCorrectedShort( pSrc[2] ); a = ColorSpace::LinearToUnsignedShort( pSrc[3], 16 ); float toFloat = ( 1.0f / ( float )( 1 << 16 ) ); #if ( defined( USE_32BIT_LIGHTMAPS_ON_360 ) ) if( IsX360() ) { Vector4D vRGBScale; vRGBScale.x = r * (16.0f / 65535.0f); vRGBScale.y = g * (16.0f / 65535.0f); vRGBScale.z = b * (16.0f / 65535.0f); vRGBScale = ConvertLightmapColorToRGBScale( &vRGBScale.x ); r = RoundFloatToByte( vRGBScale.x * 255.0f ); g = RoundFloatToByte( vRGBScale.y * 255.0f ); b = RoundFloatToByte( vRGBScale.z * 255.0f ); a = RoundFloatToByte( vRGBScale.w * 255.0f ); toFloat = ( 1.0f / ( float )( 1 << 8 ) ); } #endif Assert( pSrc[3] >= 0.0f && pSrc[3] <= 1.0f ); m_LightmapPixelWriter.WritePixelF( r * toFloat, g * toFloat, b * toFloat, pSrc[3] ); } } } } else { // This is the fast X360 pathway. 
// integer HDR lightmap processing float * RESTRICT pSrc = pFloatImage; // Assert((reinterpret_cast(pSrc) & 15) == 0); // 16-byte aligned? COMPILE_TIME_ASSERT(sizeof(Vector4D)/sizeof(*pSrc) == 4); // assert that 1 * 4 = 4 #ifndef USE_32BIT_LIGHTMAPS_ON_360 #pragma error("This function only supports 32 bit lightmaps.") #endif // input numbers from pSrc are on the domain [0..+inf] // we clamp them to the range [0..16] // output is RGBA // the shader does this: rOut = Rin * Ain * 16.0f // where Rin is [0..1], a float computed from a byte value [0..255] // Ain is therefore the brightest channel (say R) divided by 16 and quantized // Rin is computed from pSrc->r by dividing by Ain // rather than switching inside WritePixel for each different format, // thus causing a 23-cycle pipeline clear for every pixel, we'll // branch on the format here. That will allow us to unroll the inline // pixel write functions differently depending on their different // latencies. Assert(!pfmOut); // should never happen on 360. #ifndef ALLOW_PFM_OUTPUT_ON_360 if ( pfmOut ) { Warning("*****************************************\n" "Lightmap output on 360 HAS BEEN DISABLED.\n" "A grave error has just occurred.\n" "*****************************************\n"); } #endif // switch once, here, outside the loop, rather than // switching inside each pixel. Switches are not fast // on x360: they are usually implemented as jumps // through function tables, which have a 24-cycle // stall. switch (m_LightmapPixelWriter.GetFormat()) { // note: format names are low-order-byte first. 
case IMAGE_FORMAT_RGBA8888: case IMAGE_FORMAT_LINEAR_RGBA8888: { for ( int t = 0; t < pLightmapSize[1]; ++t ) { m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 ) { static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f}; fltx4 rgba = LoadUnalignedSIMD(pSrc); // clamp to 0..16 float rgba = MinSIMD(rgba, vSixteen); // compute the scaling factor, place it in w, and // scale the rest by it. rgba = ConvertLightmapColorToRGBScale( rgba ); // rgba is now float 0..255 in each component m_LightmapPixelWriter.WritePixelNoAdvance_RGBA8888(rgba); /* // not supported on X360 if ( pfmOut ) { // Write data to the bitmapped represenations so that PFM files can be written PixRGBAF pixelData; XMStoreVector4(&pixelData,rgba); pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData ); } */ } } break; } case IMAGE_FORMAT_BGRA8888: // NOTE! : the low order bits are first in this naming convention. case IMAGE_FORMAT_LINEAR_BGRA8888: { for ( int t = 0; t < pLightmapSize[1]; ++t ) { m_LightmapPixelWriter.Seek( pOffsetIntoLightmapPage[0], pOffsetIntoLightmapPage[1] + t ); for ( int s = 0; s < pLightmapSize[0]; ++s, pSrc += 4 ) { static const fltx4 vSixteen = {16.0f, 16.0f, 16.0f, 16.0f}; fltx4 rgba = LoadUnalignedSIMD(pSrc); // clamp to 0..16 float rgba = MinSIMD(rgba, vSixteen); // compute the scaling factor, place it in w, and // scale the rest by it. 
rgba = ConvertLightmapColorToRGBScale( rgba ); // rgba is now float 0..255 in each component m_LightmapPixelWriter.WritePixelNoAdvance_BGRA8888(rgba); // forcibly advance m_LightmapPixelWriter.SkipBytes(4); /* // not supported on X360 if ( pfmOut ) { // Write data to the bitmapped represenations so that PFM files can be written PixRGBAF pixelData; XMStoreVector4(&pixelData,rgba); pfmOut->WritePixelRGBAF( pOffsetIntoLightmapPage[0] + s, pOffsetIntoLightmapPage[1] + t, pixelData ); } */ } } break; } default: AssertMsg1(false,"Unsupported pixel format %d while writing lightmaps!", m_LightmapPixelWriter.GetFormat() ); Warning("Unsupported pixel format used in lightmap. Lightmaps could not be downloaded.\n"); break; } } #endif } void CMatLightmaps::BeginUpdateLightmaps( void ) { CMatCallQueue *pCallQueue = GetMaterialSystem()->GetRenderContextInternal()->GetCallQueueInternal(); if ( pCallQueue ) { pCallQueue->QueueCall( this, &CMatLightmaps::BeginUpdateLightmaps ); return; } m_nUpdatingLightmapsStackDepth++; } void CMatLightmaps::EndUpdateLightmaps( void ) { CMatCallQueue *pCallQueue = GetMaterialSystem()->GetRenderContextInternal()->GetCallQueueInternal(); if ( pCallQueue ) { pCallQueue->QueueCall( this, &CMatLightmaps::EndUpdateLightmaps ); return; } m_nUpdatingLightmapsStackDepth--; Assert( m_nUpdatingLightmapsStackDepth >= 0 ); if( m_nUpdatingLightmapsStackDepth <= 0 && m_nLockedLightmap != -1 ) { g_pShaderAPI->TexUnlock(); m_nLockedLightmap = -1; } } int CMatLightmaps::AllocateDynamicLightmap( int lightmapSize[2], int *pOutOffsetIntoPage, int frameID ) { // check frameID, fail if current for ( int i = 0; i < COUNT_DYNAMIC_LIGHTMAP_PAGES; i++ ) { int dynamicIndex = (m_dynamic.currentDynamicIndex + i) % COUNT_DYNAMIC_LIGHTMAP_PAGES; int lightmapPageIndex = m_firstDynamicLightmap + dynamicIndex; if ( m_dynamic.lightmapLockFrame[dynamicIndex] != frameID ) { m_dynamic.lightmapLockFrame[dynamicIndex] = frameID; m_dynamic.imagePackers[dynamicIndex].Reset( 0, 
m_pLightmapPages[lightmapPageIndex].m_Width, m_pLightmapPages[lightmapPageIndex].m_Height ); } if ( m_dynamic.imagePackers[dynamicIndex].AddBlock( lightmapSize[0], lightmapSize[1], &pOutOffsetIntoPage[0], &pOutOffsetIntoPage[1] ) ) { return lightmapPageIndex; } } return -1; } //----------------------------------------------------------------------------- // Updates the lightmap //----------------------------------------------------------------------------- void CMatLightmaps::UpdateLightmap( int lightmapPageID, int lightmapSize[2], int offsetIntoLightmapPage[2], float *pFloatImage, float *pFloatImageBump1, float *pFloatImageBump2, float *pFloatImageBump3 ) { VPROF( "CMatRenderContext::UpdateLightmap" ); bool hasBump = false; int uSize = 1; FloatBitMap_t *pfmOut = NULL; if ( pFloatImageBump1 && pFloatImageBump2 && pFloatImageBump3 ) { hasBump = true; uSize = 4; } if ( lightmapPageID >= GetNumLightmapPages() || lightmapPageID < 0 ) { Error( "MaterialSystem_Interface_t::UpdateLightmap lightmapPageID=%d out of range\n", lightmapPageID ); return; } bool bDynamic = IsDynamicLightmap(lightmapPageID); if ( bDynamic ) { int dynamicIndex = lightmapPageID-m_firstDynamicLightmap; Assert(dynamicIndex < COUNT_DYNAMIC_LIGHTMAP_PAGES); m_dynamic.currentDynamicIndex = (dynamicIndex + 1) % COUNT_DYNAMIC_LIGHTMAP_PAGES; } if ( mat_lightmap_pfms.GetBool()) { // Allocate and initialize lightmap data that will be written to a PFM file if (NULL == m_pLightmapDataPtrArray[lightmapPageID]) { m_pLightmapDataPtrArray[lightmapPageID] = new FloatBitMap_t(m_pLightmapPages[lightmapPageID].m_Width, m_pLightmapPages[lightmapPageID].m_Height); m_pLightmapDataPtrArray[lightmapPageID]->Clear(0, 0, 0, 1); } pfmOut = m_pLightmapDataPtrArray[lightmapPageID]; } // NOTE: Change how the lock is taking place if you ever change how bumped // lightmaps are put into the page. Right now, we assume that they're all // added to the right of the original lightmap. 
bool bLockSubRect; { VPROF_( "Locking lightmaps", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); // vprof scope bLockSubRect = m_nUpdatingLightmapsStackDepth <= 0 && !bDynamic; if( bLockSubRect ) { VPROF_INCREMENT_COUNTER( "lightmap subrect texlock", 1 ); g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[lightmapPageID] ); if (!g_pShaderAPI->TexLock( 0, 0, offsetIntoLightmapPage[0], offsetIntoLightmapPage[1], lightmapSize[0] * uSize, lightmapSize[1], m_LightmapPixelWriter )) { return; } } else if( lightmapPageID != m_nLockedLightmap ) { if ( !LockLightmap( lightmapPageID ) ) { ExecuteNTimes( 10, Warning( "Failed to lock lightmap\n" ) ); return; } } } int subRectOffset[2] = {0,0}; { // account for the part spent in math: VPROF_( "LightmapBitsToPixelWriter", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); if ( hasBump ) { switch( HardwareConfig()->GetHDRType() ) { case HDR_TYPE_NONE: BumpedLightmapBitsToPixelWriter_LDR( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break; case HDR_TYPE_INTEGER: BumpedLightmapBitsToPixelWriter_HDRI( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break; case HDR_TYPE_FLOAT: BumpedLightmapBitsToPixelWriter_HDRF( pFloatImage, pFloatImageBump1, pFloatImageBump2, pFloatImageBump3, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break; } } else { switch ( HardwareConfig()->GetHDRType() ) { case HDR_TYPE_NONE: LightmapBitsToPixelWriter_LDR( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break; case HDR_TYPE_INTEGER: LightmapBitsToPixelWriter_HDRI( pFloatImage, lightmapSize, bLockSubRect ? subRectOffset : offsetIntoLightmapPage, pfmOut ); break; case HDR_TYPE_FLOAT: LightmapBitsToPixelWriter_HDRF( pFloatImage, lightmapSize, bLockSubRect ? 
subRectOffset : offsetIntoLightmapPage, pfmOut ); break; default: Assert( 0 ); break; } } } if( bLockSubRect ) { VPROF_( "Unlocking Lightmaps", 2, VPROF_BUDGETGROUP_DLIGHT_RENDERING, false, 0 ); g_pShaderAPI->TexUnlock(); } } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- int CMatLightmaps::GetNumSortIDs( void ) { return m_numSortIDs; } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- void CMatLightmaps::ComputeSortInfo( MaterialSystem_SortInfo_t* pInfo, int& sortId, bool alpha ) { int lightmapPageID; for ( MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) ) { IMaterialInternal* pMaterial = GetMaterialInternal(i); if ( pMaterial->GetMinLightmapPageID() > pMaterial->GetMaxLightmapPageID() ) { continue; } // const IMaterialVar *pTransVar = pMaterial->GetMaterialProperty( MATERIAL_PROPERTY_OPACITY ); // if( ( !alpha && ( pTransVar->GetIntValue() == MATERIAL_TRANSLUCENT ) ) || // ( alpha && !( pTransVar->GetIntValue() == MATERIAL_TRANSLUCENT ) ) ) // { // return true; // } // Warning( "sort stuff: %s %s\n", material->GetName(), bAlpha ? 
"alpha" : "not alpha" ); // fill in the lightmapped materials for ( lightmapPageID = pMaterial->GetMinLightmapPageID(); lightmapPageID <= pMaterial->GetMaxLightmapPageID(); ++lightmapPageID ) { pInfo[sortId].material = pMaterial->GetQueueFriendlyVersion(); pInfo[sortId].lightmapPageID = lightmapPageID; #if 0 char buf[128]; Q_snprintf( buf, sizeof( buf ), "ComputeSortInfo: %s lightmapPageID: %d sortID: %d\n", pMaterial->GetName(), lightmapPageID, sortId ); OutputDebugString( buf ); #endif ++sortId; } } } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- void CMatLightmaps::ComputeWhiteLightmappedSortInfo( MaterialSystem_SortInfo_t* pInfo, int& sortId, bool alpha ) { for (MaterialHandle_t i = GetMaterialDict()->FirstMaterial(); i != GetMaterialDict()->InvalidMaterial(); i = GetMaterialDict()->NextMaterial(i) ) { IMaterialInternal* pMaterial = GetMaterialInternal(i); // fill in the lightmapped materials that are actually used by this level if( pMaterial->GetNeedsWhiteLightmap() && ( pMaterial->GetReferenceCount() > 0 ) ) { // const IMaterialVar *pTransVar = pMaterial->GetMaterialProperty( MATERIAL_PROPERTY_OPACITY ); // if( ( !alpha && ( pTransVar->GetIntValue() == MATERIAL_TRANSLUCENT ) ) || // ( alpha && !( pTransVar->GetIntValue() == MATERIAL_TRANSLUCENT ) ) ) // { // return true; // } pInfo[sortId].material = pMaterial->GetQueueFriendlyVersion(); if( pMaterial->GetPropertyFlag( MATERIAL_PROPERTY_NEEDS_BUMPED_LIGHTMAPS ) ) { pInfo[sortId].lightmapPageID = MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE_BUMP; } else { pInfo[sortId].lightmapPageID = MATERIAL_SYSTEM_LIGHTMAP_PAGE_WHITE; } sortId++; } } } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- void CMatLightmaps::GetSortInfo( MaterialSystem_SortInfo_t *pSortInfoArray ) { // sort non-alpha blended 
materials first int sortId = 0; ComputeSortInfo( pSortInfoArray, sortId, false ); ComputeWhiteLightmappedSortInfo( pSortInfoArray, sortId, false ); Assert( m_numSortIDs == sortId ); } //----------------------------------------------------------------------------- // //----------------------------------------------------------------------------- void CMatLightmaps::EnableLightmapFiltering( bool enabled ) { int i; for( i = 0; i < GetNumLightmapPages(); i++ ) { g_pShaderAPI->ModifyTexture( m_LightmapPageTextureHandles[i] ); if( enabled ) { g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_LINEAR ); g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_LINEAR ); } else { g_pShaderAPI->TexMinFilter( SHADER_TEXFILTERMODE_NEAREST ); g_pShaderAPI->TexMagFilter( SHADER_TEXFILTERMODE_NEAREST ); } } }