using Unity.Collections;
using Unity.Collections.LowLevel.Unsafe;
using System.Runtime.CompilerServices;
using Unity.Mathematics;
using static Unity.Mathematics.math;

namespace UnityEngine.Rendering.Universal.Internal
{
    // Bins punctual lights into fixed-size screen tiles.
    // This structure is designed to be Burst friendly.
    // It can be copied by value.
    internal struct DeferredTiler
    {
        // Precomputed light data
        internal struct PrePunctualLight
        {
            // view-space position.
            public float3 posVS;
            // Radius in world unit.
            public float radius;
            // Distance between closest bound of the light and the camera. Used for sorting lights front-to-back.
            public float minDist;
            // Projected position of the sphere centre on the screen (near plane).
            public float2 screenPos;
            // Index into renderingData.lightData.visibleLights native array.
            public ushort visLightIndex;
        }

        // Result of clipping a sphere against one tile plane (see ClipPartial()).
        enum ClipResult
        {
            Unknown,
            In,
            Out,
        }

        int m_TilePixelWidth;
        int m_TilePixelHeight;
        int m_TileXCount;
        int m_TileYCount;
        // Fixed header size in uint in m_TileHeaders.
        // Only finest tiler requires to store extra per-tile information (light list depth range, bitmask for 2.5D culling).
        int m_TileHeaderSize;
        // Indicative average lights per tile. Only used when initializing the size of m_TileData for the first time.
        int m_AvgLightPerTile;
        // 0, 1 or 2 (see DeferredConfig.kTilerDepth)
        int m_TilerLevel;

        // Camera frustum planes, adjusted to account for tile size.
        FrustumPlanes m_FrustumPlanes;
        // Are we dealing with an orthographic projection.
        bool m_IsOrthographic;

        // Atomic counters are put in a NativeArray so they can be accessed/shared from jobs.
        // [0] maxLightPerTile: Only valid for finest tiler: max light counter per tile. Reset every frame.
        // [1] tileDataSize: reset every frame.
        // [2] tileDataCapacity: extra amount of memory required by each tiler (depends on number of lights visible). Externally maintained.
        [Unity.Collections.LowLevel.Unsafe.NativeDisableContainerSafetyRestriction]
        NativeArray<int> m_Counters;

        // Store all visible light indices for all tiles.
        // (currently) Contains sequential blocks of ushort values (light indices and optionally lightDepthRange), for each tile
        // For example for platforms using 16x16px tiles:
        // in a finest        tiler DeferredLights.m_Tilers[0] (  16x16px tiles), each tile will use a block of  1 *  1 * 32 =   32 ushort values
        // in an intermediate tiler DeferredLights.m_Tilers[1] (  64x64px tiles), each tile will use a block of  4 *  4 * 32 =  512 ushort values
        // in a coarsest      tiler DeferredLights.m_Tilers[2] (256x256px tiles), each tile will use a block of 16 * 16 * 32 = 8192 ushort values
        [Unity.Collections.LowLevel.Unsafe.NativeDisableContainerSafetyRestriction]
        NativeArray<ushort> m_TileData;

        // Store tile header (fixed size per tile)
        // light offset, light count, optionally additional per-tile "header" values.
        [Unity.Collections.LowLevel.Unsafe.NativeDisableContainerSafetyRestriction]
        NativeArray<uint> m_TileHeaders;

        // Precompute tile data.
        [Unity.Collections.LowLevel.Unsafe.NativeDisableContainerSafetyRestriction]
        NativeArray<PreTile> m_PreTiles;

        // Creates a tiler for the given tile size. No native memory is allocated here:
        // tile counts are set by PrecomputeTiles() and the per-frame arrays by Setup().
        public DeferredTiler(int tilePixelWidth, int tilePixelHeight, int avgLightPerTile, int tilerLevel)
        {
            m_TilePixelWidth = tilePixelWidth;
            m_TilePixelHeight = tilePixelHeight;
            m_TileXCount = 0;
            m_TileYCount = 0;
            // Finest tiler (at index 0) computes extra tile data stored into the header, so it requires more space. See CullFinalLights() vs CullIntermediateLights().
            // Finest tiler: lightListOffset, lightCount, listDepthRange, listBitMask
            // Coarse tilers: lightListOffset, lightCount
            m_TileHeaderSize = tilerLevel == 0 ? 4 : 2;
            m_AvgLightPerTile = avgLightPerTile;
            m_TilerLevel = tilerLevel;
            m_FrustumPlanes = new FrustumPlanes { left = 0, right = 0, bottom = 0, top = 0, zNear = 0, zFar = 0 };
            m_IsOrthographic = false;
            m_Counters = new NativeArray<int>();
            m_TileData = new NativeArray<ushort>();
            m_TileHeaders = new NativeArray<uint>();
            m_PreTiles = new NativeArray<PreTile>();
        }

        public int TilerLevel
        {
            get { return m_TilerLevel; }
        }

        public int TileXCount
        {
            get { return m_TileXCount; }
        }

        public int TileYCount
        {
            get { return m_TileYCount; }
        }

        public int TilePixelWidth
        {
            get { return m_TilePixelWidth; }
        }

        public int TilePixelHeight
        {
            get { return m_TilePixelHeight; }
        }

        public int TileHeaderSize
        {
            get { return m_TileHeaderSize; }
        }

        // Counter [0]; 0 when the counters have not been allocated.
        public int MaxLightPerTile
        {
            get { return m_Counters.IsCreated ? m_Counters[0] : 0; }
        }

        // Counter [2]; 0 when the counters have not been allocated.
        public int TileDataCapacity
        {
            get { return m_Counters.IsCreated ? m_Counters[2] : 0; }
        }

        public NativeArray<ushort> Tiles
        {
            get { return m_TileData; }
        }

        public NativeArray<uint> TileHeaders
        {
            get { return m_TileHeaders; }
        }

        // Reads the first two header values of tile (i, j): the offset of its light list
        // in the tile data array and its light count.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void GetTileOffsetAndCount(int i, int j, out int offset, out int count)
        {
            int headerOffset = GetTileHeaderOffset(i, j);
            offset = (int)m_TileHeaders[headerOffset + 0];
            count = (int)m_TileHeaders[headerOffset + 1];
        }

        // Index of the first header value of tile (i, j) inside m_TileHeaders (row-major tile order).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public int GetTileHeaderOffset(int i, int j)
        {
            return (i + j * m_TileXCount) * m_TileHeaderSize;
        }

        // Allocates the counters, tile data and tile header arrays (Allocator.Temp, uninitialized memory)
        // and resets the counters.
        // tileDataCapacity: number of ushort values to reserve for tile data; when <= 0 a default of
        // tileXCount * tileYCount * avgLightPerTile is used.
        // The tile counts are only assigned by PrecomputeTiles(), so it must have run beforehand
        // for the header array (and the default capacity) to be non-empty.
        public void Setup(int tileDataCapacity)
        {
            if (tileDataCapacity <= 0)
                tileDataCapacity = m_TileXCount * m_TileYCount * m_AvgLightPerTile;

            m_Counters = new NativeArray<int>(3, Allocator.Temp, NativeArrayOptions.UninitializedMemory);
            m_TileData = new NativeArray<ushort>(tileDataCapacity, Allocator.Temp, NativeArrayOptions.UninitializedMemory);
            m_TileHeaders = new NativeArray<uint>(m_TileXCount * m_TileYCount * m_TileHeaderSize, Allocator.Temp, NativeArrayOptions.UninitializedMemory);

            m_Counters[0] = 0;
            m_Counters[1] = 0;
            m_Counters[2] = tileDataCapacity;
        }

        // Disposes the arrays allocated by Setup(). m_PreTiles is not disposed here: it is
        // obtained from DeferredShaderData in PrecomputeTiles(), not allocated by this struct.
        public void OnCameraCleanup()
        {
            if (m_TileHeaders.IsCreated)
                m_TileHeaders.Dispose();
            if (m_TileData.IsCreated)
                m_TileData.Dispose();
            if (m_Counters.IsCreated)
                m_Counters.Dispose();
        }

        // Computes the tile grid dimensions for the given render size and precomputes the four
        // side planes (left, right, bottom, top) of every tile frustum in view space.
        public void PrecomputeTiles(Matrix4x4 proj, bool isOrthographic, int renderWidth, int renderHeight)
        {
            m_TileXCount = (renderWidth + m_TilePixelWidth - 1) / m_TilePixelWidth;
            m_TileYCount = (renderHeight + m_TilePixelHeight - 1) / m_TilePixelHeight;

            m_PreTiles = DeferredShaderData.instance.GetPreTiles(m_TilerLevel, m_TileXCount * m_TileYCount);

            // Adjust render width and height to account for tile size expanding over the screen (tiles have a fixed pixel size).
            int adjustedRenderWidth = Align(renderWidth, m_TilePixelWidth);
            int adjustedRenderHeight = Align(renderHeight, m_TilePixelHeight);

            // Now adjust the right and bottom clipping planes.
            m_FrustumPlanes = proj.decomposeProjection;
            m_FrustumPlanes.right = m_FrustumPlanes.left + (m_FrustumPlanes.right - m_FrustumPlanes.left) * (adjustedRenderWidth / (float)renderWidth);
            m_FrustumPlanes.bottom = m_FrustumPlanes.top + (m_FrustumPlanes.bottom - m_FrustumPlanes.top) * (adjustedRenderHeight / (float)renderHeight);
            m_IsOrthographic = isOrthographic;

            // Tile size in world units.
            float tileWidthWS = (m_FrustumPlanes.right - m_FrustumPlanes.left) / m_TileXCount;
            float tileHeightWS = (m_FrustumPlanes.top - m_FrustumPlanes.bottom) / m_TileYCount;

            if (!isOrthographic) // perspective
            {
                for (int j = 0; j < m_TileYCount; ++j)
                {
                    float tileTop = m_FrustumPlanes.top - tileHeightWS * j;
                    float tileBottom = tileTop - tileHeightWS;

                    for (int i = 0; i < m_TileXCount; ++i)
                    {
                        float tileLeft = m_FrustumPlanes.left + tileWidthWS * i;
                        float tileRight = tileLeft + tileWidthWS;

                        // Camera view space is always OpenGL RH coordinates system.
                        // In view space with perspective projection, all planes pass by (0,0,0).
                        PreTile preTile;
                        preTile.planeLeft = MakePlane(new float3(tileLeft, tileBottom, -m_FrustumPlanes.zNear), new float3(tileLeft, tileTop, -m_FrustumPlanes.zNear));
                        preTile.planeRight = MakePlane(new float3(tileRight, tileTop, -m_FrustumPlanes.zNear), new float3(tileRight, tileBottom, -m_FrustumPlanes.zNear));
                        preTile.planeBottom = MakePlane(new float3(tileRight, tileBottom, -m_FrustumPlanes.zNear), new float3(tileLeft, tileBottom, -m_FrustumPlanes.zNear));
                        preTile.planeTop = MakePlane(new float3(tileLeft, tileTop, -m_FrustumPlanes.zNear), new float3(tileRight, tileTop, -m_FrustumPlanes.zNear));

                        m_PreTiles[i + j * m_TileXCount] = preTile;
                    }
                }
            }
            else
            {
                for (int j = 0; j < m_TileYCount; ++j)
                {
                    float tileTop = m_FrustumPlanes.top - tileHeightWS * j;
                    float tileBottom = tileTop - tileHeightWS;

                    for (int i = 0; i < m_TileXCount; ++i)
                    {
                        float tileLeft = m_FrustumPlanes.left + tileWidthWS * i;
                        float tileRight = tileLeft + tileWidthWS;

                        // Camera view space is always OpenGL RH coordinates system.
                        // With an orthographic projection the side planes are parallel to the view axis,
                        // so each one is built from three points instead of passing through the origin.
                        PreTile preTile;
                        preTile.planeLeft = MakePlane(new float3(tileLeft, tileBottom, -m_FrustumPlanes.zNear), new float3(tileLeft, tileBottom, -m_FrustumPlanes.zNear - 1.0f), new float3(tileLeft, tileTop, -m_FrustumPlanes.zNear));
                        preTile.planeRight = MakePlane(new float3(tileRight, tileTop, -m_FrustumPlanes.zNear), new float3(tileRight, tileTop, -m_FrustumPlanes.zNear - 1.0f), new float3(tileRight, tileBottom, -m_FrustumPlanes.zNear));
                        preTile.planeBottom = MakePlane(new float3(tileRight, tileBottom, -m_FrustumPlanes.zNear), new float3(tileRight, tileBottom, -m_FrustumPlanes.zNear - 1.0f), new float3(tileLeft, tileBottom, -m_FrustumPlanes.zNear));
                        preTile.planeTop = MakePlane(new float3(tileLeft, tileTop, -m_FrustumPlanes.zNear), new float3(tileLeft, tileTop, -m_FrustumPlanes.zNear - 1.0f), new float3(tileRight, tileTop, -m_FrustumPlanes.zNear));

                        m_PreTiles[i + j * m_TileXCount] = preTile;
                    }
                }
            }
        }

        // Culls lights against the tiles [istart, iend) x [jstart, jend) and writes, for each tile,
        // a 4-value header plus a block of 2 * lightCount ushort values (light indices followed by
        // their packed depth ranges) into the tile data.
        // The candidate lights are lightIndices[lightStartIndex .. lightStartIndex + lightCount),
        // each entry being an index into punctualLights.
        //
        // This differs from CullIntermediateLights in 3 ways:
        // - tile-frustums/light intersection use different algorithm
        // - depth range of the light shape intersecting the tile-frustums is output in the tile list header section
        // - light indices written out are indexing visible_lights, rather than the array of PrePunctualLights.
        public unsafe void CullFinalLights(ref NativeArray<PrePunctualLight> punctualLights,
            ref NativeArray<ushort> lightIndices, int lightStartIndex, int lightCount,
            int istart, int iend, int jstart, int jend)
        {
            // Interestingly, 2-3% faster when using unsafe arrays.
            PrePunctualLight* _punctualLights = (PrePunctualLight*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(punctualLights);
            ushort* _lightIndices = (ushort*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(lightIndices);
            uint* _tileHeaders = (uint*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(m_TileHeaders);

            if (lightCount == 0)
            {
                // Nothing to cull: clear the headers of the covered tiles and leave the counters untouched.
                for (int j = jstart; j < jend; ++j)
                    for (int i = istart; i < iend; ++i)
                    {
                        int headerOffset = GetTileHeaderOffset(i, j);
                        _tileHeaders[headerOffset + 0] = 0;
                        _tileHeaders[headerOffset + 1] = 0;
                        _tileHeaders[headerOffset + 2] = 0;
                        _tileHeaders[headerOffset + 3] = 0;
                    }
                return;
            }

            // Store culled lights in temporary buffer. Additionally store depth range of each light for a given tile too.
            // the depth range is a 32bit mask, but packed into a 16bits value since the range of the light is continuous
            // (only need to store first bit enabled, and count of enabled bits).
            ushort* tiles = stackalloc ushort[lightCount * 2];
            float2* depthRanges = stackalloc float2[lightCount];

            int maxLightPerTile = 0; // for stats
            int lightEndIndex = lightStartIndex + lightCount;

            float2 tileSize = new float2((m_FrustumPlanes.right - m_FrustumPlanes.left) / m_TileXCount, (m_FrustumPlanes.top - m_FrustumPlanes.bottom) / m_TileYCount);
            float2 tileExtents = tileSize * 0.5f;
            float2 tileExtentsInv = new float2(1.0f / tileExtents.x, 1.0f / tileExtents.y);

            for (int j = jstart; j < jend; ++j)
            {
                float tileYCentre = m_FrustumPlanes.top - (tileExtents.y + j * tileSize.y);

                for (int i = istart; i < iend; ++i)
                {
                    float tileXCentre = m_FrustumPlanes.left + tileExtents.x + i * tileSize.x;

                    int culledLightCount = 0;

                    // For the current tile's light list, min&max depth range (absolute values).
                    float listMinDepth = float.MaxValue;
                    float listMaxDepth = -float.MaxValue;

                    // Duplicate the inner loop twice. Testing for the orthographic case inside the inner loop would cost an extra 8% otherwise.
                    // Missing C++ template argument here!
                    if (!m_IsOrthographic)
                    {
                        for (int vi = lightStartIndex; vi < lightEndIndex; ++vi)
                        {
                            ushort lightIndex = _lightIndices[vi];
                            PrePunctualLight ppl = _punctualLights[lightIndex];

                            // Offset tileCentre toward the light to calculate a more conservative minMax depth bound,
                            // but it must remains inside the tile and must not pass further than the light centre.
                            float2 tileCentre = new float2(tileXCentre, tileYCentre);
                            float2 dir = ppl.screenPos - tileCentre;
                            float2 d = abs(dir * tileExtentsInv);

                            float sInv = 1.0f / max3(d.x, d.y, 1.0f);
                            float3 tileOffCentre = new float3(tileCentre.x + dir.x * sInv, tileCentre.y + dir.y * sInv, -m_FrustumPlanes.zNear);
                            float3 tileOrigin = new float3(0.0f);

                            float t0, t1;
                            // This is more expensive than Clip() but allow to compute min&max depth range for the part of the light inside the tile.
                            if (!IntersectionLineSphere(ppl.posVS, ppl.radius, tileOrigin, tileOffCentre, out t0, out t1))
                                continue;

                            listMinDepth = listMinDepth < t0 ? listMinDepth : t0;
                            listMaxDepth = listMaxDepth > t1 ? listMaxDepth : t1;
                            depthRanges[culledLightCount] = new float2(t0, t1);
                            // Because this always output to the finest tiles, contrary to CullIntermediateLights(),
                            // the result are indices into visibleLights, instead of indices into punctualLights.
                            tiles[culledLightCount] = ppl.visLightIndex;
                            ++culledLightCount;
                        }
                    }
                    else
                    {
                        for (int vi = lightStartIndex; vi < lightEndIndex; ++vi)
                        {
                            ushort lightIndex = _lightIndices[vi];
                            PrePunctualLight ppl = _punctualLights[lightIndex];

                            // Offset tileCentre toward the light to calculate a more conservative minMax depth bound,
                            // but it must remains inside the tile and must not pass further than the light centre.
                            float2 tileCentre = new float2(tileXCentre, tileYCentre);
                            float2 dir = ppl.screenPos - tileCentre;
                            float2 d = abs(dir * tileExtentsInv);

                            float sInv = 1.0f / max3(d.x, d.y, 1.0f);
                            // Orthographic: the ray starts on the tile and runs along the view axis.
                            float3 tileOffCentre = new float3(0, 0, -m_FrustumPlanes.zNear);
                            float3 tileOrigin = new float3(tileCentre.x + dir.x * sInv, tileCentre.y + dir.y * sInv, 0.0f);

                            float t0, t1;
                            // This is more expensive than Clip() but allow to compute min&max depth range for the part of the light inside the tile.
                            if (!IntersectionLineSphere(ppl.posVS, ppl.radius, tileOrigin, tileOffCentre, out t0, out t1))
                                continue;

                            listMinDepth = listMinDepth < t0 ? listMinDepth : t0;
                            listMaxDepth = listMaxDepth > t1 ? listMaxDepth : t1;
                            depthRanges[culledLightCount] = new float2(t0, t1);
                            // Because this always output to the finest tiles, contrary to CullIntermediateLights(),
                            // the result are indices into visibleLights, instead of indices into punctualLights.
                            tiles[culledLightCount] = ppl.visLightIndex;
                            ++culledLightCount;
                        }
                    }

                    // Post-multiply by zNear to get actual world unit absolute depth values, then clamp to valid depth range.
                    listMinDepth = max2(listMinDepth * m_FrustumPlanes.zNear, m_FrustumPlanes.zNear);
                    listMaxDepth = min2(listMaxDepth * m_FrustumPlanes.zNear, m_FrustumPlanes.zFar);

                    // Calculate bitmask for 2.5D culling.
                    // NOTE(review): when no light was kept (or the clamped range is empty) depthRangeInv, a and b
                    // below are derived from sentinel/degenerate values - confirm consumers ignore them in that case.
                    uint bitMask = 0;
                    float depthRangeInv = 1.0f / (listMaxDepth - listMinDepth);
                    for (int culledLightIndex = 0; culledLightIndex < culledLightCount; ++culledLightIndex)
                    {
                        float lightMinDepth = max2(depthRanges[culledLightIndex].x * m_FrustumPlanes.zNear, m_FrustumPlanes.zNear);
                        float lightMaxDepth = min2(depthRanges[culledLightIndex].y * m_FrustumPlanes.zNear, m_FrustumPlanes.zFar);
                        int firstBit = (int)((lightMinDepth - listMinDepth) * 32.0f * depthRangeInv);
                        int lastBit = (int)((lightMaxDepth - listMinDepth) * 32.0f * depthRangeInv);
                        int bitCount = min(lastBit - firstBit + 1, 32 - firstBit);
                        bitMask |= (uint)((0xFFFFFFFF >> (32 - bitCount)) << firstBit);

                        // Second half of the block: first bit in the low byte, bit count in the high byte.
                        tiles[culledLightCount + culledLightIndex] = (ushort)((uint)firstBit | (uint)(bitCount << 8));
                    }

                    // As listMinDepth and listMaxDepth are used to calculate the geometry 2.5D bitmask,
                    // we can optimize the shader execution (TileDepthInfo.shader) by refactoring the calculation.
                    //   int bitIndex = 32.0h * (geoDepth - listMinDepth) / (listMaxDepth - listMinDepth);
                    // Equivalent to:
                    //   a =                 32.0 / (listMaxDepth - listMinDepth)
                    //   b = -listMinDepth * 32.0 / (listMaxDepth - listMinDepth)
                    //   int bitIndex = geoDepth * a + b;
                    float a = 32.0f * depthRangeInv;
                    float b = -listMinDepth * a;

                    // AddTileData() sets tileDataSize to 0 when the tile data buffer overflowed.
                    int tileDataSize = culledLightCount * 2;
                    int tileOffset = culledLightCount > 0 ? AddTileData(tiles, ref tileDataSize) : 0;

                    int headerOffset = GetTileHeaderOffset(i, j);
                    _tileHeaders[headerOffset + 0] = (uint)tileOffset;
                    _tileHeaders[headerOffset + 1] = (uint)(tileDataSize == 0 ? 0 : culledLightCount);
                    _tileHeaders[headerOffset + 2] = _f32tof16(a) | (_f32tof16(b) << 16);
                    _tileHeaders[headerOffset + 3] = bitMask;

                    maxLightPerTile = max(maxLightPerTile, culledLightCount);
                }
            }

            // Counter [0] is shared between jobs: publish the maximum atomically.
            AtomicMax((int*)m_Counters.GetUnsafePtr(), maxLightPerTile);
        }

        // Culls lights against the precomputed side planes of the tiles [istart, iend) x [jstart, jend)
        // and writes, for each tile, a 2-value header (offset, count) plus the list of surviving
        // light indices (indices into punctualLights) into the tile data.
        // TODO: finer culling for spot lights
        public unsafe void CullIntermediateLights(ref NativeArray<PrePunctualLight> punctualLights,
            ref NativeArray<ushort> lightIndices, int lightStartIndex, int lightCount,
            int istart, int iend, int jstart, int jend)
        {
            // Interestingly, 2-3% faster when using unsafe arrays.
            PrePunctualLight* _punctualLights = (PrePunctualLight*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(punctualLights);
            ushort* _lightIndices = (ushort*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(lightIndices);
            uint* _tileHeaders = (uint*)NativeArrayUnsafeUtility.GetUnsafeBufferPointerWithoutChecks(m_TileHeaders);

            if (lightCount == 0)
            {
                for (int j = jstart; j < jend; ++j)
                    for (int i = istart; i < iend; ++i)
                    {
                        int headerOffset = GetTileHeaderOffset(i, j);
                        _tileHeaders[headerOffset + 0] = 0;
                        _tileHeaders[headerOffset + 1] = 0;
                    }
                return;
            }

            // Store culled result in temporary buffer.
            ushort* tiles = stackalloc ushort[lightCount];

            int lightEndIndex = lightStartIndex + lightCount;

            for (int j = jstart; j < jend; ++j)
            {
                for (int i = istart; i < iend; ++i)
                {
                    PreTile preTile = m_PreTiles[i + j * m_TileXCount];
                    int culledLightCount = 0;

                    for (int vi = lightStartIndex; vi < lightEndIndex; ++vi)
                    {
                        ushort lightIndex = _lightIndices[vi];
                        PrePunctualLight ppl = _punctualLights[lightIndex];

                        // This is slightly faster than IntersectionLineSphere().
                        if (!Clip(ref preTile, ppl.posVS, ppl.radius))
                            continue;

                        tiles[culledLightCount] = lightIndex;
                        ++culledLightCount;
                    }

                    // Copy the culled light list.
                    // AddTileData() resets culledLightCount to 0 when the tile data buffer overflowed.
                    int tileOffset = culledLightCount > 0 ? AddTileData(tiles, ref culledLightCount) : 0;

                    int headerOffset = GetTileHeaderOffset(i, j);
                    _tileHeaders[headerOffset + 0] = (uint)tileOffset;
                    _tileHeaders[headerOffset + 1] = (uint)culledLightCount;
                }
            }
        }

        // Reserves 'size' ushort values in the tile data array (atomic add on counter [1]) and
        // copies lightData into the reserved range.
        // Returns the offset of the reserved range. If the reservation does not fit in the array,
        // nothing is copied, 'size' is set to 0, 0 is returned and the size that would have been
        // needed is recorded in counter [2].
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        unsafe int AddTileData(ushort* lightData, ref int size)
        {
            int* _Counters = (int*)m_Counters.GetUnsafePtr();
            int tileDataSize = System.Threading.Interlocked.Add(ref _Counters[1], size);
            int offset = tileDataSize - size;

            if (tileDataSize <= m_TileData.Length)
            {
                ushort* _TileData = (ushort*)m_TileData.GetUnsafePtr();
                // size is a count of ushort values: 2 bytes each.
                UnsafeUtility.MemCpy(_TileData + offset, lightData, size * 2);
                return offset;
            }
            else
            {
                // Buffer overflow. Ignore data to add.
                // Gracefully increasing the buffer size is possible (allocate a larger array, copy the
                // existing content, swap the arrays) but costs extra CPU time due to the needed critical section.
                // Counter [1] is not rolled back, so counter [2] ends up holding the largest size requested.
                AtomicMax(_Counters + 2, tileDataSize);
                size = 0;
                return 0;
            }
        }

        // Atomically performs *target = max(*target, value) with a compare-exchange loop,
        // so that concurrent callers cannot overwrite a larger value with a smaller one.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static unsafe void AtomicMax(int* target, int value)
        {
            int current = target[0];
            while (value > current)
            {
                int previous = System.Threading.Interlocked.CompareExchange(ref target[0], value, current);
                if (previous == current)
                    break;
                // Another writer got in first: retry against the value it stored.
                current = previous;
            }
        }

        // Return parametric intersection between a sphere and a line.
        // The intersections points P0 and P1 are:
        // P0 = raySource + rayDirection * t0.
        // P1 = raySource + rayDirection * t1.
        // Returns false (with t0 = t1 = 0) when the discriminant is not strictly positive,
        // i.e. when the line misses the sphere or is only tangent to it.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static bool IntersectionLineSphere(float3 centre, float radius, float3 raySource, float3 rayDirection, out float t0, out float t1)
        {
            float A = dot(rayDirection, rayDirection); // always >= 0
            float B = dot(raySource - centre, rayDirection);
            float C = dot(raySource, raySource)
                + dot(centre, centre)
                - (radius * radius)
                - 2 * dot(raySource, centre);
            float discriminant = (B * B) - A * C;
            if (discriminant > 0)
            {
                float sqrt_discriminant = sqrt(discriminant);
                float A_inv = 1.0f / A;
                t0 = (-B - sqrt_discriminant) * A_inv;
                t1 = (-B + sqrt_discriminant) * A_inv;
                return true;
            }
            else
            {
                t0 = 0.0f; // invalid
                t1 = 0.0f; // invalid
                return false;
            }
        }

        // Clip a sphere against a 2D tile. Near and far planes are ignored (already tested).
        // Returns true when the sphere is considered to touch the tile.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static bool Clip(ref PreTile tile, float3 posVS, float radius)
        {
            // Simplified clipping code, only deals with 4 clipping planes.
            // zNear and zFar clipping planes are ignored as presumably the light is already visible to the camera frustum.

            float radiusSq = radius * radius;
            int insideCount = 0;
            ClipResult res;

            res = ClipPartial(tile.planeLeft, tile.planeBottom, tile.planeTop, posVS, radius, radiusSq, ref insideCount);
            if (res != ClipResult.Unknown)
                return res == ClipResult.In;

            res = ClipPartial(tile.planeRight, tile.planeBottom, tile.planeTop, posVS, radius, radiusSq, ref insideCount);
            if (res != ClipResult.Unknown)
                return res == ClipResult.In;

            res = ClipPartial(tile.planeTop, tile.planeLeft, tile.planeRight, posVS, radius, radiusSq, ref insideCount);
            if (res != ClipResult.Unknown)
                return res == ClipResult.In;

            res = ClipPartial(tile.planeBottom, tile.planeLeft, tile.planeRight, posVS, radius, radiusSq, ref insideCount);
            if (res != ClipResult.Unknown)
                return res == ClipResult.In;

            // No plane gave a definitive answer: inside only if the centre is on the inner side of all four.
            return insideCount == 4;
        }

        // Internal function to clip against 1 plane of a cube, with additional 2 side planes for false-positive detection (normally 4 planes, but near and far planes are ignored).
        // Returns Out when the sphere lies entirely behind the plane, In when it straddles the plane
        // and the side-plane test passes, and Unknown otherwise. insideCount is incremented when the
        // sphere centre is on the inner side of the plane.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static ClipResult ClipPartial(float4 plane, float4 sidePlaneA, float4 sidePlaneB, float3 posVS, float radius, float radiusSq, ref int insideCount)
        {
            float d = DistanceToPlane(plane, posVS);
            if (d + radius <= 0.0f) // completely outside
                return ClipResult.Out;
            else if (d < 0.0f) // intersection: further check: only need to consider case where more than half the sphere is outside
            {
                // p: sphere centre projected onto the plane; rSq: squared radius of the intersection disc.
                float3 p = posVS - plane.xyz * d;
                float rSq = radiusSq - d * d;
                if (SignedSq(DistanceToPlane(sidePlaneA, p)) >= -rSq
                    && SignedSq(DistanceToPlane(sidePlaneB, p)) >= -rSq)
                    return ClipResult.In;
            }
            else // consider as good as completely inside
                ++insideCount;

            return ClipResult.Unknown;
        }

        // Builds the plane through the origin, pb and pc (normal = normalize(cross(pb, pc))).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static float4 MakePlane(float3 pb, float3 pc)
        {
            float3 v0 = pb;
            float3 v1 = pc;
            float3 n = cross(v0, v1);
            n = normalize(n);

            // The planes pass all by the origin.
            return new float4(n.x, n.y, n.z, 0.0f);
        }

        // Builds the plane through pa, pb and pc (normal = normalize(cross(pb - pa, pc - pa))).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static float4 MakePlane(float3 pa, float3 pb, float3 pc)
        {
            float3 v0 = pb - pa;
            float3 v1 = pc - pa;
            float3 n = cross(v0, v1);
            n = normalize(n);

            return new float4(n.x, n.y, n.z, -dot(n, pa));
        }

        // Signed distance from p to the plane (xyz = normal, w = offset).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static float DistanceToPlane(float4 plane, float3 p)
        {
            return plane.x * p.x + plane.y * p.y + plane.z * p.z + plane.w;
        }

        // Square of f that keeps the sign of f.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static float SignedSq(float f)
        {
            // slower!
            //return Mathf.Sign(f) * (f * f);
            return (f < 0.0f ? -1.0f : 1.0f) * (f * f);
        }

        // Unity.Mathematics.min() function calls Single_IsNan() which significantly slow down the code (up to 20% of CullFinalLights())!
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static float min2(float a, float b)
        {
            return a < b ? a : b;
        }

        // Unity.Mathematics.max() function calls Single_IsNan() which significantly slow down the code (up to 20% of CullFinalLights())!
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static float max2(float a, float b)
        {
            return a > b ? a : b;
        }

        // Maximum of three values, without NaN checks (see max2()).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static float max3(float a, float b, float c)
        {
            return a > b ? (a > c ? a : c) : (b > c ? b : c);
        }

        // This is copy-pasted from Unity.Mathematics.math.f32tof16(), but use min2() function that does not check for NaN (which would consume 10% of the execution time of CullFinalLights()).
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public static uint _f32tof16(float x)
        {
            const int infinity_32 = 255 << 23;
            const uint msk = 0x7FFFF000u;

            uint ux = asuint(x);
            uint uux = ux & msk;
            uint h = (uint)(asuint(min2(asfloat(uux) * 1.92592994e-34f, 260042752.0f)) + 0x1000) >> 13; // Clamp to signed infinity if overflowed
            h = select(h, select(0x7c00u, 0x7e00u, (int)uux > infinity_32), (int)uux >= infinity_32); // NaN->qNaN and Inf->Inf
            return h | (ux & ~msk) >> 16;
        }

        // Rounds s up to the next multiple of alignment.
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        static int Align(int s, int alignment)
        {
            return ((s + alignment - 1) / alignment) * alignment;
        }
    }
}