419 lines
14 KiB
HLSL
419 lines
14 KiB
HLSL
#ifndef UNITY_AREA_LIGHTING_INCLUDED
|
|
#define UNITY_AREA_LIGHTING_INCLUDED
|
|
|
|
#define APPROXIMATE_POLY_LIGHT_AS_SPHERE_LIGHT
|
|
#define APPROXIMATE_SPHERE_LIGHT_NUMERICALLY
|
|
|
|
// Not normalized by the factor of 1/TWO_PI.
|
|
real3 ComputeEdgeFactor(real3 V1, real3 V2)
|
|
{
|
|
real V1oV2 = dot(V1, V2);
|
|
real3 V1xV2 = cross(V1, V2);
|
|
#if 0
|
|
return normalize(V1xV2) * acos(V1oV2));
|
|
#else
|
|
// Approximate: { y = rsqrt(1.0 - V1oV2 * V1oV2) * acos(V1oV2) } on [0, 1].
|
|
// Fit: HornerForm[MiniMaxApproximation[ArcCos[x]/Sqrt[1 - x^2], {x, {0, 1 - $MachineEpsilon}, 6, 0}][[2, 1]]].
|
|
// Maximum relative error: 2.6855360216340534 * 10^-6. Intensities up to 1000 are artifact-free.
|
|
real x = abs(V1oV2);
|
|
real y = 1.5707921083647782 + x * (-0.9995697178013095 + x * (0.778026455830408 + x * (-0.6173111361273548 + x * (0.4202724111150622 + x * (-0.19452783598217288 + x * 0.04232040013661036)))));
|
|
|
|
if (V1oV2 < 0)
|
|
{
|
|
// Undo range reduction.
|
|
const float epsilon = 1e-5f;
|
|
y = PI * rsqrt(max(epsilon, saturate(1 - V1oV2 * V1oV2))) - y;
|
|
}
|
|
|
|
return V1xV2 * y;
|
|
#endif
|
|
}
|
|
|
|
// Not normalized by the factor of 1/TWO_PI.
|
|
// Ref: Improving radiosity solutions through the use of analytically determined form-factors.
|
|
real IntegrateEdge(real3 V1, real3 V2)
|
|
{
|
|
// 'V1' and 'V2' are represented in a coordinate system with N = (0, 0, 1).
|
|
return ComputeEdgeFactor(V1, V2).z;
|
|
}
|
|
|
|
// 'sinSqSigma' is the sine^2 of the half-angle subtended by the sphere (aperture) as seen from the shaded point.
|
|
// 'cosOmega' is the cosine of the angle between the normal and the direction to the center of the light.
|
|
// N.b.: this function accounts for horizon clipping.
|
|
real DiffuseSphereLightIrradiance(real sinSqSigma, real cosOmega)
|
|
{
|
|
#ifdef APPROXIMATE_SPHERE_LIGHT_NUMERICALLY
|
|
real x = sinSqSigma;
|
|
real y = cosOmega;
|
|
|
|
// Use a numerical fit found in Mathematica. Mean absolute error: 0.00476944.
|
|
// You can use the following Mathematica code to reproduce our results:
|
|
// t = Flatten[Table[{x, y, f[x, y]}, {x, 0, 0.999999, 0.001}, {y, -0.999999, 0.999999, 0.002}], 1]
|
|
// m = NonlinearModelFit[t, x * (y + e) * (0.5 + (y - e) * (a + b * x + c * x^2 + d * x^3)), {a, b, c, d, e}, {x, y}]
|
|
return saturate(x * (0.9245867471551246 + y) * (0.5 + (-0.9245867471551246 + y) * (0.5359050373687144 + x * (-1.0054221851257754 + x * (1.8199061187417047 - x * 1.3172081704209504)))));
|
|
#else
|
|
#if 0 // Ref: Area Light Sources for Real-Time Graphics, page 4 (1996).
|
|
real sinSqOmega = saturate(1 - cosOmega * cosOmega);
|
|
real cosSqSigma = saturate(1 - sinSqSigma);
|
|
real sinSqGamma = saturate(cosSqSigma / sinSqOmega);
|
|
real cosSqGamma = saturate(1 - sinSqGamma);
|
|
|
|
real sinSigma = sqrt(sinSqSigma);
|
|
real sinGamma = sqrt(sinSqGamma);
|
|
real cosGamma = sqrt(cosSqGamma);
|
|
|
|
real sigma = asin(sinSigma);
|
|
real omega = acos(cosOmega);
|
|
real gamma = asin(sinGamma);
|
|
|
|
if (omega >= HALF_PI + sigma)
|
|
{
|
|
// Full horizon occlusion (case #4).
|
|
return 0;
|
|
}
|
|
|
|
real e = sinSqSigma * cosOmega;
|
|
|
|
UNITY_BRANCH
|
|
if (omega < HALF_PI - sigma)
|
|
{
|
|
// No horizon occlusion (case #1).
|
|
return e;
|
|
}
|
|
else
|
|
{
|
|
real g = (-2 * sqrt(sinSqOmega * cosSqSigma) + sinGamma) * cosGamma + (HALF_PI - gamma);
|
|
real h = cosOmega * (cosGamma * sqrt(saturate(sinSqSigma - cosSqGamma)) + sinSqSigma * asin(saturate(cosGamma / sinSigma)));
|
|
|
|
if (omega < HALF_PI)
|
|
{
|
|
// Partial horizon occlusion (case #2).
|
|
return saturate(e + INV_PI * (g - h));
|
|
}
|
|
else
|
|
{
|
|
// Partial horizon occlusion (case #3).
|
|
return saturate(INV_PI * (g + h));
|
|
}
|
|
}
|
|
#else // Ref: Moving Frostbite to Physically Based Rendering, page 47 (2015, optimized).
|
|
real cosSqOmega = cosOmega * cosOmega; // y^2
|
|
|
|
UNITY_BRANCH
|
|
if (cosSqOmega > sinSqSigma) // (y^2)>x
|
|
{
|
|
return saturate(sinSqSigma * cosOmega); // Clip[x*y,{0,1}]
|
|
}
|
|
else
|
|
{
|
|
real cotSqSigma = rcp(sinSqSigma) - 1; // 1/x-1
|
|
real tanSqSigma = rcp(cotSqSigma); // x/(1-x)
|
|
real sinSqOmega = 1 - cosSqOmega; // 1-y^2
|
|
|
|
real w = sinSqOmega * tanSqSigma; // (1-y^2)*(x/(1-x))
|
|
real x = -cosOmega * rsqrt(w); // -y*Sqrt[(1/x-1)/(1-y^2)]
|
|
real y = sqrt(sinSqOmega * tanSqSigma - cosSqOmega); // Sqrt[(1-y^2)*(x/(1-x))-y^2]
|
|
real z = y * cotSqSigma; // Sqrt[(1-y^2)*(x/(1-x))-y^2]*(1/x-1)
|
|
|
|
real a = cosOmega * acos(x) - z; // y*ArcCos[-y*Sqrt[(1/x-1)/(1-y^2)]]-Sqrt[(1-y^2)*(x/(1-x))-y^2]*(1/x-1)
|
|
real b = atan(y); // ArcTan[Sqrt[(1-y^2)*(x/(1-x))-y^2]]
|
|
|
|
return saturate(INV_PI * (a * sinSqSigma + b));
|
|
}
|
|
#endif
|
|
#endif
|
|
}
|
|
|
|
// This function does not check whether light's contribution is 0.
|
|
real3 PolygonFormFactor(real4x3 L)
|
|
{
|
|
L[0] = normalize(L[0]);
|
|
L[1] = normalize(L[1]);
|
|
L[2] = normalize(L[2]);
|
|
L[3] = normalize(L[3]);
|
|
|
|
real3 F = ComputeEdgeFactor(L[0], L[1]);
|
|
F += ComputeEdgeFactor(L[1], L[2]);
|
|
F += ComputeEdgeFactor(L[2], L[3]);
|
|
F += ComputeEdgeFactor(L[3], L[0]);
|
|
|
|
return INV_TWO_PI * F;
|
|
}
|
|
|
|
// See "Real-Time Area Lighting: a Journey from Research to Production", slide 102.
|
|
// Turns out, despite the authors claiming that this function "calculates an approximation of
|
|
// the clipped sphere form factor", that is simply not true.
|
|
// First of all, above horizon, the function should then just return 'F.z', which it does not.
|
|
// Secondly, if we use the correct function called DiffuseSphereLightIrradiance(), it results
|
|
// in severe light leaking if the light is placed vertically behind the camera.
|
|
// So this function is clearly a hack designed to work around these problems.
|
|
real PolygonIrradianceFromVectorFormFactor(float3 F)
|
|
{
|
|
#if 1
|
|
float l = length(F);
|
|
return max(0, (l * l + F.z) / (l + 1));
|
|
#else
|
|
real sff = saturate(dot(F, F));
|
|
real sinSqAperture = sqrt(sff);
|
|
real cosElevationAngle = F.z * rsqrt(sff);
|
|
|
|
return DiffuseSphereLightIrradiance(sinSqAperture, cosElevationAngle);
|
|
#endif
|
|
}
|
|
|
|
// Expects non-normalized vertex positions.
|
|
real PolygonIrradiance(real4x3 L)
|
|
{
|
|
#ifdef APPROXIMATE_POLY_LIGHT_AS_SPHERE_LIGHT
|
|
real3 F = PolygonFormFactor(L);
|
|
|
|
return PolygonIrradianceFromVectorFormFactor(F);
|
|
#else
|
|
// 1. ClipQuadToHorizon
|
|
|
|
// detect clipping config
|
|
uint config = 0;
|
|
if (L[0].z > 0) config += 1;
|
|
if (L[1].z > 0) config += 2;
|
|
if (L[2].z > 0) config += 4;
|
|
if (L[3].z > 0) config += 8;
|
|
|
|
// The fifth vertex for cases when clipping cuts off one corner.
|
|
// Due to a compiler bug, copying L into a vector array with 5 rows
|
|
// messes something up, so we need to stick with the matrix + the L4 vertex.
|
|
real3 L4 = L[3];
|
|
|
|
// This switch is surprisingly fast. Tried replacing it with a lookup array of vertices.
|
|
// Even though that replaced the switch with just some indexing and no branches, it became
|
|
// way, way slower - mem fetch stalls?
|
|
|
|
// clip
|
|
uint n = 0;
|
|
switch (config)
|
|
{
|
|
case 0: // clip all
|
|
break;
|
|
|
|
case 1: // V1 clip V2 V3 V4
|
|
n = 3;
|
|
L[1] = -L[1].z * L[0] + L[0].z * L[1];
|
|
L[2] = -L[3].z * L[0] + L[0].z * L[3];
|
|
break;
|
|
|
|
case 2: // V2 clip V1 V3 V4
|
|
n = 3;
|
|
L[0] = -L[0].z * L[1] + L[1].z * L[0];
|
|
L[2] = -L[2].z * L[1] + L[1].z * L[2];
|
|
break;
|
|
|
|
case 3: // V1 V2 clip V3 V4
|
|
n = 4;
|
|
L[2] = -L[2].z * L[1] + L[1].z * L[2];
|
|
L[3] = -L[3].z * L[0] + L[0].z * L[3];
|
|
break;
|
|
|
|
case 4: // V3 clip V1 V2 V4
|
|
n = 3;
|
|
L[0] = -L[3].z * L[2] + L[2].z * L[3];
|
|
L[1] = -L[1].z * L[2] + L[2].z * L[1];
|
|
break;
|
|
|
|
case 5: // V1 V3 clip V2 V4: impossible
|
|
break;
|
|
|
|
case 6: // V2 V3 clip V1 V4
|
|
n = 4;
|
|
L[0] = -L[0].z * L[1] + L[1].z * L[0];
|
|
L[3] = -L[3].z * L[2] + L[2].z * L[3];
|
|
break;
|
|
|
|
case 7: // V1 V2 V3 clip V4
|
|
n = 5;
|
|
L4 = -L[3].z * L[0] + L[0].z * L[3];
|
|
L[3] = -L[3].z * L[2] + L[2].z * L[3];
|
|
break;
|
|
|
|
case 8: // V4 clip V1 V2 V3
|
|
n = 3;
|
|
L[0] = -L[0].z * L[3] + L[3].z * L[0];
|
|
L[1] = -L[2].z * L[3] + L[3].z * L[2];
|
|
L[2] = L[3];
|
|
break;
|
|
|
|
case 9: // V1 V4 clip V2 V3
|
|
n = 4;
|
|
L[1] = -L[1].z * L[0] + L[0].z * L[1];
|
|
L[2] = -L[2].z * L[3] + L[3].z * L[2];
|
|
break;
|
|
|
|
case 10: // V2 V4 clip V1 V3: impossible
|
|
break;
|
|
|
|
case 11: // V1 V2 V4 clip V3
|
|
n = 5;
|
|
L[3] = -L[2].z * L[3] + L[3].z * L[2];
|
|
L[2] = -L[2].z * L[1] + L[1].z * L[2];
|
|
break;
|
|
|
|
case 12: // V3 V4 clip V1 V2
|
|
n = 4;
|
|
L[1] = -L[1].z * L[2] + L[2].z * L[1];
|
|
L[0] = -L[0].z * L[3] + L[3].z * L[0];
|
|
break;
|
|
|
|
case 13: // V1 V3 V4 clip V2
|
|
n = 5;
|
|
L[3] = L[2];
|
|
L[2] = -L[1].z * L[2] + L[2].z * L[1];
|
|
L[1] = -L[1].z * L[0] + L[0].z * L[1];
|
|
break;
|
|
|
|
case 14: // V2 V3 V4 clip V1
|
|
n = 5;
|
|
L4 = -L[0].z * L[3] + L[3].z * L[0];
|
|
L[0] = -L[0].z * L[1] + L[1].z * L[0];
|
|
break;
|
|
|
|
case 15: // V1 V2 V3 V4
|
|
n = 4;
|
|
break;
|
|
}
|
|
|
|
if (n == 0) return 0;
|
|
|
|
// 2. Project onto sphere
|
|
L[0] = normalize(L[0]);
|
|
L[1] = normalize(L[1]);
|
|
L[2] = normalize(L[2]);
|
|
|
|
switch (n)
|
|
{
|
|
case 3:
|
|
L[3] = L[0];
|
|
break;
|
|
case 4:
|
|
L[3] = normalize(L[3]);
|
|
L4 = L[0];
|
|
break;
|
|
case 5:
|
|
L[3] = normalize(L[3]);
|
|
L4 = normalize(L4);
|
|
break;
|
|
}
|
|
|
|
// 3. Integrate
|
|
real sum = 0;
|
|
sum += IntegrateEdge(L[0], L[1]);
|
|
sum += IntegrateEdge(L[1], L[2]);
|
|
sum += IntegrateEdge(L[2], L[3]);
|
|
if (n >= 4)
|
|
sum += IntegrateEdge(L[3], L4);
|
|
if (n == 5)
|
|
sum += IntegrateEdge(L4, L[0]);
|
|
|
|
sum *= INV_TWO_PI; // Normalization
|
|
|
|
sum = max(sum, 0.0);
|
|
|
|
return isfinite(sum) ? sum : 0.0;
|
|
#endif
|
|
}
|
|
|
|
real LineFpo(real tLDDL, real lrcpD, real rcpD)
|
|
{
|
|
// Compute: ((l / d) / (d * d + l * l)) + (1.0 / (d * d)) * atan(l / d).
|
|
return tLDDL + (rcpD * rcpD) * FastATan(lrcpD);
|
|
}
|
|
|
|
real LineFwt(real tLDDL, real l)
|
|
{
|
|
// Compute: l * ((l / d) / (d * d + l * l)).
|
|
return l * tLDDL;
|
|
}
|
|
|
|
// Computes the integral of the clamped cosine over the line segment.
|
|
// 'l1' and 'l2' define the integration interval.
|
|
// 'tangent' is the line's tangent direction.
|
|
// 'normal' is the direction orthogonal to the tangent. It is the shortest vector between
|
|
// the shaded point and the line, pointing away from the shaded point.
|
|
real LineIrradiance(real l1, real l2, real3 normal, real3 tangent)
|
|
{
|
|
real d = length(normal);
|
|
real l1rcpD = l1 * rcp(d);
|
|
real l2rcpD = l2 * rcp(d);
|
|
real tLDDL1 = l1rcpD / (d * d + l1 * l1);
|
|
real tLDDL2 = l2rcpD / (d * d + l2 * l2);
|
|
real intWt = LineFwt(tLDDL2, l2) - LineFwt(tLDDL1, l1);
|
|
real intP0 = LineFpo(tLDDL2, l2rcpD, rcp(d)) - LineFpo(tLDDL1, l1rcpD, rcp(d));
|
|
return intP0 * normal.z + intWt * tangent.z;
|
|
}
|
|
|
|
// Computes 1.0 / length(mul(ortho, transpose(inverse(invM)))).
|
|
real ComputeLineWidthFactor(real3x3 invM, real3 ortho)
|
|
{
|
|
// transpose(inverse(M)) = (1.0 / determinant(M)) * cofactor(M).
|
|
// Take into account that m12 = m21 = m23 = m32 = 0 and m33 = 1.
|
|
real det = invM._11 * invM._22 - invM._22 * invM._31 * invM._13;
|
|
real3x3 cof = {invM._22, 0.0, -invM._22 * invM._31,
|
|
0.0, invM._11 - invM._13 * invM._31, 0.0,
|
|
-invM._13 * invM._22, 0.0, invM._11 * invM._22};
|
|
|
|
// 1.0 / length(mul(V, (1.0 / s * M))) = abs(s) / length(mul(V, M)).
|
|
return abs(det) / length(mul(ortho, cof));
|
|
}
|
|
|
|
// For line lights.
|
|
real LTCEvaluate(real3 P1, real3 P2, real3 B, real3x3 invM)
|
|
{
|
|
real result = 0.0;
|
|
// Inverse-transform the endpoints.
|
|
P1 = mul(P1, invM);
|
|
P2 = mul(P2, invM);
|
|
|
|
// Terminate the algorithm if both points are below the horizon.
|
|
if (!(P1.z <= 0.0 && P2.z <= 0.0))
|
|
{
|
|
real width = ComputeLineWidthFactor(invM, B);
|
|
|
|
if (P1.z > P2.z)
|
|
{
|
|
// Convention: 'P2' is above 'P1', with the tangent pointing upwards.
|
|
Swap(P1, P2);
|
|
}
|
|
|
|
// Recompute the length and the tangent in the new coordinate system.
|
|
real len = length(P2 - P1);
|
|
real3 T = normalize(P2 - P1);
|
|
|
|
// Clip the part of the light below the horizon.
|
|
if (P1.z <= 0.0)
|
|
{
|
|
// P = P1 + t * T; P.z == 0.
|
|
real t = -P1.z / T.z;
|
|
P1 = real3(P1.xy + t * T.xy, 0.0);
|
|
|
|
// Set the length of the visible part of the light.
|
|
len -= t;
|
|
}
|
|
|
|
// Compute the normal direction to the line, s.t. it is the shortest vector
|
|
// between the shaded point and the line, pointing away from the shaded point.
|
|
// Can be interpreted as a point on the line, since the shaded point is at the origin.
|
|
real proj = dot(P1, T);
|
|
real3 P0 = P1 - proj * T;
|
|
|
|
// Compute the parameterization: distances from 'P1' and 'P2' to 'P0'.
|
|
real l1 = proj;
|
|
real l2 = l1 + len;
|
|
|
|
// Integrate the clamped cosine over the line segment.
|
|
real irradiance = LineIrradiance(l1, l2, P0, T);
|
|
|
|
// Guard against numerical precision issues.
|
|
result = max(INV_PI * width * irradiance, 0.0);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
#endif // UNITY_AREA_LIGHTING_INCLUDED
|