@@ -289,7 +289,7 @@ std::string getCompilerOptionsString(size_t VectorSize,
289289
290290 std::stringstream CompilerOptions;
291291
292- if (is16BitType<T>())
292+ if (is16BitType<T>() || is16BitType<OUT_TYPE>() )
293293 CompilerOptions << " -enable-16bit-types" ;
294294
295295 CompilerOptions << " -DTYPE=" << getHLSLTypeString<T>();
@@ -685,7 +685,6 @@ template <typename T> uint32_t FirstBitLow(T A) {
685685DEFAULT_OP_2 (OpType::And, (A & B));
686686DEFAULT_OP_2 (OpType::Or, (A | B));
687687DEFAULT_OP_2 (OpType::Xor, (A ^ B));
688- DEFAULT_OP_1 (OpType::Not, (~A));
689688DEFAULT_OP_2 (OpType::LeftShift, (A << B));
690689DEFAULT_OP_2 (OpType::RightShift, (A >> B));
691690DEFAULT_OP_1 (OpType::Saturate, (Saturate(A)));
@@ -708,6 +707,61 @@ BITWISE_OP(OpType::FirstBitLow, (FirstBitLow(A)));
708707
709708DEFAULT_OP_1 (OpType::Initialize, (A));
710709
710+ //
711+ // Cast
712+ //
713+
// Declares the Op specialization for one cast operation.
//   OP   - the OpType enumerator being specialized.
//   TYPE - the result type returned by operator().
//   IMPL - expression, written in terms of the input `A`, producing the result.
// The parameter list has to follow the macro name with no whitespace in
// between; otherwise the preprocessor treats this as an object-like macro and
// never substitutes OP/TYPE/IMPL.
#define CAST_OP(OP, TYPE, IMPL)                                                \
  template <typename T> struct Op<OP, T> : StrictValidation {                  \
    TYPE operator()(T A) { return IMPL; }                                      \
  };
718+
719+ template <typename T> HLSLBool_t CastToBool (T A) { return (bool )A; }
720+ template <> HLSLBool_t CastToBool (HLSLHalf_t A) { return (bool )((float )A); }
721+
722+ template <typename T> HLSLHalf_t CastToFloat16 (T A) {
723+ return HLSLHalf_t (float (A));
724+ }
725+
// Converts a value to a 32-bit float.
template <typename T> float CastToFloat32(T A) {
  return static_cast<float>(A);
}
727+
728+ template <typename T> double CastToFloat64 (T A) { return (double )A; }
729+ template <> double CastToFloat64 (HLSLHalf_t A) { return (double )((float )A); }
730+
731+ template <typename T> int16_t CastToInt16 (T A) { return (int16_t )A; }
732+ template <> int16_t CastToInt16 (HLSLHalf_t A) { return (int16_t )((float )A); }
733+
734+ template <typename T> int32_t CastToInt32 (T A) { return (int32_t )A; }
735+ template <> int32_t CastToInt32 (HLSLHalf_t A) { return (int32_t )((float )A); }
736+
737+ template <typename T> int64_t CastToInt64 (T A) { return (int64_t )A; }
738+ template <> int64_t CastToInt64 (HLSLHalf_t A) { return (int64_t )((float )A); }
739+
740+ template <typename T> uint16_t CastToUint16 (T A) { return (uint16_t )A; }
741+ template <> uint16_t CastToUint16 (HLSLHalf_t A) { return (uint16_t )((float )A); }
742+
743+ template <typename T> uint32_t CastToUint32 (T A) { return (uint32_t )A; }
744+ template <> uint32_t CastToUint32 (HLSLHalf_t A) { return (uint32_t )((float )A); }
745+
746+ template <typename T> uint64_t CastToUint64 (T A) { return (uint64_t )A; }
747+ template <> uint64_t CastToUint64 (HLSLHalf_t A) { return (uint64_t )((float )A); }
748+
749+ CAST_OP (OpType::CastToBool, HLSLBool_t, (CastToBool(A)));
750+ CAST_OP (OpType::CastToInt16, int16_t , (CastToInt16(A)));
751+ CAST_OP (OpType::CastToInt32, int32_t , (CastToInt32(A)));
752+ CAST_OP (OpType::CastToInt64, int64_t , (CastToInt64(A)));
753+ CAST_OP (OpType::CastToUint16, uint16_t , (CastToUint16(A)));
754+ CAST_OP (OpType::CastToUint32, uint32_t , (CastToUint32(A)));
755+ CAST_OP (OpType::CastToUint64, uint64_t , (CastToUint64(A)));
756+ CAST_OP (OpType::CastToUint16_FromFP, uint16_t , (CastToUint16(A)));
757+ CAST_OP (OpType::CastToUint32_FromFP, uint32_t , (CastToUint32(A)));
758+ CAST_OP (OpType::CastToUint64_FromFP, uint64_t , (CastToUint64(A)));
759+ CAST_OP (OpType::CastToFloat16, HLSLHalf_t, (CastToFloat16(A)));
760+ CAST_OP (OpType::CastToFloat32, float , (CastToFloat32(A)));
761+ CAST_OP (OpType::CastToFloat64, double , (CastToFloat64(A)));
762+
763+ #undef CAST_OP
764+
711765//
712766// Trigonometric
713767//
@@ -1314,7 +1368,6 @@ class DxilConf_SM69_Vectorized {
13141368 HLK_TEST (Or, uint16_t , ScalarOp2);
13151369 HLK_TEST (Xor, uint16_t , Vector);
13161370 HLK_TEST (Xor, uint16_t , ScalarOp2);
1317- HLK_TEST (Not, uint16_t , Vector);
13181371 HLK_TEST (ReverseBits, uint16_t , Vector);
13191372 HLK_TEST (CountBits, uint16_t , Vector);
13201373 HLK_TEST (FirstBitHigh, uint16_t , Vector);
@@ -1329,7 +1382,6 @@ class DxilConf_SM69_Vectorized {
13291382 HLK_TEST (Or, uint32_t , ScalarOp2);
13301383 HLK_TEST (Xor, uint32_t , Vector);
13311384 HLK_TEST (Xor, uint32_t , ScalarOp2);
1332- HLK_TEST (Not, uint32_t , Vector);
13331385 HLK_TEST (LeftShift, uint32_t , Vector);
13341386 HLK_TEST (LeftShift, uint32_t , ScalarOp2);
13351387 HLK_TEST (RightShift, uint32_t , Vector);
@@ -1344,7 +1396,6 @@ class DxilConf_SM69_Vectorized {
13441396 HLK_TEST (Or, uint64_t , ScalarOp2);
13451397 HLK_TEST (Xor, uint64_t , Vector);
13461398 HLK_TEST (Xor, uint64_t , ScalarOp2);
1347- HLK_TEST (Not, uint64_t , Vector);
13481399 HLK_TEST (LeftShift, uint64_t , Vector);
13491400 HLK_TEST (LeftShift, uint64_t , ScalarOp2);
13501401 HLK_TEST (RightShift, uint64_t , Vector);
@@ -1359,7 +1410,6 @@ class DxilConf_SM69_Vectorized {
13591410 HLK_TEST (Or, int16_t , ScalarOp2);
13601411 HLK_TEST (Xor, int16_t , Vector);
13611412 HLK_TEST (Xor, int16_t , ScalarOp2);
1362- HLK_TEST (Not, int16_t , Vector);
13631413 HLK_TEST (LeftShift, int16_t , Vector);
13641414 HLK_TEST (LeftShift, int16_t , ScalarOp2);
13651415 HLK_TEST (RightShift, int16_t , Vector);
@@ -1374,7 +1424,6 @@ class DxilConf_SM69_Vectorized {
13741424 HLK_TEST (Or, int32_t , ScalarOp2);
13751425 HLK_TEST (Xor, int32_t , Vector);
13761426 HLK_TEST (Xor, int32_t , ScalarOp2);
1377- HLK_TEST (Not, int32_t , Vector);
13781427 HLK_TEST (LeftShift, int32_t , Vector);
13791428 HLK_TEST (LeftShift, int32_t , ScalarOp2);
13801429 HLK_TEST (RightShift, int32_t , Vector);
@@ -1389,7 +1438,6 @@ class DxilConf_SM69_Vectorized {
13891438 HLK_TEST (Or, int64_t , ScalarOp2);
13901439 HLK_TEST (Xor, int64_t , Vector);
13911440 HLK_TEST (Xor, int64_t , ScalarOp2);
1392- HLK_TEST (Not, int64_t , Vector);
13931441 HLK_TEST (LeftShift, int64_t , Vector);
13941442 HLK_TEST (LeftShift, int64_t , ScalarOp2);
13951443 HLK_TEST (RightShift, int64_t , Vector);
@@ -1415,6 +1463,108 @@ class DxilConf_SM69_Vectorized {
14151463 HLK_TEST (Initialize, float , Vector);
14161464 HLK_TEST (Initialize, double , Vector);
14171465
1466+ // Explicit Cast
1467+
1468+ HLK_TEST (CastToInt16, HLSLBool_t, Vector);
1469+ HLK_TEST (CastToInt32, HLSLBool_t, Vector);
1470+ HLK_TEST (CastToInt64, HLSLBool_t, Vector);
1471+ HLK_TEST (CastToUint16, HLSLBool_t, Vector);
1472+ HLK_TEST (CastToUint32, HLSLBool_t, Vector);
1473+ HLK_TEST (CastToUint64, HLSLBool_t, Vector);
1474+ HLK_TEST (CastToFloat16, HLSLBool_t, Vector);
1475+ HLK_TEST (CastToFloat32, HLSLBool_t, Vector);
1476+ HLK_TEST (CastToFloat64, HLSLBool_t, Vector);
1477+
1478+ HLK_TEST (CastToBool, HLSLHalf_t, Vector);
1479+ HLK_TEST (CastToInt16, HLSLHalf_t, Vector);
1480+ HLK_TEST (CastToInt32, HLSLHalf_t, Vector);
1481+ HLK_TEST (CastToInt64, HLSLHalf_t, Vector);
1482+ HLK_TEST (CastToUint16_FromFP, HLSLHalf_t, Vector);
1483+ HLK_TEST (CastToUint32_FromFP, HLSLHalf_t, Vector);
1484+ HLK_TEST (CastToUint64_FromFP, HLSLHalf_t, Vector);
1485+ HLK_TEST (CastToFloat32, HLSLHalf_t, Vector);
1486+ HLK_TEST (CastToFloat64, HLSLHalf_t, Vector);
1487+
1488+ HLK_TEST (CastToBool, float , Vector);
1489+ HLK_TEST (CastToInt16, float , Vector);
1490+ HLK_TEST (CastToInt32, float , Vector);
1491+ HLK_TEST (CastToInt64, float , Vector);
1492+ HLK_TEST (CastToUint16_FromFP, float , Vector);
1493+ HLK_TEST (CastToUint32_FromFP, float , Vector);
1494+ HLK_TEST (CastToUint64_FromFP, float , Vector);
1495+ HLK_TEST (CastToFloat16, float , Vector);
1496+ HLK_TEST (CastToFloat64, float , Vector);
1497+
1498+ HLK_TEST (CastToBool, double , Vector);
1499+ HLK_TEST (CastToInt16, double , Vector);
1500+ HLK_TEST (CastToInt32, double , Vector);
1501+ HLK_TEST (CastToInt64, double , Vector);
1502+ HLK_TEST (CastToUint16_FromFP, double , Vector);
1503+ HLK_TEST (CastToUint32_FromFP, double , Vector);
1504+ HLK_TEST (CastToUint64_FromFP, double , Vector);
1505+ HLK_TEST (CastToFloat16, double , Vector);
1506+ HLK_TEST (CastToFloat32, double , Vector);
1507+
1508+ HLK_TEST (CastToBool, uint16_t , Vector);
1509+ HLK_TEST (CastToInt16, uint16_t , Vector);
1510+ HLK_TEST (CastToInt32, uint16_t , Vector);
1511+ HLK_TEST (CastToInt64, uint16_t , Vector);
1512+ HLK_TEST (CastToUint32, uint16_t , Vector);
1513+ HLK_TEST (CastToUint64, uint16_t , Vector);
1514+ HLK_TEST (CastToFloat16, uint16_t , Vector);
1515+ HLK_TEST (CastToFloat32, uint16_t , Vector);
1516+ HLK_TEST (CastToFloat64, uint16_t , Vector);
1517+
1518+ HLK_TEST (CastToBool, uint32_t , Vector);
1519+ HLK_TEST (CastToInt16, uint32_t , Vector);
1520+ HLK_TEST (CastToInt32, uint32_t , Vector);
1521+ HLK_TEST (CastToInt64, uint32_t , Vector);
1522+ HLK_TEST (CastToUint16, uint32_t , Vector);
1523+ HLK_TEST (CastToUint64, uint32_t , Vector);
1524+ HLK_TEST (CastToFloat16, uint32_t , Vector);
1525+ HLK_TEST (CastToFloat32, uint32_t , Vector);
1526+ HLK_TEST (CastToFloat64, uint32_t , Vector);
1527+
1528+ HLK_TEST (CastToBool, uint64_t , Vector);
1529+ HLK_TEST (CastToInt16, uint64_t , Vector);
1530+ HLK_TEST (CastToInt32, uint64_t , Vector);
1531+ HLK_TEST (CastToInt64, uint64_t , Vector);
1532+ HLK_TEST (CastToUint16, uint64_t , Vector);
1533+ HLK_TEST (CastToUint32, uint64_t , Vector);
1534+ HLK_TEST (CastToFloat16, uint64_t , Vector);
1535+ HLK_TEST (CastToFloat32, uint64_t , Vector);
1536+ HLK_TEST (CastToFloat64, uint64_t , Vector);
1537+
1538+ HLK_TEST (CastToBool, int16_t , Vector);
1539+ HLK_TEST (CastToInt32, int16_t , Vector);
1540+ HLK_TEST (CastToInt64, int16_t , Vector);
1541+ HLK_TEST (CastToUint16, int16_t , Vector);
1542+ HLK_TEST (CastToUint32, int16_t , Vector);
1543+ HLK_TEST (CastToUint64, int16_t , Vector);
1544+ HLK_TEST (CastToFloat16, int16_t , Vector);
1545+ HLK_TEST (CastToFloat32, int16_t , Vector);
1546+ HLK_TEST (CastToFloat64, int16_t , Vector);
1547+
1548+ HLK_TEST (CastToBool, int32_t , Vector);
1549+ HLK_TEST (CastToInt16, int32_t , Vector);
1550+ HLK_TEST (CastToInt64, int32_t , Vector);
1551+ HLK_TEST (CastToUint16, int32_t , Vector);
1552+ HLK_TEST (CastToUint32, int32_t , Vector);
1553+ HLK_TEST (CastToUint64, int32_t , Vector);
1554+ HLK_TEST (CastToFloat16, int32_t , Vector);
1555+ HLK_TEST (CastToFloat32, int32_t , Vector);
1556+ HLK_TEST (CastToFloat64, int32_t , Vector);
1557+
1558+ HLK_TEST (CastToBool, int64_t , Vector);
1559+ HLK_TEST (CastToInt16, int64_t , Vector);
1560+ HLK_TEST (CastToInt32, int64_t , Vector);
1561+ HLK_TEST (CastToUint16, int64_t , Vector);
1562+ HLK_TEST (CastToUint32, int64_t , Vector);
1563+ HLK_TEST (CastToUint64, int64_t , Vector);
1564+ HLK_TEST (CastToFloat16, int64_t , Vector);
1565+ HLK_TEST (CastToFloat32, int64_t , Vector);
1566+ HLK_TEST (CastToFloat64, int64_t , Vector);
1567+
14181568 // Trigonometric
14191569
14201570 HLK_TEST (Acos, HLSLHalf_t, Vector);
@@ -1626,4 +1776,4 @@ class DxilConf_SM69_Vectorized {
16261776private:
16271777 bool Initialized = false ;
16281778 bool VerboseLogging = false ;
1629- };
1779+ };
0 commit comments