From ea311465a1e818ed90c300b8bff2c7d51811b59c Mon Sep 17 00:00:00 2001
From: Yingchi Long
Date: Mon, 17 Mar 2025 20:57:27 +0800
Subject: [PATCH] init

---
 Makefile     |  29 +++++++++++++
 unittest.cpp | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 x264_sum.ll  |  38 +++++++++++++++++
 3 files changed, 186 insertions(+)
 create mode 100644 Makefile
 create mode 100644 unittest.cpp
 create mode 100644 x264_sum.ll

diff --git a/Makefile b/Makefile
new file mode 100644
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+# Build the SAD test driver against three builds of x264_sum.ll and run each.
+.PHONY: all
+
+all: unittest_scalar unittest_vector unittest_vector_unalign
+	./unittest_scalar
+	./unittest_vector
+	./unittest_vector_unalign
+
+# Libraries are listed after the objects so the linker can resolve them.
+unittest_scalar: unittest.o x264_scalar.o
+	$(CXX) $(LDFLAGS) $^ -lstdc++ -o $@
+
+unittest_vector: unittest.o x264_vector.o
+	$(CXX) $(LDFLAGS) $^ -lstdc++ -o $@
+
+unittest_vector_unalign: unittest.o x264_vector_unalign.o
+	$(CXX) $(LDFLAGS) $^ -lstdc++ -o $@
+
+unittest.o: unittest.cpp
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+x264_scalar.o: x264_sum.ll
+	$(CXX) $(CXXFLAGS) -c $^ -o $@
+
+x264_vector.o: x264_sum.ll
+	$(CXX) -msimd -mcpu=sw8a $(CXXFLAGS) -c $^ -o $@
+
+x264_vector_unalign.o: x264_sum.ll
+	$(CXX) -msimd -mcpu=sw8a -mllvm -sw64-allows-misaligned-memory-accesses $(CXXFLAGS) -c $^ -o $@
diff --git a/unittest.cpp b/unittest.cpp
new file mode 100644
--- /dev/null
+++ b/unittest.cpp
@@ -0,0 +1,119 @@
+#include <chrono>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+
+// SAD kernel under test; defined in x264_sum.ll and linked in by the Makefile.
+extern "C" {
+    int x264_pixel_sad_16x16(uint8_t *pix1, int i_stride_pix1,
+                             uint8_t *pix2, int i_stride_pix2);
+}
+
+namespace {
+
+// The kernel walks 16 rows and reads 16 bytes from each.
+constexpr int kBlock = 16;
+
+// Scalar reference: sum of |pix1 - pix2| over one 16x16 block.
+int reference_sad_16x16(const uint8_t *pix1, int stride1,
+                        const uint8_t *pix2, int stride2) {
+    int sum = 0;
+    for (int y = 0; y < kBlock; ++y) {
+        for (int x = 0; x < kBlock; ++x) {
+            sum += std::abs(pix1[y * stride1 + x] - pix2[y * stride2 + x]);
+        }
+    }
+    return sum;
+}
+
+// Fills buf with a fixed pseudo-random sequence so every run sees the same data.
+void fill_pattern(uint8_t *buf, int size, unsigned seed) {
+    unsigned state = seed;
+    for (int i = 0; i < size; ++i) {
+        state = state * 1103515245u + 12345u;
+        buf[i] = static_cast<uint8_t>(state >> 16);
+    }
+}
+
+// Runs the kernel and the reference on the same input; exits on a mismatch.
+void check_case(const char *name, uint8_t *pix1, int stride1,
+                uint8_t *pix2, int stride2) {
+    const int expected = reference_sad_16x16(pix1, stride1, pix2, stride2);
+    const int actual = x264_pixel_sad_16x16(pix1, stride1, pix2, stride2);
+    if (actual != expected) {
+        std::cerr << "FAIL " << name << ": expected " << expected
+                  << ", got " << actual << std::endl;
+        std::exit(EXIT_FAILURE);
+    }
+}
+
+}  // namespace
+
+// Unit test: checks the kernel against the scalar reference on several inputs.
+void test_sad() {
+    // Large enough for 16 rows at the widest stride below plus a 1-byte offset.
+    constexpr int kMaxStride = 48;
+    constexpr int kBufSize = kBlock * kMaxStride + 1;
+    alignas(16) static uint8_t a[kBufSize];
+    alignas(16) static uint8_t b[kBufSize];
+
+    // Identical inputs: every difference is 0.
+    fill_pattern(a, kBufSize, 1u);
+    fill_pattern(b, kBufSize, 1u);
+    check_case("identical", a, kBlock, b, kBlock);
+
+    // Extremes: every difference is 255, the largest possible sum.
+    std::memset(a, 0x00, sizeof(a));
+    std::memset(b, 0xFF, sizeof(b));
+    check_case("extremes", a, kBlock, b, kBlock);
+
+    // Unrelated data, tightly packed rows.
+    fill_pattern(a, kBufSize, 2u);
+    fill_pattern(b, kBufSize, 3u);
+    check_case("random", a, kBlock, b, kBlock);
+
+    // Rows wider than the block, with a different stride per input.
+    check_case("strided", a, 32, b, kMaxStride);
+
+    // Bases one byte past a 16-byte boundary, so every row load is misaligned.
+    check_case("misaligned", a + 1, kMaxStride, b + 1, kMaxStride);
+}
+
+// Times `iterations` kernel calls and prints the total and the average.
+void measure_time(int iterations = 1000) {
+    // Non-constant data so the printed sum is a meaningful sanity check.
+    uint8_t pix1[kBlock * kBlock];
+    uint8_t pix2[kBlock * kBlock];
+    fill_pattern(pix1, kBlock * kBlock, 4u);
+    fill_pattern(pix2, kBlock * kBlock, 5u);
+
+    // Accumulate the results so the calls cannot be discarded; 64-bit so a
+    // large iteration count does not overflow.
+    long long result_sum = 0;
+    auto start = std::chrono::high_resolution_clock::now();
+    for (int i = 0; i < iterations; ++i) {
+        result_sum += x264_pixel_sad_16x16(pix1, kBlock, pix2, kBlock);
+    }
+    auto end = std::chrono::high_resolution_clock::now();
+
+    // Elapsed time in microseconds.
+    auto total_duration =
+        std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
+    auto avg_duration = total_duration / static_cast<double>(iterations);
+    std::cout << "Total Execution Time for " << iterations << " iterations: "
+              << total_duration << " μs" << std::endl;
+    std::cout << "Average Execution Time: " << avg_duration << " μs" << std::endl;
+    std::cout << "Total Result Sum: " << result_sum << std::endl;
+}
+
+int main() {
+    // Run the unit test first; it exits the process on failure.
+    test_sad();
+    std::cout << "All tests passed!" << std::endl;
+
+    // Time the kernel with the default 1000 iterations.
+    measure_time();
+
+    return 0;
+}
diff --git a/x264_sum.ll b/x264_sum.ll
new file mode 100644
--- /dev/null
+++ b/x264_sum.ll
@@ -0,0 +1,38 @@
+; Sum of absolute differences over a 16x16 block of bytes.
+;   %0, %2: first byte of each block; %1, %3: byte distance between rows.
+define dso_local noundef i32 @x264_pixel_sad_16x16(ptr %0, i32 noundef %1, ptr %2, i32 noundef %3) {
+  ; Widen the strides once so they can serve as pointer offsets.
+  %5 = sext i32 %1 to i64
+  %6 = sext i32 %3 to i64
+  br label %7
+
+7:
+  ; %8: row counter, %9: running sum, %10/%11: current row of each block.
+  %8 = phi i32 [ 0, %4 ], [ %23, %7 ]
+  %9 = phi i32 [ 0, %4 ], [ %20, %7 ]
+  %10 = phi ptr [ %0, %4 ], [ %21, %7 ]
+  %11 = phi ptr [ %2, %4 ], [ %22, %7 ]
+  ; |row1 - row2| in 16 bits, then widened and summed in 32 bits.
+  %12 = load <16 x i8>, ptr %10, align 1
+  %13 = zext <16 x i8> %12 to <16 x i16>
+  %14 = load <16 x i8>, ptr %11, align 1
+  %15 = zext <16 x i8> %14 to <16 x i16>
+  %16 = sub nsw <16 x i16> %13, %15
+  %17 = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %16, i1 false)
+  %18 = zext <16 x i16> %17 to <16 x i32>
+  %19 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %18)
+  %20 = add i32 %19, %9
+  ; Step both row pointers; leave after 16 rows.
+  %21 = getelementptr inbounds i8, ptr %10, i64 %5
+  %22 = getelementptr inbounds i8, ptr %11, i64 %6
+  %23 = add nuw nsw i32 %8, 1
+  %24 = icmp eq i32 %23, 16
+  br i1 %24, label %25, label %7
+
+25:
+  ret i32 %20
+}
+
+declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg)
+
+declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)