init
This commit is contained in:
25
Makefile
Normal file
25
Makefile
Normal file
@@ -0,0 +1,25 @@
|
||||
# Builds the same unit test (unittest.cpp) against three objects compiled from
# x264_sum.ll with different code-generation flags, then runs all three:
#   unittest_scalar         - x264_sum.ll built with $(CXXFLAGS) only
#   unittest_vector         - adds -msimd -mcpu=sw8a
#   unittest_vector_unalign - additionally passes
#                             -mllvm -sw64-allows-misaligned-memory-accesses
# $(CXX) must be a driver that accepts LLVM IR (.ll) input and these flags.

# `all` and `clean` name actions, not files.
.PHONY: all clean

all: unittest_scalar unittest_vector unittest_vector_unalign
	./unittest_scalar
	./unittest_vector
	./unittest_vector_unalign

# Libraries are listed after the objects that reference them so that
# order-sensitive linkers resolve the symbols.
unittest_scalar: unittest.o x264_scalar.o
	$(CXX) $(LDFLAGS) $^ -lstdc++ -o $@

unittest_vector: unittest.o x264_vector.o
	$(CXX) $(LDFLAGS) $^ -lstdc++ -o $@

unittest_vector_unalign: unittest.o x264_vector_unalign.o
	$(CXX) $(LDFLAGS) $^ -lstdc++ -o $@

unittest.o: unittest.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $@

x264_scalar.o: x264_sum.ll
	$(CXX) $(CXXFLAGS) -c $< -o $@

x264_vector.o: x264_sum.ll
	$(CXX) -msimd -mcpu=sw8a $(CXXFLAGS) -c $< -o $@

x264_vector_unalign.o: x264_sum.ll
	$(CXX) -msimd -mcpu=sw8a -mllvm -sw64-allows-misaligned-memory-accesses $(CXXFLAGS) -c $< -o $@

# Remove every generated binary and object file.
clean:
	$(RM) unittest_scalar unittest_vector unittest_vector_unalign \
	      unittest.o x264_scalar.o x264_vector.o x264_vector_unalign.o
|
||||
50
unittest.cpp
Normal file
50
unittest.cpp
Normal file
@@ -0,0 +1,50 @@
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <cstdint> // 使用 std::uint8_t 替代用户定义的 uint8_t
|
||||
#include <limits>
|
||||
|
||||
// Declaration of the externally implemented SAD routine (no body here); the
// definition is linked in from a separately compiled object file.
extern "C" {
int x264_pixel_sad_16x16(uint8_t *pix1, int i_stride_pix1,
                         uint8_t *pix2, int i_stride_pix2);
}

// Unit test for x264_pixel_sad_16x16.
//
// Checks the routine against a straightforward scalar reference that sums
// |pix1[y*stride1 + x] - pix2[y*stride2 + x]| over a 16x16 block. The second
// buffer uses a stride wider than the block so that a routine which ignores
// its stride arguments, or reads the padding columns, is detected.
// Failures abort through assert(); with NDEBUG defined the checks compile out.
void test_sad() {
    static constexpr int kBlock = 16;    // block width and height in pixels
    static constexpr int kStride1 = 16;  // tightly packed rows
    static constexpr int kStride2 = 32;  // rows followed by 16 padding bytes

    // Scalar reference implementation used as the oracle.
    const auto reference_sad = [](const uint8_t *p1, int stride1,
                                  const uint8_t *p2, int stride2) {
        int sum = 0;
        for (int y = 0; y < kBlock; ++y) {
            for (int x = 0; x < kBlock; ++x) {
                const int a = p1[y * stride1 + x];
                const int b = p2[y * stride2 + x];
                sum += (a > b) ? (a - b) : (b - a);
            }
        }
        return sum;
    };

    uint8_t pix1[kBlock * kStride1];
    uint8_t pix2[kBlock * kStride2];

    // Case 1: identical content must give a SAD of zero.
    for (uint8_t &p : pix1) p = 0x5A;
    for (uint8_t &p : pix2) p = 0x5A;
    const int sad_equal = x264_pixel_sad_16x16(pix1, kStride1, pix2, kStride2);
    assert(sad_equal == 0);
    (void)sad_equal;

    // Case 2: maximum per-pixel difference, 16 * 16 * 255 = 65280.
    for (uint8_t &p : pix1) p = 0;
    for (uint8_t &p : pix2) p = 255;
    const int sad_max = x264_pixel_sad_16x16(pix1, kStride1, pix2, kStride2);
    assert(sad_max == kBlock * kBlock * 255);
    (void)sad_max;

    // Case 3: deterministic pseudo-random content (simple LCG), including the
    // padding columns of pix2, compared against the scalar reference.
    unsigned state = 12345u;
    const auto next_byte = [&state]() {
        state = state * 1103515245u + 12345u;
        return static_cast<uint8_t>(state >> 16);
    };
    for (uint8_t &p : pix1) p = next_byte();
    for (uint8_t &p : pix2) p = next_byte();
    const int sad_expected = reference_sad(pix1, kStride1, pix2, kStride2);
    const int sad_actual = x264_pixel_sad_16x16(pix1, kStride1, pix2, kStride2);
    assert(sad_actual == sad_expected);
    (void)sad_expected;
    (void)sad_actual;
}
|
||||
|
||||
// 改进的计时函数(多次执行取平均)
|
||||
void measure_time(int iterations = 1000) {
|
||||
// 准备测试数据(可填充任意数据)
|
||||
uint8_t pix1[16 * 16] = {0};
|
||||
uint8_t pix2[16 * 16] = {0};
|
||||
|
||||
// 避免编译器优化掉循环
|
||||
int result_sum = 0;
|
||||
auto start = std::chrono::high_resolution_clock::now();
|
||||
for (int i = 0; i < iterations; ++i) {
|
||||
result_sum += x264_pixel_sad_16x16(pix1, 16, pix2, 16);
|
||||
}
|
||||
auto end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
// 计算时间差(微秒)
|
||||
auto total_duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start).count();
|
||||
auto avg_duration = total_duration / static_cast<double>(iterations);
|
||||
std::cout << "Total Execution Time for " << iterations << " iterations: "
|
||||
<< total_duration << " μs" << std::endl;
|
||||
std::cout << "Average Execution Time: " << avg_duration << " μs" << std::endl;
|
||||
std::cout << "Total Result Sum: " << result_sum << std::endl; // 验证结果未被优化掉
|
||||
}
|
||||
|
||||
int main() {
    // Correctness first: run the unit tests before any timing.
    test_sad();
    std::cout << "All tests passed!" << std::endl;

    // Then benchmark using the default iteration count (1000).
    measure_time();

    // Falling off the end of main is equivalent to `return 0;`.
}
|
||||
32
x264_sum.ll
Normal file
32
x264_sum.ll
Normal file
@@ -0,0 +1,32 @@
|
||||
; Sum of absolute differences (SAD) between two 16x16 blocks of bytes.
;   %0, %2 : pointers to the first row of each block
;   %1, %3 : byte distance between consecutive rows of each block
; Returns the i32 sum of |a - b| over 16 rows of 16 bytes each.
define dso_local noundef i32 @x264_pixel_sad_16x16(ptr %0, i32 noundef %1, ptr %2, i32 noundef %3) {
  ; Entry block (implicitly %4): widen both strides to 64 bits for pointer math.
  %5 = sext i32 %1 to i64
  %6 = sext i32 %3 to i64
  br label %7

7:
  ; Loop-carried state: row counter, running sum and the two row pointers.
  %8 = phi i32 [ 0, %4 ], [ %23, %7 ]
  %9 = phi i32 [ 0, %4 ], [ %20, %7 ]
  %10 = phi ptr [ %0, %4 ], [ %21, %7 ]
  %11 = phi ptr [ %2, %4 ], [ %22, %7 ]
  ; Load one 16-byte row from each block; `align 1` means no alignment is assumed.
  %12 = load <16 x i8>, ptr %10, align 1
  %13 = zext <16 x i8> %12 to <16 x i16>
  %14 = load <16 x i8>, ptr %11, align 1
  %15 = zext <16 x i8> %14 to <16 x i16>
  ; Per-lane |a - b| in 16 bits; operands are zero-extended bytes, so the
  ; difference lies in [-255, 255] and cannot overflow i16.
  %16 = sub nsw <16 x i16> %13, %15
  %17 = tail call <16 x i16> @llvm.abs.v16i16(<16 x i16> %16, i1 false)
  ; Widen to 32 bits and add the 16 lanes together, then accumulate.
  %18 = zext <16 x i16> %17 to <16 x i32>
  %19 = tail call i32 @llvm.vector.reduce.add.v16i32(<16 x i32> %18)
  %20 = add i32 %19, %9
  ; Advance both row pointers by their respective strides.
  %21 = getelementptr inbounds i8, ptr %10, i64 %5
  %22 = getelementptr inbounds i8, ptr %11, i64 %6
  ; Count the row and leave the loop after 16 rows.
  %23 = add nuw nsw i32 %8, 1
  %24 = icmp eq i32 %23, 16
  br i1 %24, label %25, label %7

25:
  ret i32 %20
}

; Intrinsics used above: lane-wise absolute value and horizontal add.
declare <16 x i16> @llvm.abs.v16i16(<16 x i16>, i1 immarg)

declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
|
||||
Reference in New Issue
Block a user