def with_disabled_vectorize_get_ref(self):
    """
    Disable clang auto-vectorization for the pixel loop used by ``get_ref``.

    Inserts ``#pragma clang loop vectorize(disable)`` on its own line
    directly before every line of ``self.mc_src`` that contains the loop
    header ``for( int x = 0; x < i_width; x++ )``.  The match is purely
    textual, so any other loop in the source with the same header is
    tagged as well.

    The rest of the source is left byte-for-byte untouched: original line
    terminators and the presence/absence of a trailing newline are kept.
    The operation is idempotent: a loop whose preceding line already
    carries the pragma is not tagged a second time.

    Returns:
        self: Returns the builder instance for method chaining

    Example:
        >>> builder = MCBuilder()
        >>> modified = builder.with_disabled_vectorize_get_ref()
        >>> "#pragma clang loop vectorize(disable)" in modified.mc_src
        True
        >>> once = modified.mc_src
        >>> modified.with_disabled_vectorize_get_ref().mc_src == once
        True
    """
    pragma_str = "#pragma clang loop vectorize(disable)"
    target_loop = "for( int x = 0; x < i_width; x++ )"

    modified_lines = []
    # keepends=True lets "".join() reproduce the untouched parts exactly,
    # instead of normalising every terminator and dropping the final one.
    for line in self.mc_src.splitlines(keepends=True):
        # The line emitted just before this one; if it already holds the
        # pragma, the loop was tagged by an earlier call.
        already_tagged = bool(modified_lines) and pragma_str in modified_lines[-1]
        if target_loop in line and not already_tagged:
            # Reuse the loop line's own terminator; fall back to "\n" when
            # the loop is the last line and has no newline of its own.
            newline = line[len(line.rstrip("\r\n")):] or "\n"
            modified_lines.append(pragma_str + newline)
        modified_lines.append(line)

    self.mc_src = "".join(modified_lines)
    return self
def disable_mc_loop_vectorize(mc_path: Path) -> bool:
    """
    Disable x264 mc.c loop vectorizing, mainly for "get_ref" performance.

    Reads every line of the file at ``mc_path`` and hands them to
    ``get_ref_add_pragma``.  The file is rewritten with the returned lines
    only when that helper reports a modification; otherwise it is left
    untouched.

    Returns:
        The modification flag reported by ``get_ref_add_pragma`` (true when
        the file was rewritten).
    """
    with open(mc_path, "r") as source:
        original_lines = source.readlines()

    patched_lines, changed = get_ref_add_pragma(original_lines)

    # Nothing to insert: skip the write so the file is not touched.
    if not changed:
        return changed

    with open(mc_path, "w") as target:
        target.writelines(patched_lines)
    return changed