Avoid excessive const inlining

Also removed unnecessary assembly bloat resulting from the generic index accesses through (unck get)
3 years ago · ee78f9b1ce
parent c3d977c703
commit ee78f9b1ce
2 changed files with 24 additions and 20 deletions
--- a/server/src/kvengine/encoding.rs
+++ b/server/src/kvengine/encoding.rs
@ -72,7 +72,7 @@ pub const ENCODING_LUT: BoolTable<fn(&[u8]) -> bool> =

 /// This table maps bytes to character classes that helps us reduce the size of the
 /// transition table and generate bitmasks
-const UTF8_MAP_BYTE_TO_CHAR_CLASS: [u8; 256] = [
+static UTF8_MAP_BYTE_TO_CHAR_CLASS: [u8; 256] = [
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -86,7 +86,7 @@ const UTF8_MAP_BYTE_TO_CHAR_CLASS: [u8; 256] = [

 /// This table is a transition table that maps the combination of a state of the
 /// automaton and a char class to a state
-const UTF8_TRANSITION_MAP: [u8; 108] = [
+static UTF8_TRANSITION_MAP: [u8; 108] = [
    0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,
    12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,
@ -157,6 +157,18 @@ pub const fn is_okay_no_encoding(_inp: &[u8]) -> bool {
    true
 }

+macro_rules! utf_transition {
+    ($idx:expr) => {
+        ucidx!(UTF8_TRANSITION_MAP, $idx)
+    };
+}
+
+macro_rules! utfmap {
+    ($idx:expr) => {
+        ucidx!(UTF8_MAP_BYTE_TO_CHAR_CLASS, $idx)
+    };
+}
+
 /// This method uses a dual-stream deterministic finite automaton
 /// [(DFA)](https://en.wikipedia.org/wiki/Deterministic_finite_automaton) that is used to validate
 /// UTF-8 bytes that use the encoded finite state machines defined in this module.
@ -170,7 +182,7 @@ pub fn is_utf8(bytes: impl AsRef<[u8]>) -> bool {
    let bytes = bytes.as_ref();
    let mut half = bytes.len() / 2;
    unsafe {
-        while *bytes.get_unchecked(half) <= 0xBF && *bytes.get_unchecked(half) >= 0x80 && half > 0 {
+        while ucidx!(bytes, half) <= 0xBF && ucidx!(bytes, half) >= 0x80 && half > 0 {
            half -= 1;
        }
    }
@ -179,17 +191,8 @@ pub fn is_utf8(bytes: impl AsRef<[u8]>) -> bool {
    let mut j = half;
    while i < half {
        unsafe {
-            fsm_state_1 = *UTF8_TRANSITION_MAP.get_unchecked(
-                (fsm_state_1
-                    + (UTF8_MAP_BYTE_TO_CHAR_CLASS
-                        .get_unchecked((*bytes.get_unchecked(i)) as usize)))
-                    as usize,
-            );
-            fsm_state_2 = *UTF8_TRANSITION_MAP.get_unchecked(
-                (fsm_state_2
-                    + (UTF8_MAP_BYTE_TO_CHAR_CLASS.get_unchecked(*bytes.get_unchecked(j) as usize)))
-                    as usize,
-            );
+            fsm_state_1 = utf_transition!((fsm_state_1 + (utfmap!((ucidx!(bytes, i))))));
+            fsm_state_2 = utf_transition!((fsm_state_2 + (utfmap!(ucidx!(bytes, j)))));
        }
        i += 1;
        j += 1;
@ -197,12 +200,7 @@ pub fn is_utf8(bytes: impl AsRef<[u8]>) -> bool {
    let mut j = half * 2;
    while j < bytes.len() {
        unsafe {
-            fsm_state_2 = *UTF8_TRANSITION_MAP.get_unchecked(
-                (fsm_state_2
-                    + (UTF8_MAP_BYTE_TO_CHAR_CLASS
-                        .get_unchecked((*bytes.get_unchecked(j)) as usize)))
-                    as usize,
-            );
+            fsm_state_2 = utf_transition!((fsm_state_2 + (utfmap!(ucidx!(bytes, j)))));
        }
        j += 1;
    }
--- a/server/src/util.rs
+++ b/server/src/util.rs
@ -284,3 +284,9 @@ macro_rules! tmut_bool {
        (tmut_bool!($a), tmut_bool!($b))
    };
 }
+
+macro_rules! ucidx {
+    ($base:ident, $idx:expr) => {
+        *($base.as_ptr().add($idx as usize))
+    };
+}