Skip to content

Commit cd90b4b

Browse files
Sunnie912meta-codesync[bot]
authored andcommitted
Optimize Thrift-Python Map Conversion: Lazy Access values for String/Primitive Keys
Summary: **Overview** The diff implements a lazy conversion optimization for Thrift maps, deferring the type conversion of map values until they are actually accessed. This approach covers over **82%** of use cases D85260454 and achieves approximately **30x** performance improvements for random access operations when the map key is [NC|str] This diff Implements lazy value conversion for map values when key conversion is not required (i.e., keys are of types int, float, str and etc.). In these cases, value conversion is deferred until the value is accessed for the first time. **Two Internal Storage Fields** * `_fbthrift_internal_elements` * Type: `types.MappingProxyType` (read-only dict wrapper) * Purpose**: Holds the original internal representation before Python type conversion * When set**: Only when lazy conversion is enabled * When cleared**: Set to `None` after full conversion is performed * `_fbthrift_elements` * Type**: `dict` | `types.MappingProxyType` (read-only dict wrapper) * Contains**: Python-converted data * Purpose**: Cache for converted key-value pairs * Fully populated immediately with all converted data for **"non-lazy"** mode * Starts as empty `{}`, gradually populated as keys are accessed for **"lazy"** mode. **Performance Wins:** **Random Access with `NC` | `String` Keys**: **27-38x faster** (96-97% improvement) {F1983317953} {F1983317951} Reviewed By: ahilger, yoney Differential Revision: D86317214 fbshipit-source-id: f63252b8919a68858238a2b808469d8e2569a4ef
1 parent d8a4f2a commit cd90b4b

File tree

6 files changed

+294
-45
lines changed

6 files changed

+294
-45
lines changed

third-party/thrift/src/thrift/lib/python/test/containers.thrift

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ struct Maps {
9393
# @lint-ignore THRIFTCHECKS
9494
35: map<float, float> floatMap;
9595
8: map<string, string> stringMap;
96+
10: map<i32, string> i32_string_map;
97+
11: map<float, string> float_string_map;
98+
12: map<double, string> double_string_map;
9699
97: map<binary, binary> binaryMap;
97100
93: map<IOBuf, IOBuf> iobufMap;
98101
# @lint-ignore THRIFTCHECKS

third-party/thrift/src/thrift/lib/python/test/cpp_conversion/cpp_converter.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -105,15 +105,15 @@ def test_constructor_error(self) -> None:
105105
with self.assertRaises(UnicodeDecodeError):
106106
rt.strList
107107

108-
# Currently the conversion from internal data to python value happens
109-
# for all key-value pairs in a map at access time.
110-
# If we make this lazy, this step will pass, and the UnicodeDecodeError
111-
# will be deferred until the corrupted fields are accessed, while the
112-
# valid unicode fields will become accessible.
108+
# Maps with string keys now pass internal dict directly
109+
# Accessing the map field succeeds (no eager conversion)
110+
map_s = s.strToStrMap
111+
map_rt = rt.strToStrMap
112+
# Iteration triggers conversion and raises on corrupted fields
113113
with self.assertRaises(UnicodeDecodeError):
114-
s.strToStrMap
114+
list(map_s.items())
115115
with self.assertRaises(UnicodeDecodeError):
116-
rt.strToStrMap
116+
list(map_rt.items())
117117

118118
def test_type_error(self) -> None:
119119
with self.assertRaises(TypeError):

third-party/thrift/src/thrift/lib/python/test/maps.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222

2323
from collections.abc import ItemsView, KeysView, ValuesView
2424

25-
from enum import Enum
25+
from enum import Enum, IntEnum
2626
from typing import Dict, Type, TypeVar
2727

2828
import python_test.containers.thrift_mutable_types as mutable_containers_types
@@ -38,6 +38,7 @@
3838
from python_test.containers.thrift_types import (
3939
Color as ColorType,
4040
Foo as FooType,
41+
Maps as ImmutableMaps,
4142
Maps as MapsType,
4243
)
4344
from python_test.maps.thrift_types import (
@@ -382,6 +383,79 @@ def test_contains_enum(self) -> None:
382383
# pyre-ignore[6]: Intentional for test
383384
self.assertEqual(cmap.colorMap.get(2), None)
384385

386+
def test_lazy_map_str_str(self) -> None:
387+
# map<str, str>
388+
s = ImmutableMaps(stringMap={"1": "1", "2": "2", "3": "3"})
389+
390+
class StringEnum(str, Enum):
391+
one = "1"
392+
two = "2"
393+
394+
self.assertEqual(s.stringMap[StringEnum.one], "1")
395+
self.assertEqual(s.stringMap["1"], "1")
396+
self.assertEqual(s.stringMap[StringEnum.two], "2")
397+
self.assertEqual(s.stringMap["2"], "2")
398+
for key, value in s.stringMap.items():
399+
self.assertIs(type(key), str)
400+
self.assertIs(type(value), str)
401+
402+
def test_lazy_map_i32_str(self) -> None:
403+
# map<i32, str>
404+
s = ImmutableMaps(i32_string_map={1: "1", 2: "2", 3: "3"})
405+
406+
class MyIntEnum(IntEnum):
407+
one = 1
408+
two = 2
409+
410+
self.assertEqual(s.i32_string_map[MyIntEnum.one], "1")
411+
self.assertEqual(s.i32_string_map[1], "1")
412+
self.assertEqual(s.i32_string_map[MyIntEnum.two], "2")
413+
self.assertEqual(s.i32_string_map[2], "2")
414+
415+
for key, value in s.i32_string_map.items():
416+
self.assertIs(type(key), int)
417+
self.assertIs(type(value), str)
418+
419+
def test_lazy_map_float_str(self) -> None:
420+
# map<float, str>
421+
s = ImmutableMaps(float_string_map={1.0: "1", 2.0: "2", 3.0: "3"})
422+
423+
class MyFloatType(float):
424+
def __init__(self, value):
425+
self.value = float(value)
426+
427+
def __float__(self):
428+
return float(self.value)
429+
430+
self.assertEqual(s.float_string_map[MyFloatType(1.0)], "1")
431+
self.assertEqual(s.float_string_map[1.0], "1")
432+
self.assertEqual(s.float_string_map[MyFloatType(2.0)], "2")
433+
self.assertEqual(s.float_string_map[2.0], "2")
434+
435+
for key, value in s.float_string_map.items():
436+
self.assertIs(type(key), float)
437+
self.assertIs(type(value), str)
438+
439+
def test_lazy_map_double_str(self) -> None:
440+
# map<double, str>
441+
s = ImmutableMaps(double_string_map={1.0: "1", 2.0: "2", 3.0: "3"})
442+
443+
class MyFloat(float):
444+
def __init__(self, value):
445+
self.value = float(value)
446+
447+
def __float__(self):
448+
return float(self.value)
449+
450+
self.assertEqual(s.double_string_map[MyFloat(1.0)], "1")
451+
self.assertEqual(s.double_string_map[1.0], "1")
452+
self.assertEqual(s.double_string_map[MyFloat(2.0)], "2")
453+
self.assertEqual(s.double_string_map[2.0], "2")
454+
455+
for key, value in s.double_string_map.items():
456+
self.assertIs(type(key), float)
457+
self.assertIs(type(value), str)
458+
385459

386460
# TODO: Collapse these two test cases into parameterized test above
387461
class MapMutablePythonTests(unittest.TestCase):

third-party/thrift/src/thrift/lib/python/test/serializer.py

Lines changed: 86 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -536,46 +536,108 @@ def test_non_utf8_container_set(self) -> None:
536536
list(set_field)
537537
# can't compare round-trip serialized set because ordering is random
538538

539-
def test_non_utf8_container_map_bad_key(self) -> None:
539+
def test_non_utf8_container_map_unicode_error_bad_key(self) -> None:
540540
json_bytes = b'{"stringList":[],"stringSet":[],"stringMap":{"key":"val","\xc3\x28":"good"}}'
541541
s = self.serializer.deserialize(
542542
self.UnicodeContainers, json_bytes, Protocol.JSON
543543
)
544-
if self.is_mutable_run:
545-
# mutable thrift-python lazily converts elements when accessed
546-
self.assertEqual(s.stringMap["key"], "val")
547-
with self.assertRaises(UnicodeDecodeError):
548-
# note items() returns a view, need to do something with it
549-
list(s.stringMap.items())
550-
else:
551-
# immutable thrift-python eagerly converts all elements on field access
552-
# when we implement lazy behavior, it will match mutable python behavior
553-
with self.assertRaises(UnicodeDecodeError):
554-
s.stringMap
544+
# Both mutable and immutable thrift-python now pass internal
545+
# dict directly for maps with string keys
546+
# Accessing the map field succeeds (no eager conversion)
547+
map_field = s.stringMap
548+
# Accessing valid keys works
549+
self.assertEqual(map_field["key"], "val")
550+
# Iteration triggers conversion and raises on bad key
551+
with self.assertRaises(UnicodeDecodeError):
552+
list(map_field.items())
553+
# validate it raises consistently
554+
with self.assertRaises(UnicodeDecodeError):
555+
list(map_field.items())
555556

556557
round_trip_bytes = self.serializer.serialize(s, Protocol.JSON)
557558
self.assertEqual(json_bytes, round_trip_bytes)
558559

559-
def test_non_utf8_container_map_bad_val(self) -> None:
560+
def test_non_utf8_container_map_unicode_error_bad_val(self) -> None:
560561
json_bytes = b'{"stringList":[],"stringSet":[],"stringMap":{"key":"val","good":"\xc3\x28"}}'
561562
s = self.serializer.deserialize(
562563
self.UnicodeContainers, json_bytes, Protocol.JSON
563564
)
564-
if self.is_mutable_run:
565-
# mutable thrift-python lazily converts elements when accessed
566-
self.assertEqual(s.stringMap["key"], "val")
567-
with self.assertRaises(UnicodeDecodeError):
568-
# note items() returns a view, need to do something with it
569-
s.stringMap["good"]
570-
else:
571-
# immutable thrift-python eagerly converts all elements on field access
572-
# when we implement lazy behavior, it will match mutable python behavior
573-
with self.assertRaises(UnicodeDecodeError):
574-
s.stringMap
565+
# Both mutable and immutable thrift-python now pass internal
566+
# dict directly for maps with string keys
567+
# Accessing the map field succeeds (no eager conversion)
568+
map_field = s.stringMap
569+
# Accessing valid keys works
570+
self.assertEqual(map_field["key"], "val")
571+
# Accessing the bad value triggers conversion and raises error
572+
with self.assertRaises(UnicodeDecodeError):
573+
map_field["good"]
574+
# validate it raises consistently
575+
with self.assertRaises(UnicodeDecodeError):
576+
map_field["good"]
575577

576578
round_trip_bytes = self.serializer.serialize(s, Protocol.JSON)
577579
self.assertEqual(json_bytes, round_trip_bytes)
578580

581+
def test_lazy_map_keyerror_non_existent_key(self) -> None:
582+
"""Test that accessing non-existent keys raises KeyError in lazy mode."""
583+
json_bytes = b'{"stringList":[],"stringSet":[],"stringMap":{"key":"val","good":"\xc3\x28"}}'
584+
s = self.serializer.deserialize(
585+
self.UnicodeContainers, json_bytes, Protocol.JSON
586+
)
587+
map_field = s.stringMap
588+
# Accessing non-existent key should raise KeyError even in lazy mode
589+
with self.assertRaises(KeyError):
590+
map_field["nonexistent"]
591+
# Map should still be in lazy mode - accessing bad value should
592+
# raise UnicodeDecodeError
593+
with self.assertRaises(UnicodeDecodeError):
594+
map_field["good"]
595+
596+
def test_lazy_map_membership_no_conversion(self) -> None:
597+
"""Test that membership operator works without triggering full conversion."""
598+
json_bytes = b'{"stringList":[],"stringSet":[],"stringMap":{"key":"val","good":"\xc3\x28"}}'
599+
s = self.serializer.deserialize(
600+
self.UnicodeContainers, json_bytes, Protocol.JSON
601+
)
602+
map_field = s.stringMap
603+
# Membership testing should work without triggering conversion
604+
self.assertTrue("key" in map_field)
605+
self.assertTrue("good" in map_field)
606+
self.assertFalse("nonexistent" in map_field)
607+
# Map should still be in lazy mode - iteration should still raise
608+
with self.assertRaises(UnicodeDecodeError):
609+
list(map_field.items())
610+
611+
def test_lazy_map_len_no_conversion(self) -> None:
612+
"""Test that len() works without triggering full conversion."""
613+
json_bytes = b'{"stringList":[],"stringSet":[],"stringMap":{"key":"val","good":"\xc3\x28"}}'
614+
s = self.serializer.deserialize(
615+
self.UnicodeContainers, json_bytes, Protocol.JSON
616+
)
617+
map_field = s.stringMap
618+
# len() should work without triggering conversion
619+
self.assertEqual(len(map_field), 2)
620+
# Map should still be in lazy mode - iteration should still raise
621+
with self.assertRaises(UnicodeDecodeError):
622+
list(map_field.items())
623+
624+
def test_lazy_map_get_with_default(self) -> None:
625+
"""Test that get() method works correctly in lazy mode."""
626+
json_bytes = b'{"stringList":[],"stringSet":[],"stringMap":{"key":"val","good":"\xc3\x28"}}'
627+
s = self.serializer.deserialize(
628+
self.UnicodeContainers, json_bytes, Protocol.JSON
629+
)
630+
map_field = s.stringMap
631+
# get() with existing key should work
632+
self.assertEqual(map_field.get("key"), "val")
633+
# get() with non-existent key should return None
634+
self.assertIsNone(map_field.get("nonexistent"))
635+
# get() with default should return default for non-existent key
636+
self.assertEqual(map_field.get("nonexistent", "default"), "default")
637+
# get() with bad value should raise UnicodeDecodeError
638+
with self.assertRaises(UnicodeDecodeError):
639+
map_field.get("good")
640+
579641
# Test binary field is b64encoded in SimpleJSON protocol.
580642
def test_binary_serialization_simplejson(self) -> None:
581643
json_bytes = b'{"val_bool":false,"val_i32":0,"val_i64":0,"val_string":"abcdef","val_binary":"YWJjZGU","val_iobuf":"YWJjZGVm","val_enum":0,"val_union":{},"val_list":[],"val_map":{},"val_struct_with_containers":{"color_list":[],"color_set":[],"color_map":{}}}'

third-party/thrift/src/thrift/lib/python/types.pxd

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -321,8 +321,12 @@ cdef class ImmutableInternalMap(dict):
321321

322322
cdef class Map(Container):
323323
# may be dict, ImmutableInternalMap, or MappingProxyType
324-
cdef object _fbthrift_elements
324+
cdef object _fbthrift_elements
325325
cdef object _fbthrift_key_info
326+
cdef object _fbthrift_internal_elements
327+
cdef bint _fbthrift_needs_lazy_conversion
328+
cdef object _fbthrift_get_elements(self)
329+
cdef _fbthrift_lazy_getitem(self, object)
326330

327331
cdef void set_struct_field(tuple struct_tuple, int16_t index, value) except *
328332

0 commit comments

Comments
 (0)