Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions sdks/python/apache_beam/typehints/opcodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
"""
# pytype: skip-file

import dataclasses
import inspect
import logging
import sys
Expand Down Expand Up @@ -447,6 +448,11 @@ def _getattr(o, name):
return Const(BoundMethod(func, o))
elif isinstance(o, row_type.RowTypeConstraint):
return o.get_type_for(name)
elif inspect.isclass(o) and dataclasses.is_dataclass(o):
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The `inspect.isclass(o)` check is unnecessarily restrictive. `dataclasses.is_dataclass(o)` returns `True` for both dataclass classes and instances. Removing the class check allows the type inference engine to correctly resolve fields when it encounters a dataclass instance (for example, when an instance is wrapped in a `Const` object during the inference process).

Suggested change
elif inspect.isclass(o) and dataclasses.is_dataclass(o):
elif dataclasses.is_dataclass(o):

field = o.__dataclass_fields__.get(name)
if field is not None:
return field.type
return Any
else:
return Any

Expand Down
21 changes: 21 additions & 0 deletions sdks/python/apache_beam/typehints/row_type_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,27 @@ class DerivedDataClass(BaseDataClass):
getattr(DerivedDataClass, row_type._BEAM_SCHEMA_ID))
self.assertNotEqual(schema_for_derived.id, schema_for_base.id)

def test_dataclass_map_typehints(self):
  """Checks the element type inferred for a Map over dataclass elements.

  Builds a PCollection of a row-registered frozen dataclass, asserts that
  its element type is the dataclass itself, then maps each element to a
  ``beam.Row`` and asserts that the resulting element type is a
  ``GeneratedClassRowTypeConstraint`` with the expected field types.
  """
  @beam.coders.typecoders.registry.register_row
  @dataclass(frozen=True)
  class MyDataClass:
    id: int
    name: str

  p = beam.Pipeline()
  pa = (p | beam.Create([MyDataClass(1, "a"), MyDataClass(2, "b")]))
  self.assertEqual(pa.element_type, MyDataClass)

  pb = (
      pa | beam.Map(
          lambda x: beam.Row(id=x.id, name=x.name, name_hash=hash(x.name))))
  # assertIsInstance reports the offending object and expected type on
  # failure, whereas assertTrue(isinstance(...)) only says
  # "False is not true".
  self.assertIsInstance(
      pb.element_type, row_type.GeneratedClassRowTypeConstraint)
  self.assertEqual(
      pb.element_type,
      row_type.GeneratedClassRowTypeConstraint(
          fields=[('id', int), ('name', str), ('name_hash', int)]))


# Run this module's unittest test cases when it is executed as a script.
if __name__ == '__main__':
unittest.main()
12 changes: 12 additions & 0 deletions sdks/python/apache_beam/typehints/trivial_inference_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

# pytype: skip-file

import dataclasses
import types
import unittest

Expand Down Expand Up @@ -487,6 +488,17 @@ def testPyCallable(self):
python_callable.PythonCallableWithSource("lambda x: (x, str(x))"),
[int])

def testDataClassFields(self):
  """Checks the return type inferred for attribute reads on a dataclass.

  A callable that returns ``(x.id, x.name)`` for a dataclass-typed input is
  expected to have the return type ``Tuple[int, str]``, matching the
  annotations declared on the dataclass fields.
  """
  @dataclasses.dataclass
  class MyDataClass:
    id: int
    name: str

  # Evaluated in the same order as the original positional arguments.
  expected_hint = typehints.Tuple[int, str]
  field_reader = python_callable.PythonCallableWithSource(
      "lambda x: (x.id, x.name)")
  input_hints = [MyDataClass]

  self.assertReturnType(expected_hint, field_reader, input_hints)


# Run this module's unittest test cases when it is executed as a script.
if __name__ == '__main__':
unittest.main()
Loading