From 2f4dd153d0b08217e69652f6b445b15c4fd77d6b Mon Sep 17 00:00:00 2001 From: Gayathri Srividya Rajavarapu Date: Thu, 28 May 2026 09:58:35 +0530 Subject: [PATCH] docs(python): add doctests to builder and config APIs, enable --doctest-modules (#82) - Add `addopts = "--doctest-modules"` to [tool.pytest.ini_options] in pyproject.toml so pytest auto-discovers doctests in the hudi/ package. - Add runnable Examples: blocks to every public HudiTableBuilder method (from_base_uri, with_hudi_option, with_hudi_options, with_storage_option, with_storage_options, with_option, with_options). The build() example uses # doctest: +SKIP to avoid requiring a real table path. - Add Examples: blocks to HudiTableConfig, HudiReadConfig, and HudiPlanConfig showing how to access the key string from each enum (e.g. HudiTableConfig.BASE_PATH.value == 'hoodie.base.path'). Closes #82 --- python/hudi/_config.py | 27 +++++++++++++-- python/hudi/table/builder.py | 64 ++++++++++++++++++++++++++++++++++++ python/pyproject.toml | 1 + 3 files changed, 90 insertions(+), 2 deletions(-) diff --git a/python/hudi/_config.py b/python/hudi/_config.py index 2a4eb3b3..2e59d914 100644 --- a/python/hudi/_config.py +++ b/python/hudi/_config.py @@ -38,6 +38,13 @@ ) HudiTableConfig.__doc__ = ( "Configurations for Hudi tables, most of them are persisted in `hoodie.properties`." + "\n\n" + "Examples:\n" + " >>> from hudi import HudiTableConfig\n" + " >>> HudiTableConfig.BASE_PATH.value\n" + " 'hoodie.base.path'\n" + " >>> HudiTableConfig.TABLE_NAME.value\n" + " 'hoodie.table.name'\n" ) HudiReadConfig = Enum( @@ -47,7 +54,16 @@ module=__name__, qualname="HudiReadConfig", ) -HudiReadConfig.__doc__ = "Configurations for reading Hudi tables." +HudiReadConfig.__doc__ = ( + "Configurations for reading Hudi tables." + "\n\n" + "Examples:\n" + " >>> from hudi import HudiReadConfig\n" + " >>> HudiReadConfig.QUERY_TYPE.value\n" + " 'hoodie.read.query.type'\n" + " >>> HudiReadConfig.AS_OF_TIMESTAMP.value\n" + " 'hoodie.read.as.of.timestamp'\n" +) HudiPlanConfig = Enum( "HudiPlanConfig", @@ -56,6 +72,13 @@ module=__name__, qualname="HudiPlanConfig", ) -HudiPlanConfig.__doc__ = "Configurations for query planning in Hudi." +HudiPlanConfig.__doc__ = ( + "Configurations for query planning in Hudi." + "\n\n" + "Examples:\n" + " >>> from hudi import HudiPlanConfig\n" + " >>> HudiPlanConfig.LISTING_PARALLELISM.value\n" + " 'hoodie.plan.listing.parallelism'\n" +) __all__ = ["HudiPlanConfig", "HudiReadConfig", "HudiTableConfig"] diff --git a/python/hudi/table/builder.py b/python/hudi/table/builder.py index ae31abf7..d732efe5 100644 --- a/python/hudi/table/builder.py +++ b/python/hudi/table/builder.py @@ -55,6 +55,16 @@ def from_base_uri(cls, base_uri: str) -> "HudiTableBuilder": Returns: HudiTableBuilder: An instance of the builder. + + Examples: + >>> from hudi import HudiTableBuilder + >>> builder = HudiTableBuilder.from_base_uri("s3://my-bucket/trips") + >>> builder.base_uri + 's3://my-bucket/trips' + >>> builder.hudi_options + {} + >>> builder.storage_options + {} """ builder = cls(base_uri) return builder @@ -75,6 +85,13 @@ def with_hudi_option(self, k: ConfigKey, v: str) -> "HudiTableBuilder": Returns: HudiTableBuilder: The builder instance. + + Examples: + >>> from hudi import HudiTableBuilder + >>> builder = HudiTableBuilder.from_base_uri("s3://my-bucket/trips") + >>> builder = builder.with_hudi_option("read.timeline.num-instants", "5") + >>> builder.hudi_options + {'read.timeline.num-instants': '5'} """ self._add_options({_coerce_key(k): v}, "hudi") return self @@ -88,6 +105,16 @@ def with_hudi_options(self, hudi_options: Dict[str, str]) -> "HudiTableBuilder": Returns: HudiTableBuilder: The builder instance. + + Examples: + >>> from hudi import HudiTableBuilder + >>> builder = HudiTableBuilder.from_base_uri("s3://my-bucket/trips") + >>> builder = builder.with_hudi_options({ + ... "read.timeline.num-instants": "5", + ... "read.use.new.log.record.reader": "true", + ... }) + >>> sorted(builder.hudi_options.items()) + [('read.timeline.num-instants', '5'), ('read.use.new.log.record.reader', 'true')] """ self._add_options(hudi_options, "hudi") return self @@ -102,6 +129,13 @@ def with_storage_option(self, k: str, v: str) -> "HudiTableBuilder": Returns: HudiTableBuilder: The builder instance. + + Examples: + >>> from hudi import HudiTableBuilder + >>> builder = HudiTableBuilder.from_base_uri("s3://my-bucket/trips") + >>> builder = builder.with_storage_option("aws.region", "us-east-1") + >>> builder.storage_options + {'aws.region': 'us-east-1'} """ self._add_options({k: v}, "storage") return self @@ -117,6 +151,13 @@ def with_storage_options( Returns: HudiTableBuilder: The builder instance. + + Examples: + >>> from hudi import HudiTableBuilder + >>> builder = HudiTableBuilder.from_base_uri("s3://my-bucket/trips") + >>> builder = builder.with_storage_options({"aws.region": "us-east-1"}) + >>> builder.storage_options + {'aws.region': 'us-east-1'} """ self._add_options(storage_options, "storage") return self @@ -131,6 +172,13 @@ def with_option(self, k: ConfigKey, v: str) -> "HudiTableBuilder": Returns: HudiTableBuilder: The builder instance. + + Examples: + >>> from hudi import HudiTableBuilder + >>> builder = HudiTableBuilder.from_base_uri("s3://my-bucket/trips") + >>> builder = builder.with_option("aws.region", "eu-west-1") + >>> builder.options + {'aws.region': 'eu-west-1'} """ self._add_options({_coerce_key(k): v}) return self @@ -144,6 +192,13 @@ def with_options(self, options: Dict[str, str]) -> "HudiTableBuilder": Returns: HudiTableBuilder: The builder instance. + + Examples: + >>> from hudi import HudiTableBuilder + >>> builder = HudiTableBuilder.from_base_uri("s3://my-bucket/trips") + >>> builder = builder.with_options({"aws.region": "ap-southeast-1"}) + >>> builder.options + {'aws.region': 'ap-southeast-1'} """ self._add_options(options) return self @@ -154,6 +209,15 @@ def build(self) -> "HudiTable": Returns: HudiTable: The constructed HudiTable object. + + Examples: + >>> from hudi import HudiTableBuilder + >>> table = ( + ... HudiTableBuilder.from_base_uri("s3://my-bucket/trips") + ... .with_hudi_option("read.timeline.num-instants", "5") + ... .with_storage_option("aws.region", "us-east-1") + ... .build() + ... ) # doctest: +SKIP """ return build_hudi_table( self.base_uri, self.hudi_options, self.storage_options, self.options diff --git a/python/pyproject.toml b/python/pyproject.toml index 7add64d9..14a5c94e 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -80,6 +80,7 @@ exclude = "^tests" strict = true [tool.pytest.ini_options] +addopts = "--doctest-modules" testpaths = [ "tests", "hudi",