From 4ad70a2f7654b556e7fcccf641c09e28e4dbf131 Mon Sep 17 00:00:00 2001 From: Joshua Catt Date: Fri, 3 Apr 2026 18:16:11 -0400 Subject: [PATCH 1/4] add llm_gateway_generate_text --- CHANGELOG.md | 40 +++++++++++++++++++ README.md | 30 ++++++++++++-- .../proxy/client/LocalProxyClientProvider.py | 3 ++ src/datacustomcode/proxy/client/base.py | 3 ++ 4 files changed, 73 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1928786..f9839e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,45 @@ # Changelog +## 1.0.1 + +### Added + +- **`llm_gateway_generate_text()` UDF wrapper for AI-powered DataFrame transformations.** + + New method on proxy providers to generate AI completions in DataFrame operations via the `llm_gateway_generate` UDF. + + ```python + from datacustomcode import Client + from pyspark.sql.functions import col + + client = Client() + + # Generate summaries in a DataFrame column + df = df.withColumn( + "summary", + client._proxy.llm_gateway_generate_text( + "Summarize {company}: revenue={revenue}, CEO={ceo}", + { + "company": col("company"), + "revenue": col("revenue"), + "ceo": col("ceo") + }, + llmModelId="sfdc_ai__DefaultGPT4Omni", + maxTokens=200 + ) + ) + ``` + + **Local Development:** Returns placeholder string (doesn't execute) + **BYOC Production:** Calls real `llm_gateway_generate` UDF + + **Parameters:** + - `template` (str): Prompt template with {placeholder} syntax + - `values` (dict or Column): Dict mapping placeholders to Columns, or pre-built named_struct + - `llmModelId` (str): Model identifier (required, e.g., "sfdc_ai__DefaultGPT4Omni") + - `maxTokens` (int): Maximum response length (required, e.g., 200) + + ## 1.0.0 ### Breaking Changes diff --git a/README.md b/README.md index d3d601a..4681494 100644 --- a/README.md +++ b/README.md @@ -155,7 +155,7 @@ You should only need the following methods: * `write_to_dmo(name, spark_dataframe, write_mode)` – Write to a Data Lake Object by name with a Spark dataframe 
For example: -``` +```python from datacustomcode import Client client = Client() @@ -166,10 +166,34 @@ sdf = client.read_dlo('my_DLO') client.write_to_dlo('output_DLO') ``` +### LLM Gateway -> [!WARNING] -> Currently we only support reading from DMOs and writing to DMOs or reading from DLOs and writing to DLOs, but they cannot mix. +Generate AI completions in DataFrame transformations using the LLM gateway UDF. +```python +from datacustomcode import Client +from pyspark.sql.functions import col + +client = Client() + +# Use template with placeholders +df = df.withColumn( + "summary", + client._proxy.llm_gateway_generate_text( + "Summarize {company}: revenue={revenue}, CEO={ceo}", + { + "company": col("company"), + "revenue": col("revenue"), + "ceo": col("ceo") + }, + llmModelId="sfdc_ai__DefaultGPT4Omni", + maxTokens=200 + ) +) +``` + +> [!WARNING] +> This method returns a placeholder string in local development and won't execute. It only works when deployed, where it calls the real LLM Gateway service via the `llm_gateway_generate` UDF. ## CLI diff --git a/src/datacustomcode/proxy/client/LocalProxyClientProvider.py b/src/datacustomcode/proxy/client/LocalProxyClientProvider.py index 515db00..264a5e3 100644 --- a/src/datacustomcode/proxy/client/LocalProxyClientProvider.py +++ b/src/datacustomcode/proxy/client/LocalProxyClientProvider.py @@ -27,3 +27,6 @@ def __init__(self, **kwargs: object) -> None: def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: return f"Hello, thanks for using {llmModelId}. 
So many tokens: {maxTokens}" + + def llm_gateway_generate_text(self, template, values, llmModelId: str, maxTokens: int): + return f"Using Generate Text with {llmModelId} and maxTokens: {maxTokens}" diff --git a/src/datacustomcode/proxy/client/base.py b/src/datacustomcode/proxy/client/base.py index 5c840a0..9e86546 100644 --- a/src/datacustomcode/proxy/client/base.py +++ b/src/datacustomcode/proxy/client/base.py @@ -25,3 +25,6 @@ def __init__(self): @abstractmethod def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: ... + + @abstractmethod + def llm_gateway_generate_text(self, template, values, llmModelId: str, maxTokens: int): ... From e05189280e29f892c1c055907a4565f30b5098fd Mon Sep 17 00:00:00 2001 From: Joshua Catt Date: Mon, 6 Apr 2026 11:06:08 -0400 Subject: [PATCH 2/4] text fixes --- CHANGELOG.md | 6 +++--- README.md | 5 ++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f9839e9..5f9bcf7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ - **`llm_gateway_generate_text()` UDF wrapper for AI-powered DataFrame transformations.** - New method on proxy providers to generate AI completions in DataFrame operations via the `llm_gateway_generate` UDF. + New method on proxy providers to generate AI completions in DataFrame operations via a built-in UDF. 
```python from datacustomcode import Client @@ -31,13 +31,13 @@ ``` **Local Development:** Returns placeholder string (doesn't execute) - **BYOC Production:** Calls real `llm_gateway_generate` UDF + **Production:** Calls a built-in UDF **Parameters:** - `template` (str): Prompt template with {placeholder} syntax - `values` (dict or Column): Dict mapping placeholders to Columns, or pre-built named_struct - `llmModelId` (str): Model identifier (required, e.g., "sfdc_ai__DefaultGPT4Omni") - - `maxTokens` (int): Maximum response length (required, e.g., 200) + - `maxTokens` (int): Maximum tokens that will be spent on this query ## 1.0.0 diff --git a/README.md b/README.md index 4681494..bf1b203 100644 --- a/README.md +++ b/README.md @@ -166,6 +166,9 @@ sdf = client.read_dlo('my_DLO') client.write_to_dlo('output_DLO') ``` +> [!WARNING] +> Currently we only support reading from DMOs and writing to DMOs or reading from DLOs and writing to DLOs, but they cannot mix. + ### LLM Gateway Generate AI completions in DataFrame transformations using the LLM gateway UDF. @@ -193,7 +196,7 @@ df = df.withColumn( ``` > [!WARNING] -> This method returns a placeholder string in local development and won't execute. It only works when deployed, where it calls the real LLM Gateway service via the `llm_gateway_generate` UDF. +> This method returns a placeholder string in local development. It only makes a LLM call and spends tokens when deployed, where it calls the real LLM Gateway service via a UDF. ## CLI From 8d8ec092917e6b9bcff8aaf01e5c5a27ab6a9226 Mon Sep 17 00:00:00 2001 From: Joshua Catt Date: Mon, 6 Apr 2026 11:07:59 -0400 Subject: [PATCH 3/4] more verbiage --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bf1b203..fe2e226 100644 --- a/README.md +++ b/README.md @@ -196,7 +196,7 @@ df = df.withColumn( ``` > [!WARNING] -> This method returns a placeholder string in local development. 
It only makes a LLM call and spends tokens when deployed, where it calls the real LLM Gateway service via a UDF. +> This method returns a placeholder string in local development. It only makes an LLM call and spends tokens when deployed, where it calls the real LLM Gateway service via a built-in UDF. ## CLI From f502e244f2050b3f7f1fd04d5fa731675d4ff30a Mon Sep 17 00:00:00 2001 From: Joshua Catt Date: Mon, 6 Apr 2026 11:09:52 -0400 Subject: [PATCH 4/4] lint --- CHANGELOG.md | 2 +- src/datacustomcode/proxy/client/LocalProxyClientProvider.py | 4 +++- src/datacustomcode/proxy/client/base.py | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5f9bcf7..f5dd3e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,7 +30,7 @@ ) ``` - **Local Development:** Returns placeholder string (doesn't execute) + **Local Development:** Returns placeholder string (doesn't execute) **Production:** Calls a built-in UDF **Parameters:** diff --git a/src/datacustomcode/proxy/client/LocalProxyClientProvider.py b/src/datacustomcode/proxy/client/LocalProxyClientProvider.py index 264a5e3..9c08b54 100644 --- a/src/datacustomcode/proxy/client/LocalProxyClientProvider.py +++ b/src/datacustomcode/proxy/client/LocalProxyClientProvider.py @@ -28,5 +28,7 @@ def __init__(self, **kwargs: object) -> None: def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: return f"Hello, thanks for using {llmModelId}. 
So many tokens: {maxTokens}" - def llm_gateway_generate_text(self, template, values, llmModelId: str, maxTokens: int): + def llm_gateway_generate_text( + self, template, values, llmModelId: str, maxTokens: int + ): return f"Using Generate Text with {llmModelId} and maxTokens: {maxTokens}" diff --git a/src/datacustomcode/proxy/client/base.py b/src/datacustomcode/proxy/client/base.py index 9e86546..85e304a 100644 --- a/src/datacustomcode/proxy/client/base.py +++ b/src/datacustomcode/proxy/client/base.py @@ -27,4 +27,6 @@ def __init__(self): def call_llm_gateway(self, llmModelId: str, prompt: str, maxTokens: int) -> str: ... @abstractmethod - def llm_gateway_generate_text(self, template, values, llmModelId: str, maxTokens: int): ... + def llm_gateway_generate_text( + self, template, values, llmModelId: str, maxTokens: int + ): ...