Skip to content

Commit e116a8c

Browse files
authored
update remaining adapter docs (networkx, dgl, cugraph, pyg) (#378)
1 parent 59b8001 commit e116a8c

File tree

12 files changed

+1511
-384
lines changed

12 files changed

+1511
-384
lines changed

site/content/3.10/data-science/adapters/arangodb-cugraph-adapter.md

Lines changed: 90 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@ To install the latest release of the ArangoDB-cuGraph Adapter,
2424
run the following command:
2525

2626
```bash
27-
conda install -c arangodb adbcug-adapter
27+
pip install --extra-index-url=https://pypi.nvidia.com cudf-cu11 cugraph-cu11
28+
pip install adbcug-adapter
2829
```
2930

3031
## Quickstart
@@ -36,53 +37,108 @@ Check also the
3637
```py
3738
import cudf
3839
import cugraph
39-
from arango import ArangoClient # Python-Arango driver
4040

41-
from adbcug_adapter import ADBCUG_Adapter
41+
from arango import ArangoClient
42+
from adbcug_adapter import ADBCUG_Adapter, ADBCUG_Controller
4243

43-
# Let's assume that the ArangoDB "fraud detection" dataset is imported to this endpoint
44-
db = ArangoClient(hosts="http://localhost:8529").db("_system", username="root", password="")
44+
# Connect to ArangoDB
45+
db = ArangoClient().db()
4546

47+
# Instantiate the adapter
4648
adbcug_adapter = ADBCUG_Adapter(db)
49+
```
50+
51+
### ArangoDB to cuGraph
52+
```py
53+
#######################
54+
# 1.1: via Graph name #
55+
#######################
4756

48-
# Use Case 1.1: ArangoDB to cuGraph via Graph name
49-
cug_fraud_graph = adbcug_adapter.arangodb_graph_to_cugraph("fraud-detection")
57+
cug_g = adbcug_adapter.arangodb_graph_to_cugraph("fraud-detection")
5058

51-
# Use Case 1.2: ArangoDB to cuGraph via Collection names
52-
cug_fraud_graph_2 = adbcug_adapter.arangodb_collections_to_cugraph(
59+
#############################
60+
# 1.2: via Collection names #
61+
#############################
62+
63+
cug_g = adbcug_adapter.arangodb_collections_to_cugraph(
5364
"fraud-detection",
5465
{"account", "bank", "branch", "Class", "customer"}, # Vertex collections
5566
{"accountHolder", "Relationship", "transaction"}, # Edge collections
5667
)
68+
```
5769

58-
# Use Case 2: cuGraph to ArangoDB:
59-
## 1) Create a sample cuGraph
60-
cug_divisibility_graph = cugraph.MultiGraph(directed=True)
61-
cug_divisibility_graph.from_cudf_edgelist(
62-
cudf.DataFrame(
63-
[
64-
(f"numbers/{j}", f"numbers/{i}", j / i)
65-
for i in range(1, 101)
66-
for j in range(1, 101)
67-
if j % i == 0
68-
],
69-
columns=["src", "dst", "weight"],
70-
),
71-
source="src",
72-
destination="dst",
73-
edge_attr="weight",
74-
renumber=False,
75-
)
70+
### cuGraph to ArangoDB
71+
```py
72+
#################################
73+
# 2.1: with a Homogeneous Graph #
74+
#################################
75+
76+
edges = [("Person/A", "Person/B", 1), ("Person/B", "Person/C", -1)]
77+
cug_g = cugraph.MultiGraph(directed=True)
78+
cug_g.from_cudf_edgelist(cudf.DataFrame(edges, columns=["src", "dst", "weight"]), source="src", destination="dst", edge_attr="weight")
7679

77-
## 2) Create ArangoDB Edge Definitions
7880
edge_definitions = [
7981
{
80-
"edge_collection": "is_divisible_by",
81-
"from_vertex_collections": ["numbers"],
82-
"to_vertex_collections": ["numbers"],
82+
"edge_collection": "knows",
83+
"from_vertex_collections": ["Person"],
84+
"to_vertex_collections": ["Person"],
8385
}
8486
]
8587

86-
## 3) Convert cuGraph to ArangoDB
87-
adb_graph = adbcug_adapter.cugraph_to_arangodb("DivisibilityGraph", cug_graph, edge_definitions)
88-
```
88+
adb_g = adbcug_adapter.cugraph_to_arangodb("Knows", cug_g, edge_definitions, edge_attr="weight")
89+
90+
##############################################################
91+
# 2.2: with a Homogeneous Graph & a custom ADBCUG Controller #
92+
##############################################################
93+
94+
class Custom_ADBCUG_Controller(ADBCUG_Controller):
95+
"""ArangoDB-cuGraph controller.
96+
97+
Responsible for controlling how nodes & edges are handled when
98+
transitioning from ArangoDB to cuGraph & vice-versa.
99+
"""
100+
101+
def _prepare_cugraph_node(self, cug_node: dict, col: str) -> None:
102+
"""Prepare a cuGraph node before it gets inserted into the ArangoDB
103+
collection **col**.
104+
105+
:param cug_node: The cuGraph node object to (optionally) modify.
106+
:param col: The ArangoDB collection the node belongs to.
107+
"""
108+
cug_node["foo"] = "bar"
109+
110+
def _prepare_cugraph_edge(self, cug_edge: dict, col: str) -> None:
111+
"""Prepare a cuGraph edge before it gets inserted into the ArangoDB
112+
collection **col**.
113+
114+
:param cug_edge: The cuGraph edge object to (optionally) modify.
115+
:param col: The ArangoDB collection the edge belongs to.
116+
"""
117+
cug_edge["bar"] = "foo"
118+
119+
adb_g = ADBCUG_Adapter(db, Custom_ADBCUG_Controller()).cugraph_to_arangodb("Knows", cug_g, edge_definitions)
120+
121+
###################################
122+
# 2.3: with a Heterogeneous Graph #
123+
###################################
124+
125+
edges = [
126+
('student:101', 'lecture:101'),
127+
('student:102', 'lecture:102'),
128+
('student:103', 'lecture:103'),
129+
('student:103', 'student:101'),
130+
('student:103', 'student:102'),
131+
('teacher:101', 'lecture:101'),
132+
('teacher:102', 'lecture:102'),
133+
('teacher:103', 'lecture:103'),
134+
('teacher:101', 'teacher:102'),
135+
('teacher:102', 'teacher:103')
136+
]
137+
cug_g = cugraph.MultiGraph(directed=True)
138+
cug_g.from_cudf_edgelist(cudf.DataFrame(edges, columns=["src", "dst"]), source='src', destination='dst')
139+
140+
# ...
141+
142+
# Learn how this example is handled in Colab:
143+
# https://colab.research.google.com/github/arangoml/cugraph-adapter/blob/master/examples/ArangoDB_cuGraph_Adapter.ipynb#scrollTo=nuVoCZQv6oyi
144+
```

site/content/3.10/data-science/adapters/arangodb-dgl-adapter.md

Lines changed: 199 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -38,38 +38,215 @@ Check also the
3838
[interactive tutorial](https://colab.research.google.com/github/arangoml/dgl-adapter/blob/master/examples/ArangoDB_DGL_Adapter.ipynb).
3939

4040
```py
41-
from arango import ArangoClient # Python-Arango driver
42-
from dgl.data import KarateClubDataset # Sample graph from DGL
41+
import dgl
42+
import torch
43+
import pandas
4344

44-
# Let's assume that the ArangoDB "fraud detection" dataset is imported to this endpoint
45-
db = ArangoClient(hosts="http://localhost:8529").db("_system", username="root", password="")
45+
from arango import ArangoClient
46+
from adbdgl_adapter import ADBDGL_Adapter, ADBDGL_Controller
47+
from adbdgl_adapter.encoders import IdentityEncoder, CategoricalEncoder
4648

49+
# Connect to ArangoDB
50+
db = ArangoClient().db()
51+
52+
# Instantiate the adapter
4753
adbdgl_adapter = ADBDGL_Adapter(db)
4854

49-
# Use Case 1.1: ArangoDB to DGL via Graph name
50-
dgl_fraud_graph = adbdgl_adapter.arangodb_graph_to_dgl("fraud-detection")
55+
# Create a DGL Heterogeneous Graph
56+
fake_hetero = dgl.heterograph({
57+
("user", "follows", "user"): (torch.tensor([0, 1]), torch.tensor([1, 2])),
58+
("user", "follows", "topic"): (torch.tensor([1, 1]), torch.tensor([1, 2])),
59+
("user", "plays", "game"): (torch.tensor([0, 3]), torch.tensor([3, 4])),
60+
})
61+
fake_hetero.nodes["user"].data["features"] = torch.tensor([21, 44, 16, 25])
62+
fake_hetero.nodes["user"].data["label"] = torch.tensor([1, 2, 0, 1])
63+
fake_hetero.nodes["game"].data["features"] = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1], [1, 1]])
64+
fake_hetero.edges[("user", "plays", "game")].data["features"] = torch.tensor([[6, 1], [1000, 0]])
65+
```
66+
67+
### DGL to ArangoDB
68+
```py
69+
############################
70+
# 1.1: without a Metagraph #
71+
############################
5172

52-
# Use Case 1.2: ArangoDB to DGL via Collection names
53-
dgl_fraud_graph_2 = adbdgl_adapter.arangodb_collections_to_dgl(
54-
"fraud-detection",
55-
{"account", "Class", "customer"}, # Vertex collections
56-
{"accountHolder", "Relationship", "transaction"}, # Edge collections
57-
)
73+
adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero)
5874

59-
# Use Case 1.3: ArangoDB to DGL via Metagraph
75+
#########################
76+
# 1.2: with a Metagraph #
77+
#########################
78+
79+
# Specifying a Metagraph provides customized adapter behaviour
6080
metagraph = {
81+
"nodeTypes": {
82+
"user": {
83+
"features": "user_age", # 1) you can specify a string value for attribute renaming
84+
"label": label_tensor_to_2_column_dataframe, # 2) you can specify a function for user-defined handling, as long as the function returns a Pandas DataFrame
85+
},
86+
# 3) You can specify set of strings if you want to preserve the same DGL attribute names for the node/edge type
87+
"game": {"features"} # this is equivalent to {"features": "features"}
88+
},
89+
"edgeTypes": {
90+
("user", "plays", "game"): {
91+
# 4) you can specify a list of strings for tensor dissasembly (if you know the number of node/edge features in advance)
92+
"features": ["hours_played", "is_satisfied_with_game"]
93+
},
94+
},
95+
}
96+
97+
def label_tensor_to_2_column_dataframe(dgl_tensor: torch.Tensor, adb_df: pandas.DataFrame) -> pandas.DataFrame:
98+
"""A user-defined function to create two
99+
ArangoDB attributes out of the 'user' label tensor
100+
101+
:param dgl_tensor: The DGL Tensor containing the data
102+
:type dgl_tensor: torch.Tensor
103+
:param adb_df: The ArangoDB DataFrame to populate, whose
104+
size is preset to the length of **dgl_tensor**.
105+
:type adb_df: pandas.DataFrame
106+
:return: The populated ArangoDB DataFrame
107+
:rtype: pandas.DataFrame
108+
"""
109+
label_map = {0: "Class A", 1: "Class B", 2: "Class C"}
110+
111+
adb_df["label_num"] = dgl_tensor.tolist()
112+
adb_df["label_str"] = adb_df["label_num"].map(label_map)
113+
114+
return adb_df
115+
116+
117+
adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero, metagraph, explicit_metagraph=False)
118+
119+
#######################################################
120+
# 1.3: with a Metagraph and `explicit_metagraph=True` #
121+
#######################################################
122+
123+
# With `explicit_metagraph=True`, the node & edge types omitted from the metagraph will NOT be converted to ArangoDB.
124+
adb_g = adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero, metagraph, explicit_metagraph=True)
125+
126+
########################################
127+
# 1.4: with a custom ADBDGL Controller #
128+
########################################
129+
130+
class Custom_ADBDGL_Controller(ADBDGL_Controller):
131+
def _prepare_dgl_node(self, dgl_node: dict, node_type: str) -> dict:
132+
"""Optionally modify a DGL node object before it gets inserted into its designated ArangoDB collection.
133+
134+
:param dgl_node: The DGL node object to (optionally) modify.
135+
:param node_type: The DGL Node Type of the node.
136+
:return: The DGL Node object
137+
"""
138+
dgl_node["foo"] = "bar"
139+
return dgl_node
140+
141+
def _prepare_dgl_edge(self, dgl_edge: dict, edge_type: tuple) -> dict:
142+
"""Optionally modify a DGL edge object before it gets inserted into its designated ArangoDB collection.
143+
144+
:param dgl_edge: The DGL edge object to (optionally) modify.
145+
:param edge_type: The Edge Type of the DGL edge. Formatted
146+
as (from_collection, edge_collection, to_collection)
147+
:return: The DGL Edge object
148+
"""
149+
dgl_edge["bar"] = "foo"
150+
return dgl_edge
151+
152+
153+
adb_g = ADBDGL_Adapter(db, Custom_ADBDGL_Controller()).dgl_to_arangodb("FakeHetero", fake_hetero)
154+
```
155+
156+
### ArangoDB to DGL
157+
```py
158+
# Start from scratch!
159+
db.delete_graph("FakeHetero", drop_collections=True, ignore_missing=True)
160+
adbdgl_adapter.dgl_to_arangodb("FakeHetero", fake_hetero)
161+
162+
#######################
163+
# 2.1: via Graph name #
164+
#######################
165+
166+
# Due to risk of ambiguity, this method does not transfer attributes
167+
dgl_g = adbdgl_adapter.arangodb_graph_to_dgl("FakeHetero")
168+
169+
#############################
170+
# 2.2: via Collection names #
171+
#############################
172+
173+
# Due to risk of ambiguity, this method does not transfer attributes
174+
dgl_g = adbdgl_adapter.arangodb_collections_to_dgl("FakeHetero", v_cols={"user", "game"}, e_cols={"plays"})
175+
176+
######################
177+
# 2.3: via Metagraph #
178+
######################
179+
180+
# Transfers attributes "as is", meaning they are already formatted to DGL data standards.
181+
# Learn more about the DGL Data Standards here: https://docs.dgl.ai/guide/graph.html#guide-graph
182+
metagraph_v1 = {
61183
"vertexCollections": {
62-
"account": {"Balance", "account_type", "customer_id", "rank"},
63-
"customer": {"Name", "rank"},
184+
# Move the "features" & "label" ArangoDB attributes to DGL as "features" & "label" Tensors
185+
"user": {"features", "label"}, # equivalent to {"features": "features", "label": "label"}
186+
"game": {"dgl_game_features": "features"},
187+
"topic": {},
64188
},
65189
"edgeCollections": {
66-
"transaction": {"transaction_amt", "sender_bank_id", "receiver_bank_id"},
67-
"accountHolder": {},
190+
"plays": {"dgl_plays_features": "features"},
191+
"follows": {}
68192
},
69193
}
70-
dgl_fraud_graph_3 = adbdgl_adapter.arangodb_to_dgl("fraud-detection", metagraph)
71194

72-
# Use Case 2: DGL to ArangoDB
73-
dgl_karate_graph = KarateClubDataset()[0]
74-
adb_karate_graph = adbdgl_adapter.dgl_to_arangodb("Karate", dgl_karate_graph)
75-
```
195+
dgl_g = adbdgl_adapter.arangodb_to_dgl("FakeHetero", metagraph_v1)
196+
197+
#################################################
198+
# 2.4: via Metagraph with user-defined encoders #
199+
#################################################
200+
201+
# Transforms attributes via user-defined encoders
202+
metagraph_v2 = {
203+
"vertexCollections": {
204+
"Movies": {
205+
"features": { # Build a feature matrix from the "Action" & "Drama" document attributes
206+
"Action": IdentityEncoder(dtype=torch.long),
207+
"Drama": IdentityEncoder(dtype=torch.long),
208+
},
209+
"label": "Comedy",
210+
},
211+
"Users": {
212+
"features": {
213+
"Gender": CategoricalEncoder(), # CategoricalEncoder(mapping={"M": 0, "F": 1}),
214+
"Age": IdentityEncoder(dtype=torch.long),
215+
}
216+
},
217+
},
218+
"edgeCollections": {"Ratings": {"weight": "Rating"}},
219+
}
220+
221+
dgl_g = adbdgl_adapter.arangodb_to_dgl("imdb", metagraph_v2)
222+
223+
##################################################
224+
# 2.5: via Metagraph with user-defined functions #
225+
##################################################
226+
227+
# Transforms attributes via user-defined functions
228+
metagraph_v3 = {
229+
"vertexCollections": {
230+
"user": {
231+
"features": udf_user_features, # supports named functions
232+
"label": lambda df: torch.tensor(df["label"].to_list()), # also supports lambda functions
233+
},
234+
"game": {"features": udf_game_features},
235+
},
236+
"edgeCollections": {
237+
"plays": {"features": (lambda df: torch.tensor(df["features"].to_list()))},
238+
},
239+
}
240+
241+
def udf_user_features(user_df: pandas.DataFrame) -> torch.Tensor:
242+
# user_df["features"] = ...
243+
return torch.tensor(user_df["features"].to_list())
244+
245+
246+
def udf_game_features(game_df: pandas.DataFrame) -> torch.Tensor:
247+
# game_df["features"] = ...
248+
return torch.tensor(game_df["features"].to_list())
249+
250+
251+
dgl_g = adbdgl_adapter.arangodb_to_dgl("FakeHetero", metagraph_v3)
252+
```

0 commit comments

Comments
 (0)