-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathVisualizing_dependancy.py
More file actions
39 lines (32 loc) · 1.1 KB
/
Visualizing_dependancy.py
File metadata and controls
39 lines (32 loc) · 1.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# 3.2.1 Install
# pip install networkx node2vec matplotlib scikit-learn pandas
import networkx as nx
from node2vec import Node2Vec
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import pandas as pd
# 1. Load the import graph and the per-file churn data.
#    edges.csv columns: src,dst   (one row per import edge)
#    churn.csv columns: file,churn
edges = pd.read_csv("edges.csv")
churn = pd.read_csv("churn.csv").set_index("file")
G = nx.from_pandas_edgelist(edges, "src", "dst", create_using=nx.DiGraph())
if G.number_of_nodes() < 2:
    # The 2-component PCA in step 4 needs at least two samples; fail here
    # with a clear message instead of after the expensive walk generation.
    raise ValueError("edges.csv must describe at least two distinct nodes")

# 2. Generate Node2Vec walks over the directed import graph.
n2v = Node2Vec(G, dimensions=64, walk_length=30, num_walks=200, workers=4)

# 3. Fit the skip-gram model and collect one embedding per graph node.
model = n2v.fit(window=10, min_count=1)
# node2vec records every node under its string form, and an integer key
# would be read by gensim as a positional index, so always look up str(node).
embs = {node: model.wv[str(node)] for node in G.nodes()}

# 4. Project the 64-dimensional embeddings to 2D for plotting.
X = list(embs.values())
pca = PCA(n_components=2)
coords = pca.fit_transform(X)
nodes = list(embs.keys())

# 5. Plot one marker per module, with marker area proportional to churn.
# A plain dict lookup lets nodes that appear in edges.csv but not in
# churn.csv fall back to 0 instead of aborting the plot with a KeyError.
churn_by_file = churn["churn"].to_dict()
plt.figure(figsize=(10, 8))
for (x, y), node in zip(coords, nodes):
    size = churn_by_file.get(node, 0) * 10  # scale for visibility
    # label is attached to each artist, but no legend is drawn by this script.
    plt.scatter(x, y, s=size, alpha=0.6, label=node)
plt.title("Module Embeddings with Churn Size")
plt.xlabel("PC 1")
plt.ylabel("PC 2")
plt.show()