-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcodebase-dump.sh
More file actions
132 lines (114 loc) · 3.94 KB
/
codebase-dump.sh
File metadata and controls
132 lines (114 loc) · 3.94 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
#!/usr/bin/env bash
# ================================================================
# codebase-dump.sh — AI-ready full project codebase dump
# Respects .gitignore | skips lock-files | skips binaries
#
# Usage:
# bash codebase-dump.sh (full project)
# bash codebase-dump.sh -d src/components (subfolder, recursive)
# bash codebase-dump.sh --no-recursive (root files only)
# bash codebase-dump.sh -d src/components --no-recursive (subfolder, top-level only)
# ================================================================
# ----------------------------------------------------------------
# Command-line options
#   -d, --dir DIR        dump only DIR (default: ".", the current dir)
#   -R, --no-recursive   dump only files directly inside the target dir
# Results are stored in the globals TARGET_DIR and RECURSIVE
# ("true" / "false"), which the rest of the script reads.
# ----------------------------------------------------------------
TARGET_DIR="."
RECURSIVE="true"

#######################################
# Parse the script arguments into TARGET_DIR / RECURSIVE.
# Globals:   TARGET_DIR (written), RECURSIVE (written)
# Arguments: the script's own "$@"
# Outputs:   error message on stderr for bad usage
# Returns:   exits 1 on an unknown option or a missing -d value
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      -d|--dir)
        # `shift 2` with a single remaining argument fails WITHOUT
        # shifting, which would leave this loop spinning forever.
        if [[ $# -lt 2 || -z "$2" ]]; then
          echo "❌ Option $1 requires a directory argument" >&2
          exit 1
        fi
        TARGET_DIR="$2"
        shift 2
        ;;
      -R|--no-recursive)
        RECURSIVE="false"
        shift
        ;;
      *)
        echo "❌ Unknown argument: $1" >&2
        exit 1
        ;;
    esac
  done

  # git prints paths as "src/file" — never "./src/file" or "src//file" —
  # so bring the target into the same shape before it is used to build
  # the output file name and the top-level-only filter.
  while [[ "$TARGET_DIR" == ./[!/]* ]]; do
    TARGET_DIR="${TARGET_DIR#./}"
  done
  while [[ "$TARGET_DIR" == ?*/ ]]; do
    TARGET_DIR="${TARGET_DIR%/}"
  done
}

parse_args "$@"
# Directory (relative to the current directory) that receives every dump.
DUMPS_DIR="dumps/codebase"

#######################################
# Derive the dump file name from the selected target and mode:
#   "."  recursive      -> codebase-dump.txt
#   "."  top-level only -> codebase-dump_root-only.txt
#   DIR  recursive      -> codebase-dump_<DIR>.txt
#   DIR  top-level only -> codebase-dump_<DIR>_top-only.txt
# where <DIR> is TARGET_DIR with every "/" replaced by "_".
# Globals:   TARGET_DIR, RECURSIVE, DUMPS_DIR (read);
#            SAFE_DIR, OUTPUT (written)
#######################################
resolve_output_path() {
  local suffix=""

  if [[ "$TARGET_DIR" != "." ]]; then
    # Parameter expansion instead of `echo | tr`: no subprocesses, and
    # values that echo would swallow as options (e.g. "-n") survive.
    SAFE_DIR="${TARGET_DIR//\//_}"
    suffix="_${SAFE_DIR}"
    if [[ "$RECURSIVE" == "false" ]]; then
      suffix+="_top-only"
    fi
  elif [[ "$RECURSIVE" == "false" ]]; then
    suffix="_root-only"
  fi

  OUTPUT="${DUMPS_DIR}/codebase-dump${suffix}.txt"
}

# Fail right away if the destination cannot be created instead of
# letting the redirect into $OUTPUT fail much later.
mkdir -p "$DUMPS_DIR" || {
  echo "❌ Cannot create output directory: ${DUMPS_DIR}" >&2
  exit 1
}
resolve_output_path
# File name of this script without its directory part; get_files removes
# a listing entry that is exactly equal to it.
THIS_SCRIPT="$(basename -- "${BASH_SOURCE[0]:-$0}")"

# Dependency lock files that are never dumped. Each entry is an ERE
# fragment (dots escaped); the list is joined with "|" into LOCK_RE,
# which matches the name at the start of a path or right after a "/".
LOCK_FILE_PATTERNS=(
  'package-lock\.json'
  'yarn\.lock'
  'pnpm-lock\.yaml'
  'composer\.lock'
  'Gemfile\.lock'
  'poetry\.lock'
  'Cargo\.lock'
  'Pipfile\.lock'
  'packages\.lock\.json'
  'npm-shrinkwrap\.json'
  'bun\.lockb'
  'shrinkwrap\.json'
)
LOCK_RE="(/|^)($(IFS='|'; printf '%s' "${LOCK_FILE_PATTERNS[*]}"))\$"
# The file list comes from `git ls-files`, so stop here when
# `git rev-parse --git-dir` fails for the current directory.
git rev-parse --git-dir >/dev/null 2>&1 || {
  echo "❌ Not a git repository. Run from the project root." >&2
  exit 1
}
#######################################
# Print the paths to dump, one per line, sorted and de-duplicated.
#
# The list is every tracked file plus every untracked file that is not
# ignored, restricted to TARGET_DIR, minus:
#   - lock files matching LOCK_RE
#   - the dump being written ($OUTPUT) and anything else in its
#     directory, so dumps from earlier runs are not fed back in
#   - an entry equal to THIS_SCRIPT
# With RECURSIVE="false" only files directly inside the target remain.
#
# Globals:   TARGET_DIR, RECURSIVE, LOCK_RE, OUTPUT, THIS_SCRIPT (read)
# Arguments: none
# Outputs:   file paths on stdout, relative to the current directory
# Note:      paths containing quotes, backslashes or control characters
#            are still printed C-quoted by git and will not resolve.
#######################################
get_files() {
  local dir prefix dump_dir path rest

  # git prints "src/file", so "src/", "src//" and "./src" have to be
  # reduced to "src" before a prefix can be compared against its output.
  dir="${TARGET_DIR:-.}"
  while [[ "$dir" == ./[!/]* ]]; do dir="${dir#./}"; done
  while [[ "$dir" == ?*/ ]]; do dir="${dir%/}"; done

  if [[ "$dir" == "." ]]; then
    prefix=""
  else
    prefix="${dir}/"
  fi

  dump_dir=""
  if [[ "$OUTPUT" == */* ]]; then
    dump_dir="${OUTPUT%/*}"
  fi

  {
    # core.quotepath=off keeps non-ASCII names verbatim instead of
    # "\303\251"-style escapes that would later fail the -f test.
    git -c core.quotepath=off ls-files -- "$dir"
    # Untracked (but not ignored) files are included as well; comment
    # out the next line to dump tracked files only.
    git -c core.quotepath=off ls-files --others --exclude-standard -- "$dir"
  } 2>/dev/null \
    | sort -u \
    | grep -vE -- "$LOCK_RE" \
    | grep -vxF -- "$OUTPUT" \
    | grep -vxF -- "$THIS_SCRIPT" \
    | while IFS= read -r path; do
        if [[ -n "$dump_dir" && "$path" == "$dump_dir"/* ]]; then
          continue
        fi
        if [[ "$RECURSIVE" == "false" ]]; then
          # Literal prefix comparison: directory names such as "a+b" or
          # "v1.0" must not be interpreted as a regular expression.
          [[ "$path" == "$prefix"* ]] || continue
          rest="${path#"$prefix"}"
          [[ -n "$rest" && "$rest" != */* ]] || continue
        fi
        printf '%s\n' "$path"
      done
}
#######################################
# Turn a list of slash-separated paths into an indented text tree.
# Every directory is printed once, the first time it is seen, as
# "+-- name/"; every file is printed as "|-- name". Each nesting level
# adds one "| " to the indentation.
# Arguments: none
# Inputs:    one path per line on stdin
# Outputs:   the tree on stdout
#######################################
build_tree() {
  awk -F/ '
    {
      path = ""
      indent = ""
      for (depth = 1; depth <= NF; depth++) {
        # Grow the path one component at a time; it doubles as the
        # "already printed" key.
        path = (path == "" ? $depth : path "/" $depth)
        if (!(path in seen)) {
          seen[path] = 1
          if (depth < NF)
            print indent "+-- " $depth "/"
          else
            print indent "|-- " $depth
        }
        indent = indent "| "
      }
    }
  '
}
#######################################
# Decide whether a file is binary: it is if a NUL byte occurs within
# its first 8192 bytes.
# Arguments: $1 - path of the file to inspect
# Returns:   0 if binary; non-zero if not (also for empty or
#            unreadable files)
#######################################
is_binary() {
  local file=$1 raw_len text_len

  [[ -r "$file" ]] || return 1

  if command -v perl >/dev/null 2>&1; then
    perl -e 'read(STDIN,$b,8192); exit(index($b,"\x00")>=0 ? 0 : 1)' \
      < "$file" 2>/dev/null
    return
  fi

  # No perl available: without this fallback every file would be
  # reported as text and binary blobs would be written into the dump.
  # Compare the size of the first block with and without NUL bytes.
  raw_len=$(dd if="$file" bs=8192 count=1 2>/dev/null | wc -c)
  text_len=$(dd if="$file" bs=8192 count=1 2>/dev/null | tr -d '\000' | wc -c)
  (( ${raw_len:-0} != ${text_len:-0} ))
}
# Take the listing once; the tree, the counter and the content loop all
# work from this same snapshot.
FILE_LIST=$(get_files)

# Count non-empty lines. `grep -c` prints "0" itself when nothing
# matches but also exits 1, so the fallback must not print a second
# "0" — `|| true` only neutralises the exit status.
FILE_COUNT=$(printf '%s\n' "$FILE_LIST" | grep -c . || true)
FILE_COUNT=${FILE_COUNT:-0}

if [[ "$FILE_COUNT" -eq 0 ]]; then
  echo "⚠️  No files found for '${TARGET_DIR}' — the dump will contain headers only" >&2
fi
echo "⏳ Collecting ${FILE_COUNT} files…"
# Write the dump. Everything the group prints on stdout lands in
# $OUTPUT: first the directory tree, then each file's content under its
# own banner. The progress line goes to stderr so it never ends up in
# the dump itself.
{
  printf '==========================================\n'
  printf ' PROJECT STRUCTURE\n'
  printf '==========================================\n\n'
  printf '%s\n' "$FILE_LIST" | build_tree
  printf '\n==========================================\n'
  printf ' FILE CONTENTS\n'
  printf '==========================================\n'
  i=0
  while IFS= read -r f; do
    # Skip blank entries and anything that is not a regular file on
    # disk at this moment.
    [[ -z "$f" || ! -f "$f" ]] && continue
    i=$((i + 1))
    printf "\r\033[K\033[0;36m▸\033[0m [%d/%d] Processing: %s" "$i" "$FILE_COUNT" "$f" >&2
    printf '\n================== %s ==================\n\n' "$f"
    if is_binary "$f"; then
      printf '[BINARY FILE — content skipped]\n'
    else
      # `--` keeps a file whose name starts with "-" from being parsed
      # as an option by cat / tail.
      cat -- "$f"
      # Command substitution drops a trailing newline, so the result is
      # non-empty only when the file does not end with one; add it so
      # the next banner starts on a fresh line.
      [[ -n "$(tail -c1 -- "$f")" ]] && printf '\n'
    fi
  done <<< "$FILE_LIST"
  printf "\n" >&2
} > "$OUTPUT"
# A missing or zero-byte dump means the write above did not succeed:
# remove whatever is there and report failure.
[[ -s "$OUTPUT" ]] || {
  echo "❌ Dump failed — output file is empty or missing" >&2
  rm -f "$OUTPUT"
  exit 1
}

# Size in KB with one decimal; awk does the floating-point division.
SIZE_KB=$(awk -v bytes="$(wc -c < "$OUTPUT")" 'BEGIN { printf "%.1f", bytes / 1024 }')
echo "✅ Dump saved → ./${OUTPUT} (${FILE_COUNT} files, ${SIZE_KB} KB)"