From 80db187f91a81c354fd9acc2a03d60c14a02c0fd Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Mon, 23 Mar 2026 16:21:20 +0100 Subject: [PATCH 1/7] chore: allow dry-run for non-prod envs Signed-off-by: Mouad BANI --- .../crowdgit/services/software_value/main.go | 68 ++++++++++++------- .../git_integration/src/crowdgit/settings.py | 1 + 2 files changed, 43 insertions(+), 26 deletions(-) diff --git a/services/apps/git_integration/src/crowdgit/services/software_value/main.go b/services/apps/git_integration/src/crowdgit/services/software_value/main.go index e4dc02a412..847b071039 100644 --- a/services/apps/git_integration/src/crowdgit/services/software_value/main.go +++ b/services/apps/git_integration/src/crowdgit/services/software_value/main.go @@ -51,17 +51,25 @@ func processRepository() StandardResponse { // Process single repository (the target path argument) repoDir := config.TargetPath - insightsDb, err := NewInsightsDB(ctx, config.InsightsDatabase) - if err != nil { - errorCode := ErrorCodeDatabaseConnection - errorMessage := fmt.Sprintf("Error connecting to insights database: %v", err) - return StandardResponse{ - Status: StatusFailure, - ErrorCode: &errorCode, - ErrorMessage: &errorMessage, + dryRun := os.Getenv("IS_PROD_ENV") != "true" + + var insightsDb *InsightsDB + if !dryRun { + var dbErr error + insightsDb, dbErr = NewInsightsDB(ctx, config.InsightsDatabase) + if dbErr != nil { + errorCode := ErrorCodeDatabaseConnection + errorMessage := fmt.Sprintf("Error connecting to insights database: %v", dbErr) + return StandardResponse{ + Status: StatusFailure, + ErrorCode: &errorCode, + ErrorMessage: &errorMessage, + } } + defer insightsDb.Close() + } else { + fmt.Println("[DRY RUN] Skipping database connection") } - defer insightsDb.Close() // Get git URL for the repository gitUrl, err := getGitRepositoryURL(repoDir) @@ -88,24 +96,32 @@ func processRepository() StandardResponse { } report.Repository.URL = gitUrl - // Save to database - if err := insightsDb.saveProjectCost(ctx, report.Repository, report.Cocomo.CostInDollars); err != nil { - errorCode := ErrorCodeDatabaseOperation - errorMessage := fmt.Sprintf("Error saving project cost: %v", err) - return StandardResponse{ - Status: StatusFailure, - ErrorCode: &errorCode, - ErrorMessage: &errorMessage, + if dryRun { + fmt.Printf("[DRY RUN] Would save project cost: repo=%s cost=$%.2f\n", report.Repository.URL, report.Cocomo.CostInDollars) + fmt.Printf("[DRY RUN] Would save %d language stats entries\n", len(report.LanguageStats)) + for _, ls := range report.LanguageStats { + fmt.Printf("[DRY RUN] language=%s lines=%d code=%d\n", ls.LanguageName, ls.Lines, ls.Code) + } + } else { + // Save to database + if err := insightsDb.saveProjectCost(ctx, report.Repository, report.Cocomo.CostInDollars); err != nil { + errorCode := ErrorCodeDatabaseOperation + errorMessage := fmt.Sprintf("Error saving project cost: %v", err) + return StandardResponse{ + Status: StatusFailure, + ErrorCode: &errorCode, + ErrorMessage: &errorMessage, + } } - } - if err := insightsDb.saveLanguageStats(ctx, report.Repository, report.LanguageStats); err != nil { - errorCode := ErrorCodeDatabaseOperation - errorMessage := fmt.Sprintf("Error saving language stats: %v", err) - return StandardResponse{ - Status: StatusFailure, - ErrorCode: &errorCode, - ErrorMessage: &errorMessage, + if err := insightsDb.saveLanguageStats(ctx, report.Repository, report.LanguageStats); err != nil { + errorCode := ErrorCodeDatabaseOperation + errorMessage := fmt.Sprintf("Error saving language stats: %v", err) + return StandardResponse{ + Status: StatusFailure, + ErrorCode: &errorCode, + ErrorMessage: &errorMessage, + } } } @@ -123,7 +139,7 @@ func processRepository() StandardResponse { func getSCCReport(sccPath, dirPath string) (SCCReport, error) { cost, err := getCost(sccPath, dirPath) if err != nil { - return SCCReport{}, fmt.Errorf("error getting SCC report for '%s': %v\"", err) + return SCCReport{}, fmt.Errorf("error getting SCC report for '%s': %v", dirPath, err) } // Skip saving to database if cost is 0 - do we want to do this? diff --git a/services/apps/git_integration/src/crowdgit/settings.py b/services/apps/git_integration/src/crowdgit/settings.py index 447f2f3342..345d714d59 100644 --- a/services/apps/git_integration/src/crowdgit/settings.py +++ b/services/apps/git_integration/src/crowdgit/settings.py @@ -44,3 +44,4 @@ def load_env_var(key: str, required=True, default=None): STUCK_RECURRENT_REPO_TIMEOUT_HOURS = int( load_env_var("STUCK_RECURRENT_REPO_TIMEOUT_HOURS", default="4") ) +IS_PROD_ENV = bool(load_env_var("IS_PROD_ENV")) \ No newline at end of file From 22868fcd03beabe7c1d619203d61cfe9d25358e3 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Mon, 23 Mar 2026 16:30:58 +0100 Subject: [PATCH 2/7] chore: set IS_PROD_ENV Signed-off-by: Mouad BANI --- services/apps/git_integration/src/crowdgit/settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/apps/git_integration/src/crowdgit/settings.py b/services/apps/git_integration/src/crowdgit/settings.py index 345d714d59..c2f264c565 100644 --- a/services/apps/git_integration/src/crowdgit/settings.py +++ b/services/apps/git_integration/src/crowdgit/settings.py @@ -44,4 +44,4 @@ def load_env_var(key: str, required=True, default=None): STUCK_RECURRENT_REPO_TIMEOUT_HOURS = int( load_env_var("STUCK_RECURRENT_REPO_TIMEOUT_HOURS", default="4") ) -IS_PROD_ENV = bool(load_env_var("IS_PROD_ENV")) \ No newline at end of file +IS_PROD_ENV: bool = load_env_var("NODE_ENV", required=False) == "production" \ No newline at end of file From e6955b890b5bf8d19648534e85f83e2c272854ea Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Mon, 23 Mar 2026 18:03:07 +0100 Subject: [PATCH 3/7] fix: disable concurrency on scc when repo size greater than 10G Signed-off-by: Mouad BANI --- .../crowdgit/services/software_value/main.go | 47 ++++++++++++------- .../software_value/software_value_service.py | 34 +++++++++++++- .../git_integration/src/crowdgit/settings.py | 2 +- 3 files changed, 64 insertions(+), 19 deletions(-) diff --git a/services/apps/git_integration/src/crowdgit/services/software_value/main.go b/services/apps/git_integration/src/crowdgit/services/software_value/main.go index 847b071039..93229cd347 100644 --- a/services/apps/git_integration/src/crowdgit/services/software_value/main.go +++ b/services/apps/git_integration/src/crowdgit/services/software_value/main.go @@ -3,6 +3,7 @@ package main import ( "context" "encoding/json" + "flag" "fmt" "os" "os/exec" @@ -13,23 +14,27 @@ import ( ) func main() { - response := processRepository() + numProcessors := flag.Int("num-processors", 0, "Number of parallel scc workers (0 = scc default, 1 = minimum for large repos)") + flag.Parse() + + response := processRepository(*numProcessors) outputJSON(response) // Always exit with code 0 - status details are in JSON response } // processRepository handles the main logic and returns a StandardResponse -func processRepository() StandardResponse { +func processRepository(numProcessors int) StandardResponse { ctx := context.Background() - // Get target path from command line argument + // Get target path from remaining non-flag arguments + args := flag.Args() var targetPath string - if len(os.Args) > 1 { - targetPath = os.Args[1] + if len(args) > 0 { + targetPath = args[0] } else { errorCode := ErrorCodeInvalidArguments - errorMessage := fmt.Sprintf("Usage: %s ", os.Args[0]) + errorMessage := fmt.Sprintf("Usage: %s [--num-processors N] ", os.Args[0]) return StandardResponse{ Status: StatusFailure, ErrorCode: &errorCode, @@ -84,7 +89,7 @@ func processRepository() StandardResponse { } // Process the repository with SCC - report, err := getSCCReport(config.SCCPath, repoDir) + report, err := getSCCReport(config.SCCPath, repoDir, numProcessors) if err != nil { errorCode := getErrorCodeFromSCCError(err) errorMessage := fmt.Sprintf("Error processing repository '%s': %v", repoDir, err) @@ -136,8 +141,8 @@ func processRepository() StandardResponse { // getSCCReport analyzes a directory with scc and returns a report containing the estimated cost and language statistics. -func getSCCReport(sccPath, dirPath string) (SCCReport, error) { - cost, err := getCost(sccPath, dirPath) +func getSCCReport(sccPath, dirPath string, numProcessors int) (SCCReport, error) { + cost, err := getCost(sccPath, dirPath, numProcessors) if err != nil { return SCCReport{}, fmt.Errorf("error getting SCC report for '%s': %v", dirPath, err) } @@ -149,7 +154,7 @@ func getSCCReport(sccPath, dirPath string) (SCCReport, error) { projectPath := filepath.Base(dirPath) - langStats, err := getLanguageStats(sccPath, dirPath) + langStats, err := getLanguageStats(sccPath, dirPath, numProcessors) if err != nil { return SCCReport{}, fmt.Errorf("error getting language stats for '%s': %v", dirPath, err) } @@ -193,8 +198,8 @@ func getGitRepositoryURL(dirPath string) (string, error) { } // getCost runs the scc command and parses the output to get the estimated cost. -func getCost(sccPathPath, repoPath string) (float64, error) { - output, err := runSCC(sccPathPath, "--format=short", repoPath) +func getCost(sccPathPath, repoPath string, numProcessors int) (float64, error) { + output, err := runSCC(sccPathPath, numProcessors, "--format=short", repoPath) if err != nil { return 0, fmt.Errorf("failed to run scc command: %w", err) } @@ -208,8 +213,8 @@ func getCost(sccPathPath, repoPath string) (float64, error) { } // getLanguageStats runs the scc command and parses the output to get language statistics. -func getLanguageStats(sccPathPath, repoPath string) ([]LanguageStats, error) { - output, err := runSCC(sccPathPath, "--format=json", repoPath) +func getLanguageStats(sccPathPath, repoPath string, numProcessors int) ([]LanguageStats, error) { + output, err := runSCC(sccPathPath, numProcessors, "--format=json", repoPath) if err != nil { return nil, fmt.Errorf("failed to run scc command: %w", err) } @@ -223,8 +228,18 @@ func getLanguageStats(sccPathPath, repoPath string) ([]LanguageStats, error) { } // runSCC executes the scc command with the given arguments and returns the output. -func runSCC(sccPathPath string, args ...string) (string, error) { - cmd := exec.Command(sccPathPath, args...) +// When numProcessors > 0, scc is run with reduced parallelism to limit memory usage on large repos. +func runSCC(sccPathPath string, numProcessors int, args ...string) (string, error) { + var cmdArgs []string + if numProcessors > 0 { + n := strconv.Itoa(numProcessors) + cmdArgs = append(cmdArgs, + "--directory-walker-job-workers", n, + "--file-process-job-workers", n, + ) + } + cmdArgs = append(cmdArgs, args...) + cmd := exec.Command(sccPathPath, cmdArgs...) output, err := cmd.Output() if err != nil { if exitErr, ok := err.(*exec.ExitError); ok { diff --git a/services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py b/services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py index 808d880250..491d46f5e0 100644 --- a/services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py +++ b/services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py @@ -1,4 +1,5 @@ import json +import subprocess import time from decimal import Decimal @@ -8,6 +9,21 @@ from crowdgit.services.base.base_service import BaseService from crowdgit.services.utils import run_shell_command +_LARGE_REPO_THRESHOLD_BYTES = 10 * 1024 * 1024 * 1024 # 10 GB + + +def _get_repo_size_bytes(repo_path: str) -> int: + """Return total disk usage of repo_path in bytes using du -sb.""" + try: + result = subprocess.run( + ["du", "-sb", repo_path], capture_output=True, text=True, timeout=120 + ) + if result.returncode == 0: + return int(result.stdout.split()[0]) + except Exception: + pass + return 0 + class SoftwareValueService(BaseService): """Service for calculating software value metrics""" @@ -20,7 +36,9 @@ def __init__(self): async def run(self, repo_id: str, repo_path: str) -> None: """ Triggers software value binary for given repo. - Results are saved into insights database directly + Results are saved into insights database directly. + For repos larger than 10 GB, scc is run with minimum parallelism (1 worker) + to avoid OOM; results are identical. """ start_time = time.time() execution_status = ExecutionStatus.SUCCESS @@ -28,8 +46,20 @@ async def run(self, repo_id: str, repo_path: str) -> None: error_message = None try: + cmd = [self.software_value_executable] + + repo_size = _get_repo_size_bytes(repo_path) + if repo_size >= _LARGE_REPO_THRESHOLD_BYTES: + self.logger.info( + f"Repo size {repo_size / (1024**3):.1f} GB exceeds threshold — " + "running scc with num-processors=1" + ) + cmd += ["--num-processors", "1"] + + cmd.append(repo_path) + self.logger.info("Running software value...") - output = await run_shell_command([self.software_value_executable, repo_path]) + output = await run_shell_command(cmd) self.logger.info(f"Software value output: {output}") # Parse JSON output and extract fields from StandardResponse structure diff --git a/services/apps/git_integration/src/crowdgit/settings.py b/services/apps/git_integration/src/crowdgit/settings.py index c2f264c565..1f0d6ae07f 100644 --- a/services/apps/git_integration/src/crowdgit/settings.py +++ b/services/apps/git_integration/src/crowdgit/settings.py @@ -44,4 +44,4 @@ def load_env_var(key: str, required=True, default=None): STUCK_RECURRENT_REPO_TIMEOUT_HOURS = int( load_env_var("STUCK_RECURRENT_REPO_TIMEOUT_HOURS", default="4") ) -IS_PROD_ENV: bool = load_env_var("NODE_ENV", required=False) == "production" \ No newline at end of file +IS_PROD_ENV: bool = load_env_var("NODE_ENV", required=False) == "production" From d849c6c54c416828beed7f67179481acec78e1f3 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Tue, 24 Mar 2026 15:08:55 +0100 Subject: [PATCH 4/7] chore: upgrade scc version Signed-off-by: Mouad BANI --- scripts/services/docker/Dockerfile.git_integration | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/services/docker/Dockerfile.git_integration b/scripts/services/docker/Dockerfile.git_integration index 84895879d1..ef15c18eef 100644 --- a/scripts/services/docker/Dockerfile.git_integration +++ b/scripts/services/docker/Dockerfile.git_integration @@ -7,7 +7,7 @@ FROM golang:1.25-alpine AS go-builder WORKDIR /go/src/software-value # Install scc using the official Go toolchain (specific version as per project README) -RUN go install github.com/boyter/scc/v3@v3.5.0 +RUN go install github.com/boyter/scc/v3@v3.7.0 # Copy Go module files COPY ./services/apps/git_integration/src/crowdgit/services/software_value/go.mod ./ From beb0f2584118cbf0b95e678360c76d2badceb0be Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Tue, 24 Mar 2026 17:41:24 +0100 Subject: [PATCH 5/7] fix: use no-large param Signed-off-by: Mouad BANI --- .../crowdgit/services/software_value/main.go | 36 +++++++++---------- .../software_value/software_value_service.py | 4 +-- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/services/apps/git_integration/src/crowdgit/services/software_value/main.go b/services/apps/git_integration/src/crowdgit/services/software_value/main.go index 93229cd347..70b2de8572 100644 --- a/services/apps/git_integration/src/crowdgit/services/software_value/main.go +++ b/services/apps/git_integration/src/crowdgit/services/software_value/main.go @@ -14,17 +14,17 @@ import ( ) func main() { - numProcessors := flag.Int("num-processors", 0, "Number of parallel scc workers (0 = scc default, 1 = minimum for large repos)") + noLarge := flag.Bool("no-large", false, "Skip files larger than 100MB to avoid OOM on large repos") flag.Parse() - response := processRepository(*numProcessors) + response := processRepository(*noLarge) outputJSON(response) // Always exit with code 0 - status details are in JSON response } // processRepository handles the main logic and returns a StandardResponse -func processRepository(numProcessors int) StandardResponse { +func processRepository(noLarge bool) StandardResponse { ctx := context.Background() // Get target path from remaining non-flag arguments @@ -34,7 +34,7 @@ func processRepository(numProcessors int) StandardResponse { targetPath = args[0] } else { errorCode := ErrorCodeInvalidArguments - errorMessage := fmt.Sprintf("Usage: %s [--num-processors N] ", os.Args[0]) + errorMessage := fmt.Sprintf("Usage: %s [--no-large] ", os.Args[0]) return StandardResponse{ Status: StatusFailure, ErrorCode: &errorCode, @@ -89,7 +89,7 @@ func processRepository(numProcessors int) StandardResponse { } // Process the repository with SCC - report, err := getSCCReport(config.SCCPath, repoDir, numProcessors) + report, err := getSCCReport(config.SCCPath, repoDir, noLarge) if err != nil { errorCode := getErrorCodeFromSCCError(err) errorMessage := fmt.Sprintf("Error processing repository '%s': %v", repoDir, err) @@ -141,8 +141,8 @@ func processRepository(numProcessors int) StandardResponse { // getSCCReport analyzes a directory with scc and returns a report containing the estimated cost and language statistics. -func getSCCReport(sccPath, dirPath string, numProcessors int) (SCCReport, error) { - cost, err := getCost(sccPath, dirPath, numProcessors) +func getSCCReport(sccPath, dirPath string, noLarge bool) (SCCReport, error) { + cost, err := getCost(sccPath, dirPath, noLarge) if err != nil { return SCCReport{}, fmt.Errorf("error getting SCC report for '%s': %v", dirPath, err) } @@ -154,7 +154,7 @@ func getSCCReport(sccPath, dirPath string, numProcessors int) (SCCReport, error) projectPath := filepath.Base(dirPath) - langStats, err := getLanguageStats(sccPath, dirPath, numProcessors) + langStats, err := getLanguageStats(sccPath, dirPath, noLarge) if err != nil { return SCCReport{}, fmt.Errorf("error getting language stats for '%s': %v", dirPath, err) } @@ -198,8 +198,8 @@ func getGitRepositoryURL(dirPath string) (string, error) { } // getCost runs the scc command and parses the output to get the estimated cost. -func getCost(sccPathPath, repoPath string, numProcessors int) (float64, error) { - output, err := runSCC(sccPathPath, numProcessors, "--format=short", repoPath) +func getCost(sccPathPath, repoPath string, noLarge bool) (float64, error) { + output, err := runSCC(sccPathPath, noLarge, "--format=short", repoPath) if err != nil { return 0, fmt.Errorf("failed to run scc command: %w", err) } @@ -213,8 +213,8 @@ func getCost(sccPathPath, repoPath string, numProcessors int) (float64, error) { } // getLanguageStats runs the scc command and parses the output to get language statistics. -func getLanguageStats(sccPathPath, repoPath string, numProcessors int) ([]LanguageStats, error) { - output, err := runSCC(sccPathPath, numProcessors, "--format=json", repoPath) +func getLanguageStats(sccPathPath, repoPath string, noLarge bool) ([]LanguageStats, error) { + output, err := runSCC(sccPathPath, noLarge, "--format=json", repoPath) if err != nil { return nil, fmt.Errorf("failed to run scc command: %w", err) } @@ -228,15 +228,11 @@ func getLanguageStats(sccPathPath, repoPath string, numProcessors int) ([]Langua } // runSCC executes the scc command with the given arguments and returns the output. -// When numProcessors > 0, scc is run with reduced parallelism to limit memory usage on large repos. -func runSCC(sccPathPath string, numProcessors int, args ...string) (string, error) { +// When noLarge is true, files larger than 100MB are skipped to avoid OOM on large repos. +func runSCC(sccPathPath string, noLarge bool, args ...string) (string, error) { var cmdArgs []string - if numProcessors > 0 { - n := strconv.Itoa(numProcessors) - cmdArgs = append(cmdArgs, - "--directory-walker-job-workers", n, - "--file-process-job-workers", n, - ) + if noLarge { + cmdArgs = append(cmdArgs, "--no-large", "--large-file-limit", "100000000") } cmdArgs = append(cmdArgs, args...) cmd := exec.Command(sccPathPath, cmdArgs...) diff --git a/services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py b/services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py index 491d46f5e0..b5d9e23001 100644 --- a/services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py +++ b/services/apps/git_integration/src/crowdgit/services/software_value/software_value_service.py @@ -52,9 +52,9 @@ async def run(self, repo_id: str, repo_path: str) -> None: if repo_size >= _LARGE_REPO_THRESHOLD_BYTES: self.logger.info( f"Repo size {repo_size / (1024**3):.1f} GB exceeds threshold — " - "running scc with num-processors=1" + "running scc with no-large (skipping files >100MB)" ) - cmd += ["--num-processors", "1"] + cmd += ["--no-large"] cmd.append(repo_path) From 0bf3da1e5ae065894620848ada2593c5a8845398 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Tue, 24 Mar 2026 17:57:41 +0100 Subject: [PATCH 6/7] fix: param typo Signed-off-by: Mouad BANI --- .../src/crowdgit/services/software_value/main.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/apps/git_integration/src/crowdgit/services/software_value/main.go b/services/apps/git_integration/src/crowdgit/services/software_value/main.go index 70b2de8572..ddfae22174 100644 --- a/services/apps/git_integration/src/crowdgit/services/software_value/main.go +++ b/services/apps/git_integration/src/crowdgit/services/software_value/main.go @@ -232,7 +232,7 @@ func getLanguageStats(sccPathPath, repoPath string, noLarge bool) ([]LanguageSta func runSCC(sccPathPath string, noLarge bool, args ...string) (string, error) { var cmdArgs []string if noLarge { - cmdArgs = append(cmdArgs, "--no-large", "--large-file-limit", "100000000") + cmdArgs = append(cmdArgs, "--no-large", "--large-byte-count", "100000000") } cmdArgs = append(cmdArgs, args...) cmd := exec.Command(sccPathPath, cmdArgs...) From 4a68cc76b0066ba350dc59b4754cb7f76e0d55f5 Mon Sep 17 00:00:00 2001 From: Mouad BANI Date: Tue, 24 Mar 2026 18:19:36 +0100 Subject: [PATCH 7/7] chore: undo dry-run and version change Signed-off-by: Mouad BANI --- .../docker/Dockerfile.git_integration | 2 +- .../crowdgit/services/software_value/main.go | 66 +++++++------------ .../git_integration/src/crowdgit/settings.py | 1 - 3 files changed, 26 insertions(+), 43 deletions(-) diff --git a/scripts/services/docker/Dockerfile.git_integration b/scripts/services/docker/Dockerfile.git_integration index ef15c18eef..84895879d1 100644 --- a/scripts/services/docker/Dockerfile.git_integration +++ b/scripts/services/docker/Dockerfile.git_integration @@ -7,7 +7,7 @@ FROM golang:1.25-alpine AS go-builder WORKDIR /go/src/software-value # Install scc using the official Go toolchain (specific version as per project README) -RUN go install github.com/boyter/scc/v3@v3.7.0 +RUN go install github.com/boyter/scc/v3@v3.5.0 # Copy Go module files COPY ./services/apps/git_integration/src/crowdgit/services/software_value/go.mod ./ diff --git a/services/apps/git_integration/src/crowdgit/services/software_value/main.go b/services/apps/git_integration/src/crowdgit/services/software_value/main.go index ddfae22174..0b2fba9229 100644 --- a/services/apps/git_integration/src/crowdgit/services/software_value/main.go +++ b/services/apps/git_integration/src/crowdgit/services/software_value/main.go @@ -56,25 +56,17 @@ func processRepository(noLarge bool) StandardResponse { // Process single repository (the target path argument) repoDir := config.TargetPath - dryRun := os.Getenv("IS_PROD_ENV") != "true" - - var insightsDb *InsightsDB - if !dryRun { - var dbErr error - insightsDb, dbErr = NewInsightsDB(ctx, config.InsightsDatabase) - if dbErr != nil { - errorCode := ErrorCodeDatabaseConnection - errorMessage := fmt.Sprintf("Error connecting to insights database: %v", dbErr) - return StandardResponse{ - Status: StatusFailure, - ErrorCode: &errorCode, - ErrorMessage: &errorMessage, - } + insightsDb, dbErr := NewInsightsDB(ctx, config.InsightsDatabase) + if dbErr != nil { + errorCode := ErrorCodeDatabaseConnection + errorMessage := fmt.Sprintf("Error connecting to insights database: %v", dbErr) + return StandardResponse{ + Status: StatusFailure, + ErrorCode: &errorCode, + ErrorMessage: &errorMessage, } - defer insightsDb.Close() - } else { - fmt.Println("[DRY RUN] Skipping database connection") } + defer insightsDb.Close() // Get git URL for the repository gitUrl, err := getGitRepositoryURL(repoDir) @@ -101,32 +93,24 @@ func processRepository(noLarge bool) StandardResponse { } report.Repository.URL = gitUrl - if dryRun { - fmt.Printf("[DRY RUN] Would save project cost: repo=%s cost=$%.2f\n", report.Repository.URL, report.Cocomo.CostInDollars) - fmt.Printf("[DRY RUN] Would save %d language stats entries\n", len(report.LanguageStats)) - for _, ls := range report.LanguageStats { - fmt.Printf("[DRY RUN] language=%s lines=%d code=%d\n", ls.LanguageName, ls.Lines, ls.Code) - } - } else { - // Save to database - if err := insightsDb.saveProjectCost(ctx, report.Repository, report.Cocomo.CostInDollars); err != nil { - errorCode := ErrorCodeDatabaseOperation - errorMessage := fmt.Sprintf("Error saving project cost: %v", err) - return StandardResponse{ - Status: StatusFailure, - ErrorCode: &errorCode, - ErrorMessage: &errorMessage, - } + // Save to database + if err := insightsDb.saveProjectCost(ctx, report.Repository, report.Cocomo.CostInDollars); err != nil { + errorCode := ErrorCodeDatabaseOperation + errorMessage := fmt.Sprintf("Error saving project cost: %v", err) + return StandardResponse{ + Status: StatusFailure, + ErrorCode: &errorCode, + ErrorMessage: &errorMessage, } + } - if err := insightsDb.saveLanguageStats(ctx, report.Repository, report.LanguageStats); err != nil { - errorCode := ErrorCodeDatabaseOperation - errorMessage := fmt.Sprintf("Error saving language stats: %v", err) - return StandardResponse{ - Status: StatusFailure, - ErrorCode: &errorCode, - ErrorMessage: &errorMessage, - } + if err := insightsDb.saveLanguageStats(ctx, report.Repository, report.LanguageStats); err != nil { + errorCode := ErrorCodeDatabaseOperation + errorMessage := fmt.Sprintf("Error saving language stats: %v", err) + return StandardResponse{ + Status: StatusFailure, + ErrorCode: &errorCode, + ErrorMessage: &errorMessage, } } diff --git a/services/apps/git_integration/src/crowdgit/settings.py b/services/apps/git_integration/src/crowdgit/settings.py index 1f0d6ae07f..447f2f3342 100644 --- a/services/apps/git_integration/src/crowdgit/settings.py +++ b/services/apps/git_integration/src/crowdgit/settings.py @@ -44,4 +44,3 @@ def load_env_var(key: str, required=True, default=None): STUCK_RECURRENT_REPO_TIMEOUT_HOURS = int( load_env_var("STUCK_RECURRENT_REPO_TIMEOUT_HOURS", default="4") ) -IS_PROD_ENV: bool = load_env_var("NODE_ENV", required=False) == "production"