Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,11 @@ S3_ACCESS_KEY_ID=your_s3_access_key_id_here
S3_SECRET_ACCESS_KEY=your_s3_secret_access_key_here
S3_BUCKET=your_s3_bucket_name_here
S3_ENDPOINT=https://<ACCOUNT_ID>.r2.cloudflarestorage.com

# ClickHouse database
CLICKHOUSE_HOST=clickhouse
CLICKHOUSE_PORT=8123
CLICKHOUSE_SSL=false
CLICKHOUSE_DATABASE=hackatime_development
CLICKHOUSE_USERNAME=default
CLICKHOUSE_PASSWORD=
17 changes: 17 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,13 @@ jobs:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: app_test
clickhouse:
image: clickhouse/clickhouse-server:latest
ports:
- 8123:8123
env:
CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
options: --health-cmd "wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1" --health-interval 10s --health-timeout 5s --health-retries 5

steps:
- name: Checkout code
Expand All @@ -130,6 +137,7 @@ jobs:
TEST_DATABASE_URL: postgres://postgres:postgres@localhost:5432/app_test
PGHOST: localhost
PGUSER: postgres
CLICKHOUSE_HOST: localhost
PGPASSWORD: postgres
run: |
bin/rails db:create RAILS_ENV=test
Expand All @@ -142,6 +150,7 @@ jobs:
TEST_DATABASE_URL: postgres://postgres:postgres@localhost:5432/app_test
PGHOST: localhost
PGUSER: postgres
CLICKHOUSE_HOST: localhost
PGPASSWORD: postgres
run: |
bin/rails rswag:specs:swaggerize
Expand All @@ -160,6 +169,13 @@ jobs:
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
POSTGRES_DB: app_test
clickhouse:
image: clickhouse/clickhouse-server:latest
ports:
- 8123:8123
env:
CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1
options: --health-cmd "wget --no-verbose --tries=1 --spider http://localhost:8123/ping || exit 1" --health-interval 10s --health-timeout 5s --health-retries 5

steps:
- name: Checkout code
Expand Down Expand Up @@ -200,6 +216,7 @@ jobs:
TEST_DATABASE_URL: postgres://postgres:postgres@localhost:5432/app_test
PGHOST: localhost
PGUSER: postgres
CLICKHOUSE_HOST: localhost
PGPASSWORD: postgres
CHROME_BIN: ${{ steps.setup-chrome.outputs.chrome-path }}
CHROMEDRIVER_BIN: ${{ steps.setup-chrome.outputs.chromedriver-path }}
Expand Down
3 changes: 3 additions & 0 deletions DEVELOPMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ HCA_CLIENT_ID=<hca_client_id>
HCA_CLIENT_SECRET=<hca_client_secret>
```

ClickHouse is automatically started by Docker Compose alongside Postgres — no extra setup needed.

Start the containers:

```sh
Expand All @@ -51,6 +53,7 @@ We'll now set up the database. In your container shell, run the following:

```bash
app# bin/rails db:create db:schema:load db:seed
app# bin/rails db:migrate:clickhouse
```

Run the Vite build with SSR (server-side-rendering):
Expand Down
2 changes: 2 additions & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ gem "propshaft"
gem "sqlite3", ">= 2.1"
# Use PostgreSQL as the database for Wakatime
gem "pg"
# Use ClickHouse for analytics
gem "clickhouse-activerecord", github: "daisychainapp/clickhouse-activerecord"
# Use the Puma web server [https://github.com/puma/puma]
gem "puma", ">= 5.0"
# Use JavaScript with ESM import maps [https://github.com/rails/importmap-rails]
Expand Down
9 changes: 9 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
GIT
remote: https://github.com/daisychainapp/clickhouse-activerecord.git
revision: 2eb937bc36d327ff4f2f27f1da50cc151a7f6af5
specs:
clickhouse-activerecord (1.6.6)
activerecord (>= 7.1, < 9.0)
bundler (>= 1.13.4)

GEM
remote: https://rubygems.org/
specs:
Expand Down Expand Up @@ -652,6 +660,7 @@ DEPENDENCIES
brakeman
bullet
capybara
clickhouse-activerecord!
cloudflare-rails
countries
debug
Expand Down
108 changes: 45 additions & 63 deletions app/controllers/api/admin/v1/admin_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,71 +51,77 @@ def visualization_quantized
quantized_query = <<-SQL
WITH base_heartbeats AS (
SELECT
"time",
time,
lineno,
cursorpos,
date_trunc('day', to_timestamp("time")) as day_start
toDate(toDateTime(toUInt32(time))) as day_start
FROM heartbeats
WHERE user_id = ?
AND "time" >= ? AND "time" <= ?
AND (lineno IS NOT NULL OR cursorpos IS NOT NULL)
AND time >= ? AND time <= ?
AND (lineno > 0 OR cursorpos > 0)
LIMIT 1000000
),
daily_stats AS (
SELECT
*,
GREATEST(1, MAX(lineno) OVER (PARTITION BY day_start)) as max_lineno,
GREATEST(1, MAX(cursorpos) OVER (PARTITION BY day_start)) as max_cursorpos
greatest(1, max(lineno) OVER (PARTITION BY day_start)) as max_lineno,
greatest(1, max(cursorpos) OVER (PARTITION BY day_start)) as max_cursorpos
FROM base_heartbeats
),
quantized_heartbeats AS (
SELECT
*,
ROUND(2 + (("time" - extract(epoch from day_start)) / 86400) * (396)) as qx,
ROUND(2 + (1 - CAST(lineno AS decimal) / max_lineno) * (96)) as qy_lineno,
ROUND(2 + (1 - CAST(cursorpos AS decimal) / max_cursorpos) * (96)) as qy_cursorpos
round(2 + ((time - toUInt32(toDateTime(day_start))) / 86400) * (396)) as qx,
round(2 + (1 - CAST(lineno AS Float64) / max_lineno) * (96)) as qy_lineno,
round(2 + (1 - CAST(cursorpos AS Float64) / max_cursorpos) * (96)) as qy_cursorpos
FROM daily_stats
)
SELECT "time", lineno, cursorpos
SELECT time, lineno, cursorpos
FROM (
SELECT DISTINCT ON (day_start, qx, qy_lineno) "time", lineno, cursorpos
SELECT
any(time) AS time,
any(lineno) AS lineno,
any(cursorpos) AS cursorpos
FROM quantized_heartbeats
WHERE lineno IS NOT NULL
ORDER BY day_start, qx, qy_lineno, "time" ASC
WHERE lineno > 0
GROUP BY day_start, qx, qy_lineno
) AS lineno_pixels
UNION
SELECT "time", lineno, cursorpos
UNION ALL
SELECT time, lineno, cursorpos
FROM (
SELECT DISTINCT ON (day_start, qx, qy_cursorpos) "time", lineno, cursorpos
SELECT
any(time) AS time,
any(lineno) AS lineno,
any(cursorpos) AS cursorpos
FROM quantized_heartbeats
WHERE cursorpos IS NOT NULL
ORDER BY day_start, qx, qy_cursorpos, "time" ASC
WHERE cursorpos > 0
GROUP BY day_start, qx, qy_cursorpos
) AS cursorpos_pixels
ORDER BY "time" ASC
ORDER BY time ASC
SQL

daily_totals_query = <<-SQL
WITH heartbeats_with_gaps AS (
SELECT
date_trunc('day', to_timestamp("time"))::date as day,
"time" - LAG("time", 1, "time") OVER (PARTITION BY date_trunc('day', to_timestamp("time")) ORDER BY "time") as gap
toDate(toDateTime(toUInt32(time))) as day,
time - lagInFrame(time, 1, time) OVER (PARTITION BY toDate(toDateTime(toUInt32(time))) ORDER BY time) as gap
FROM heartbeats
WHERE user_id = ? AND time >= ? AND time <= ?
)
SELECT
day,
SUM(LEAST(gap, 120)) as total_seconds
SUM(least(gap, 120)) as total_seconds
FROM heartbeats_with_gaps
WHERE gap IS NOT NULL
GROUP BY day
SQL

quantized_result = ActiveRecord::Base.connection.execute(
ActiveRecord::Base.sanitize_sql([ quantized_query, user.id, start_epoch, end_epoch ])
quantized_result = Heartbeat.connection.select_all(
Heartbeat.sanitize_sql([ quantized_query, user.id, start_epoch, end_epoch ])
)

daily_totals_result = ActiveRecord::Base.connection.execute(
ActiveRecord::Base.sanitize_sql([ daily_totals_query, user.id, start_epoch, end_epoch ])
daily_totals_result = Heartbeat.connection.select_all(
Heartbeat.sanitize_sql([ daily_totals_query, user.id, start_epoch, end_epoch ])
)

daily_totals = daily_totals_result.each_with_object({}) do |row, hash|
Expand Down Expand Up @@ -197,8 +203,8 @@ def alt_candidates
LIMIT 5000
SQL

result = ActiveRecord::Base.connection.exec_query(
ActiveRecord::Base.sanitize_sql([ query, cutoff, cutoff ])
result = Heartbeat.connection.select_all(
Heartbeat.sanitize_sql([ query, cutoff, cutoff ])
)

render json: { candidates: result.to_a }
Expand All @@ -210,44 +216,20 @@ def shared_machines

query = <<-SQL
SELECT
sms.machine,
sms.machine_frequency,
ARRAY_AGG(DISTINCT u.id) AS user_ids
FROM
(
SELECT
machine,
COUNT(user_id) AS machine_frequency,
ARRAY_AGG(user_id) AS user_ids
FROM
(
SELECT DISTINCT
machine,
user_id
FROM
heartbeats
WHERE
machine IS NOT NULL
AND time > ?
) AS UserMachines
GROUP BY
machine
HAVING
COUNT(user_id) > 1
) AS sms,
LATERAL UNNEST(sms.user_ids) AS user_id_from_array
JOIN
users AS u ON u.id = user_id_from_array
GROUP BY
sms.machine,
sms.machine_frequency
ORDER BY
sms.machine_frequency DESC
machine,
uniq(user_id) AS machine_frequency,
groupUniqArray(user_id) AS user_ids
FROM heartbeats
WHERE machine != '' AND machine IS NOT NULL
AND time > ?
GROUP BY machine
HAVING uniq(user_id) > 1
ORDER BY machine_frequency DESC
LIMIT 5000
SQL

result = ActiveRecord::Base.connection.exec_query(
ActiveRecord::Base.sanitize_sql([ query, cutoff ])
result = Heartbeat.connection.select_all(
Heartbeat.sanitize_sql([ query, cutoff ])
)

render json: { machines: result.to_a }
Expand Down
52 changes: 33 additions & 19 deletions app/controllers/api/hackatime/v1/hackatime_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,11 @@ def stats_last_7_days
# Calculate total seconds
total_seconds = heartbeats.duration_seconds.to_i

# Get unique days
days = []
heartbeats.pluck(:time).each do |timestamp|
day = Time.at(timestamp).in_time_zone(@user.timezone).to_date
days << day unless days.include?(day)
end
days_covered = days.length
# Get unique days using ClickHouse aggregation instead of loading all timestamps
tz_quoted = Heartbeat.connection.quote(@user.timezone)
days_covered = Heartbeat.connection.select_value(
"SELECT uniq(toDate(toDateTime(toUInt32(time), #{tz_quoted}))) FROM (#{heartbeats.with_valid_timestamps.to_sql})"
).to_i

# Calculate daily average
daily_average = days_covered > 0 ? (total_seconds.to_f / days_covered).round(1) : 0
Expand Down Expand Up @@ -232,9 +230,34 @@ def body_to_json
LAST_LANGUAGE_SENTINEL = "<<LAST_LANGUAGE>>"

def handle_heartbeat(heartbeat_array)
results = []
prepared = prepare_heartbeat_attrs(heartbeat_array)

now = Time.current
base_us = (now.to_r * 1_000_000).to_i
records = prepared.each_with_index.map do |item, i|
attrs = item[:attrs]
attrs[:id] = ((base_us + i) << Heartbeat::CLICKHOUSE_ID_RANDOM_BITS) | SecureRandom.random_number(Heartbeat::CLICKHOUSE_ID_RANDOM_MAX)
attrs[:created_at] = now
attrs[:updated_at] = now
attrs
end

begin
Heartbeat.insert_all(records) if records.any?
rescue => e
report_error(e, message: "Error bulk inserting heartbeats")
return records.map { |r| [ { error: e.message, type: e.class.name }, 422 ] }
end

records.each { |r| queue_project_mapping(r[:project]) }
HeartbeatCacheInvalidator.bump_for(@user.id) if records.any?
PosthogService.capture_once_per_day(@user, "heartbeat_sent", { heartbeat_count: heartbeat_array.size })
records.map { |r| [ r, 201 ] }
end

def prepare_heartbeat_attrs(heartbeat_array)
last_language = nil
heartbeat_array.each do |heartbeat|
heartbeat_array.filter_map do |heartbeat|
heartbeat = heartbeat.to_h.with_indifferent_access
source_type = :direct_entry

Expand Down Expand Up @@ -282,17 +305,8 @@ def handle_heartbeat(heartbeat_array)
}).slice(*Heartbeat.column_names.map(&:to_sym))
# ^^ They say safety laws are written in blood. Well, so is this line!
# Basically this filters out columns that aren't in our DB (the biggest one being raw_data)
new_heartbeat = Heartbeat.find_or_create_by(attrs)

queue_project_mapping(heartbeat[:project])
results << [ new_heartbeat.attributes, 201 ]
rescue => e
report_error(e, message: "Error creating heartbeat")
results << [ { error: e.message, type: e.class.name }, 422 ]
{ attrs: attrs, source_type: source_type }
end

PosthogService.capture_once_per_day(@user, "heartbeat_sent", { heartbeat_count: heartbeat_array.size })
results
end

def queue_project_mapping(project_name)
Expand Down
2 changes: 1 addition & 1 deletion app/controllers/api/v1/authenticated/hours_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ def index
end_date = params[:end_date]&.to_date || Date.current

total_seconds = current_user.heartbeats
.where(created_at: start_date.beginning_of_day..end_date.end_of_day)
.where(time: start_date.beginning_of_day.to_f..end_date.end_of_day.to_f)
.duration_seconds

render json: {
Expand Down
Loading
Loading