Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions DEPLOYMENT.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ docker push equalitie/baskervillehall:dev
docker buildx build --platform linux/amd64 -f ./Dockerfile.session-simple . -t equalitie/baskervillehall:session2
docker push equalitie/baskervillehall:session2

docker buildx build --platform linux/amd64 -f ./Dockerfile.session-simple . -t equalitie/baskervillehall:session_dev
docker push equalitie/baskervillehall:session_dev
```

### Building predict image
Expand Down Expand Up @@ -115,6 +117,15 @@ kubectl apply -f deployment/postgres/postgres-baskervillehall-service.yaml

kubectl port-forward service/postgres-baskervillehall 5433:5432

### Postgres deployment dev
kubectl apply -f deployment/postgres_dev/postgres-baskervillehall-dev-secret.yaml
kubectl apply -f deployment/postgres_dev/postgres-baskervillehall-dev-pv.yaml
kubectl apply -f deployment/postgres_dev/postgres-baskervillehall-dev-pvc.yaml
kubectl apply -f deployment/postgres_dev/postgres-baskervillehall-dev.yaml
kubectl apply -f deployment/postgres_dev/postgres-baskervillehall-dev-service.yaml

kubectl port-forward service/postgres-baskervillehall-dev 5433:5432

### Monitoring

```commandline
Expand Down
4 changes: 2 additions & 2 deletions config_baskervillehall_ch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ data:
NUM_OFFENCES_FOR_DIFFICULT_CHALLENGE: "5"
MAXSIZE_PENDING: "10000000"
USE_SHAPLEY: "True"
POSTGRES_HOST: "postgres-database-service.baskerville-server-database-production"
POSTGRES_HOST: "postgres-baskervillehall-dev.default"
POSTGRES_PORT: "5432"
POSTGRES_REFRESH_PERIOD_IN_SECONDS: "180"
SENSITIVITY_FACTOR: "0.05"
TTL_RECORDS_DAYS: "7"
NUM_REQUESTS_IN_STORAGE: "20"
SQL_TABLE_SESSIONS: "sessions"
SQL_TABLE_COMMANDS: "challenge_command_history"
AUTOCREATE_HOSTNAME_ID: "False"
AUTOCREATE_HOSTNAME_ID: "True"
MAX_SESSIONS_FOR_IP: "10"
BOT_SCORE_THRESHOLD: "0.5"
BAD_BOT_CHALLENGE: "True"
Expand Down
2 changes: 1 addition & 1 deletion deployment/postgres/create_schema.sql
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ CREATE TABLE public.sessions (
ALTER TABLE public.sessions ADD CONSTRAINT sessions_hostname_id_fkey FOREIGN KEY (hostname_id) REFERENCES public.hostname(hostname_id) ON DELETE CASCADE;
CREATE INDEX sessions_index ON sessions (session_end, host_name);

DROP TABLE public.challenge_command_history;
-- DROP TABLE public.challenge_command_history;

CREATE TABLE public.challenge_command_history (
challenge_command_id uuid DEFAULT uuid_generate_v4() NOT NULL,
Expand Down
84 changes: 84 additions & 0 deletions deployment/postgres/dashboard.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
-- challenged vs passed, last 24h
-- Emits one row per hour for the 24 most recent UTC hours. Hours that have no
-- row in a rollup table are reported as 0 instead of being dropped.
WITH hours AS (
    -- UTC wall-clock hour buckets (timestamp without time zone), oldest first.
    SELECT generate_series(
        date_trunc('hour', now() AT TIME ZONE 'UTC') - interval '23 hours',
        date_trunc('hour', now() AT TIME ZONE 'UTC'),
        interval '1 hour'
    ) AS bucket_hour
)
SELECT
    h.bucket_hour                 AS "time",  -- UTC
    COALESCE(c.challenged_ips, 0) AS challenged,
    COALESCE(p.passed_ips, 0)     AS passed
FROM hours AS h
LEFT JOIN public.dashboard_challenged_1h AS c
    -- h.bucket_hour carries no zone. Comparing it directly with a timestamptz
    -- column would interpret it in the session TimeZone and shift the buckets
    -- on any non-UTC connection, so it is pinned to UTC explicitly.
    -- NOTE(review): assumes bucket_hour is timestamptz, as declared in the dev schema.
    ON c.bucket_hour = (h.bucket_hour AT TIME ZONE 'UTC')
LEFT JOIN public.dashboard_passed_1h AS p
    ON p.bucket_hour = (h.bucket_hour AT TIME ZONE 'UTC')
ORDER BY h.bucket_hour;

-- precision timeseries 24h
-- One row per hour for the 24 most recent UTC hours; hours without a rollup
-- row are reported with precision_pct = 0.
WITH hours AS (
    -- UTC wall-clock hour buckets (timestamp without time zone), oldest first.
    SELECT generate_series(
        date_trunc('hour', now() AT TIME ZONE 'UTC') - interval '23 hours',
        date_trunc('hour', now() AT TIME ZONE 'UTC'),
        interval '1 hour'
    ) AS bucket_hour
)
SELECT
    h.bucket_hour                                  AS "time",  -- UTC
    COALESCE(p.precision_pct, 0)::DOUBLE PRECISION AS precision_pct
FROM hours AS h
LEFT JOIN public.dashboard_precision_1h AS p
    -- Pin the zone-less bucket to UTC before comparing with the timestamptz
    -- column; otherwise the match depends on the session TimeZone setting.
    -- NOTE(review): assumes bucket_hour is timestamptz, as declared in the dev schema.
    ON p.bucket_hour = (h.bucket_hour AT TIME ZONE 'UTC')
ORDER BY h.bucket_hour;



-- average precision 24h
-- Average of precision_pct weighted by total_ips over the 24 most recent UTC
-- hour buckets, rounded to one decimal. NULLIF turns a zero denominator into
-- NULL, so the result is NULL (not an error) when no IPs were counted.
SELECT
    ROUND(
        SUM(precision_pct * total_ips) / NULLIF(SUM(total_ips), 0),
        1
    )::DOUBLE PRECISION AS weighted_avg_precision_24h
FROM public.dashboard_precision_1h
WHERE
    -- The lower bound is computed as a UTC wall-clock timestamp and then pinned
    -- to UTC, so the comparison with bucket_hour does not depend on the session
    -- TimeZone. The parentheses are required: AT TIME ZONE binds tighter than "-".
    -- NOTE(review): assumes bucket_hour is timestamptz, as declared in the dev schema.
    bucket_hour >= (
        date_trunc('hour', now() AT TIME ZONE 'UTC') - interval '23 hours'
    ) AT TIME ZONE 'UTC';



-- human vs automated last 24h
-- One row per hour for the 24 most recent UTC hours with the summed counts for
-- human_label = 'human' and human_label = 'bot', plus each side's share of
-- their total in percent. Shares are NULL for hours where both counts are 0.
WITH hours AS (
    -- UTC wall-clock hour buckets (timestamp without time zone), oldest first.
    SELECT generate_series(
        date_trunc('hour', now() AT TIME ZONE 'UTC') - interval '23 hours',
        date_trunc('hour', now() AT TIME ZONE 'UTC'),
        interval '1 hour'
    ) AS bucket_hour
),
agg AS (
    -- Pivot the per-label rows of each bucket into two columns.
    SELECT
        bucket_hour,
        COALESCE(SUM(cnt) FILTER (WHERE human_label = 'human'), 0) AS human,
        COALESCE(SUM(cnt) FILTER (WHERE human_label = 'bot'), 0)   AS bot
    FROM public.dashboard_human_bot_1h
    WHERE
        -- Lower bound pinned to UTC so it does not depend on the session
        -- TimeZone; the parentheses are required because AT TIME ZONE binds
        -- tighter than "-".
        -- NOTE(review): assumes bucket_hour is timestamptz, as declared in the dev schema.
        bucket_hour >= (
            date_trunc('hour', now() AT TIME ZONE 'UTC') - interval '23 hours'
        ) AT TIME ZONE 'UTC'
    GROUP BY bucket_hour
)
SELECT
    h.bucket_hour        AS "time",  -- UTC
    COALESCE(a.human, 0) AS human,
    COALESCE(a.bot, 0)   AS bot,
    -- NOTE(review): "human_percenatage" is misspelled, but it is the output
    -- column name consumers see; rename only together with the dashboard panel.
    ROUND(
        100.0 * COALESCE(a.human, 0)
        / NULLIF(COALESCE(a.human, 0) + COALESCE(a.bot, 0), 0),
        1
    )::DOUBLE PRECISION  AS human_percenatage,
    ROUND(
        100.0 * COALESCE(a.bot, 0)
        / NULLIF(COALESCE(a.human, 0) + COALESCE(a.bot, 0), 0),
        1
    )::DOUBLE PRECISION  AS bot_percentage
FROM hours AS h
LEFT JOIN agg AS a
    -- Pin the zone-less bucket to UTC before comparing with timestamptz.
    ON a.bucket_hour = (h.bucket_hour AT TIME ZONE 'UTC')
ORDER BY h.bucket_hour;
2 changes: 1 addition & 1 deletion deployment/postgres/postgres-baskervillehall-secret.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@ metadata:
type: Opaque
stringData:
user: postgres
password: zJmh93FfhMvX6tDRNEoLuu97
password:
1 change: 1 addition & 0 deletions deployment/postgres/postgres-lb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ spec:
- 134.122.32.231/32
- 147.182.146.235/32
- 212.105.155.18/32
- 91.65.62.64/32

selector:
app: postgres-baskervillehall
Expand Down
159 changes: 159 additions & 0 deletions deployment/postgres_dev/create_schema.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@

CREATE EXTENSION IF NOT EXISTS "uuid-ossp";

-- DROP TABLE public.hostname;

-- Hostname registry. hostname_id is the target of the foreign keys declared on
-- public.sessions and public.challenge_command_history.
-- IF NOT EXISTS keeps this script re-runnable, matching the dashboard tables
-- and the CREATE EXTENSION statement in this file.
CREATE TABLE IF NOT EXISTS public.hostname (
    hostname_id uuid      DEFAULT uuid_generate_v4() NOT NULL,
    hostname    text      NOT NULL,
    created_at  timestamp NOT NULL,
    updated_at  timestamp NOT NULL,
    updated_by  text      NOT NULL,
    CONSTRAINT hostname_hostname_key UNIQUE (hostname),
    CONSTRAINT hostname_pkey PRIMARY KEY (hostname_id)
);
-- NOTE(review): the UNIQUE constraint above is already backed by a unique
-- index on (hostname), so this index looks redundant; kept so that nothing
-- referring to hostname_index by name breaks.
CREATE INDEX IF NOT EXISTS hostname_index ON public.hostname USING btree (hostname);

-- DROP TABLE public.sessions;

-- One row per recorded session, linked to public.hostname through hostname_id.
-- The statement is idempotent (IF NOT EXISTS). The foreign key is declared
-- inside CREATE TABLE under its original name so that re-running the script
-- does not fail on a duplicate ALTER TABLE ... ADD CONSTRAINT.
CREATE TABLE IF NOT EXISTS public.sessions (
    session_id            uuid      DEFAULT uuid_generate_v4() NOT NULL,
    hostname_id           uuid      NOT NULL,
    host_name             text      NOT NULL,
    ip                    text      NOT NULL,
    session_cookie        text      NOT NULL,
    ip_cookie             text      NOT NULL,
    primary_session       int4      DEFAULT 0 NULL,
    human                 int4      DEFAULT 0 NULL,

    ua_score              float8    DEFAULT 0 NULL,
    verified_bot          int4      DEFAULT 0 NULL,
    num_languages         int4      DEFAULT 0 NULL,
    valid_browser_ciphers int4      DEFAULT 0 NULL,
    cipher                text,
    ciphers               text,
    asn                   text,
    asn_name              text,
    is_scraper            int4      DEFAULT 0 NULL,

    vpn                   int4      DEFAULT 0 NULL,
    class                 text,
    passed_challenge      int4      DEFAULT 0 NULL,
    fingerprints          text      NULL,
    user_agent            text      NULL,
    country               text      NULL,
    continent             text      NULL,
    datacenter            text      NULL,
    hits                  int4      DEFAULT 0 NOT NULL,
    hit_rate              int4      DEFAULT 0 NULL,
    num_user_agent        int4      DEFAULT 1 NULL,
    duration              float8    DEFAULT 0 NOT NULL,
    session_start         timestamp NOT NULL,
    session_end           timestamp NOT NULL,
    requests              text      NULL,
    bot_score             float8    DEFAULT -1.0 NOT NULL,
    bot_score_top_factor  text      NULL,

    scraper_name          text,
    created_at            timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL,
    CONSTRAINT sessions_key PRIMARY KEY (session_id),
    -- Deleting a hostname removes its sessions.
    CONSTRAINT sessions_hostname_id_fkey FOREIGN KEY (hostname_id)
        REFERENCES public.hostname (hostname_id) ON DELETE CASCADE
);
-- Schema-qualified so the index target does not depend on search_path.
CREATE INDEX IF NOT EXISTS sessions_index ON public.sessions (session_end, host_name);

-- DROP TABLE public.challenge_command_history;

-- History of challenge commands, linked to public.hostname through hostname_id.
-- The statement is idempotent (IF NOT EXISTS). The foreign key is declared
-- inside CREATE TABLE under its original name so that re-running the script
-- does not fail on a duplicate ALTER TABLE ... ADD CONSTRAINT.
CREATE TABLE IF NOT EXISTS public.challenge_command_history (
    challenge_command_id uuid      DEFAULT uuid_generate_v4() NOT NULL,
    hostname_id          uuid      NOT NULL,
    command_type_name    text      DEFAULT ''::text NOT NULL,
    ip_address           inet      NOT NULL,
    session_cookie       text      DEFAULT ''::text NOT NULL,
    "source"             text      DEFAULT ''::text NOT NULL,
    created_at           timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL,
    updated_at           timestamp DEFAULT CURRENT_TIMESTAMP NOT NULL,
    updated_by           text      NOT NULL,
    duration             float8    DEFAULT 0.0 NOT NULL,
    request_count        int4      DEFAULT 0 NOT NULL,

    host_name            text      NOT NULL,
    ip_cookie            text      NOT NULL,
    primary_session      int4      DEFAULT 0 NULL,
    human                int4      DEFAULT 0 NULL,
    passed_challenge     int4      DEFAULT 0 NULL,
    bot_score            float8    DEFAULT -1.0 NOT NULL,
    bot_score_top_factor text      NULL,
    user_agent           text      NULL,
    country              text      NULL,
    continent            text      NULL,
    datacenter           text      NULL,
    shapley_if           text      NULL,
    shapley_feature_if   text      NULL,
    shapley_ae           text      NULL,
    shapley_feature_ae   text      NULL,
    difficulty           int4      DEFAULT 0 NULL,
    hits                 int4      DEFAULT 0 NOT NULL,
    hit_rate             int4      DEFAULT 0 NULL,
    num_user_agent       int4      DEFAULT 1 NULL,
    session_start        timestamp NOT NULL,
    session_end          timestamp NOT NULL,
    requests             text      NULL,
    meta                 text      DEFAULT ''::text NOT NULL,
    score_if             float8    DEFAULT 0.0 NOT NULL,
    score_ae             float8    DEFAULT 0.0 NOT NULL,
    threshold_ae         float8    DEFAULT 0.0 NOT NULL,
    scraper_name         text,
    prediction_if        int4      DEFAULT 0 NULL,
    prediction_ae        int4      DEFAULT 0 NULL,
    cloudflare_score     int4      DEFAULT 0 NULL,
    baskerville_score    int4      DEFAULT 0 NULL,

    baskerville_score_1  int4      DEFAULT 0 NULL,
    baskerville_score_2  int4      DEFAULT 0 NULL,
    baskerville_score_3  int4      DEFAULT 0 NULL,
    baskerville_score_4  int4      DEFAULT 0 NULL,
    CONSTRAINT challenge_command_history_pkey PRIMARY KEY (challenge_command_id),
    -- Deleting a hostname removes its command history.
    CONSTRAINT challenge_command_history_hostname_id_fkey FOREIGN KEY (hostname_id)
        REFERENCES public.hostname (hostname_id) ON DELETE CASCADE
);

-- Secondary indexes; all schema-qualified so they do not depend on search_path.
CREATE INDEX IF NOT EXISTS idx_hostname_command_type_to_command_history
    ON public.challenge_command_history USING btree (hostname_id, command_type_name);
CREATE INDEX IF NOT EXISTS commands_index
    ON public.challenge_command_history (session_end, host_name);
CREATE INDEX IF NOT EXISTS idx_challenge_ip_created
    ON public.challenge_command_history (ip_address, created_at);



-- Hourly rollup read by the dashboard's "challenged" series: at most one row
-- per bucket_hour, enforced by the primary key.
CREATE TABLE IF NOT EXISTS public.dashboard_challenged_1h (
    bucket_hour    timestamptz NOT NULL,
    challenged_ips bigint      NOT NULL,
    updated_at     timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (bucket_hour)
);

-- Hourly rollup read by the dashboard's "passed" series: at most one row per
-- bucket_hour, enforced by the primary key.
CREATE TABLE IF NOT EXISTS public.dashboard_passed_1h (
    bucket_hour timestamptz NOT NULL,
    passed_ips  bigint      NOT NULL,
    updated_at  timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (bucket_hour)
);

-- Hourly precision rollup: at most one row per bucket_hour. precision_pct is
-- stored with one decimal place (numeric(5,1)).
-- NOTE(review): presumably precision_pct = 100 * passed_ips / total_ips; the
-- writer is not part of this file, so confirm against the job that fills it.
CREATE TABLE IF NOT EXISTS public.dashboard_precision_1h (
    bucket_hour   timestamptz  NOT NULL,
    total_ips     bigint       NOT NULL,
    passed_ips    bigint       NOT NULL,
    precision_pct numeric(5,1) NOT NULL,
    updated_at    timestamptz  NOT NULL DEFAULT now(),
    PRIMARY KEY (bucket_hour)
);

-- Hourly counts per label: the composite primary key allows one row for each
-- (bucket_hour, human_label) pair.
CREATE TABLE IF NOT EXISTS public.dashboard_human_bot_1h (
    bucket_hour timestamptz NOT NULL,
    human_label text        NOT NULL,
    cnt         bigint      NOT NULL,
    updated_at  timestamptz NOT NULL DEFAULT now(),
    PRIMARY KEY (bucket_hour, human_label)
);


14 changes: 14 additions & 0 deletions deployment/postgres_dev/postgres-baskervillehall-dev-pv.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Statically defined 30Gi hostPath PersistentVolume (node path /mnt/data).
# NOTE(review): storageClassName is "csi-expandable" here, while the claim
# postgres-baskervillehall-dev-pvc requests "retain-storage". A claim only
# binds to a volume of the same class, so confirm which value is intended.
apiVersion: v1
kind: PersistentVolume
metadata:
  name: postgres-baskervillehall-dev-pv
  labels:
    type: local
spec:
  storageClassName: csi-expandable
  capacity:
    storage: 30Gi
  accessModes: ["ReadWriteOnce"]
  hostPath:
    path: /mnt/data
10 changes: 10 additions & 0 deletions deployment/postgres_dev/postgres-baskervillehall-dev-pvc.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Claim for 30Gi of ReadWriteOnce storage from the "retain-storage" class.
# NOTE(review): the volume postgres-baskervillehall-dev-pv declares
# storageClassName "csi-expandable", so this claim will not bind to it as
# written. Confirm which class is intended.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: postgres-baskervillehall-dev-pvc
spec:
  storageClassName: retain-storage
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 30Gi
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Credentials for the dev Postgres instance (keys: user, password, database).
apiVersion: v1
kind: Secret
metadata:
  name: postgres-baskervillehall-dev-secret
type: Opaque
stringData:
  user: postgres
  # Do not commit a real password. Set the value locally before running
  # `kubectl apply`, or create the Secret out of band, for example with
  # `kubectl create secret generic`.
  # The password previously committed in this file is exposed in version
  # control history and should be rotated.
  password: "REPLACE_ME_BEFORE_APPLY"
  database: baskerville
13 changes: 13 additions & 0 deletions deployment/postgres_dev/postgres-baskervillehall-dev-service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Cluster-internal Service: exposes port 5432 and forwards it to port 5432 of
# the pods labelled app=postgres-baskervillehall-dev.
apiVersion: v1
kind: Service
metadata:
  name: postgres-baskervillehall-dev
  labels:
    app: postgres-baskervillehall-dev
spec:
  type: ClusterIP
  selector:
    app: postgres-baskervillehall-dev
  ports:
    - port: 5432
      targetPort: 5432
Loading