HiveSQL: Age of active accounts
EN
After looking at the activity on Hive – how many accounts, how many posts, etc. – I wanted to know how old the active accounts are.
WITH ActivityPerUserMonth AS (
SELECT
c.author AS user_id,
YEAR(c.created) AS yr,
MONTH(c.created) AS mth,
COUNT(*) AS activity_count
FROM comments c
WHERE c.created >= ‘2023-01-01’
GROUP BY c.author, YEAR(c.created), MONTH(c.created)
)
SELECT
CONCAT(a.yr, ‘-’, RIGHT(‘0’ + CAST(a.mth AS VARCHAR(2)), 2)) AS Month,
CASE
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) < 1 THEN '≤1 month'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 1 AND 6 THEN ‘1–6 months’
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 7 AND 12 THEN ‘6–12 months’
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 13 AND 24 THEN '1–2 years'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 25 AND 36 THEN ‘2–3 years’
ELSE '>3 years'
END AS age_group,
SUM(CASE WHEN a.activity_count >= 1 THEN 1 ELSE 0 END) AS At_Least_1,
SUM(CASE WHEN a.activity_count >= 3 THEN 1 ELSE 0 END) AS At_Least_3,
SUM(CASE WHEN a.activity_count >= 10 THEN 1 ELSE 0 END) AS At_Least_10
FROM ActivityPerUserMonth a
JOIN Accounts acc ON acc.name = a.user_id
GROUP BY a.yr, a.mth,
CASE
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) < 1 THEN '≤1 month'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 1 AND 6 THEN ‘1–6 months’
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 7 AND 12 THEN ‘6–12 months’
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 13 AND 24 THEN '1–2 years'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 25 AND 36 THEN ‘2–3 years’
ELSE ‘>3 years’
END
ORDER BY a.yr, a.mth
I use the SQL query above for this. I have divided the age into 6 groups.
Younger than 1 month, 1-6 months, 6-12 months, 1-2 years, 2-3 years, and more than 3 years.
Again, in 3 activity levels with at least 1 post, at least 3 posts, and at least 10 posts.
Here is a graphical evaluation of the data, with the older accounts at the bottom and the younger accounts at the top.
at least 1 post
at least 3 posts
at least 10 posts
Translated with DeepL.com (free version)
DE
Nachdem ich geschaut habe, wie die Aktivität auf Hive ist - wie viele Accounts, wie viele Posts etc. - wollte ich nun wissen, wie alt sind denn die Accounts, die aktiv sind.
WITH ActivityPerUserMonth AS (
SELECT
c.author AS user_id,
YEAR(c.created) AS yr,
MONTH(c.created) AS mth,
COUNT(*) AS activity_count
FROM comments c
WHERE c.created >= '2023-01-01'
GROUP BY c.author, YEAR(c.created), MONTH(c.created)
)
SELECT
CONCAT(a.yr, '-', RIGHT('0' + CAST(a.mth AS VARCHAR(2)), 2)) AS Monat,
CASE
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) < 1 THEN '≤1 Monat'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 1 AND 6 THEN '1–6 Monate'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 7 AND 12 THEN '6–12 Monate'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 13 AND 24 THEN '1–2 Jahre'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 25 AND 36 THEN '2–3 Jahre'
ELSE '>3 Jahre'
END AS age_group,
SUM(CASE WHEN a.activity_count >= 1 THEN 1 ELSE 0 END) AS At_Least_1,
SUM(CASE WHEN a.activity_count >= 3 THEN 1 ELSE 0 END) AS At_Least_3,
SUM(CASE WHEN a.activity_count >= 10 THEN 1 ELSE 0 END) AS At_Least_10
FROM ActivityPerUserMonth a
JOIN Accounts acc ON acc.name = a.user_id
GROUP BY a.yr, a.mth,
CASE
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) < 1 THEN '≤1 Monat'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 1 AND 6 THEN '1–6 Monate'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 7 AND 12 THEN '6–12 Monate'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 13 AND 24 THEN '1–2 Jahre'
WHEN DATEDIFF(month, acc.created, EOMONTH(DATEFROMPARTS(a.yr,a.mth,1))) BETWEEN 25 AND 36 THEN '2–3 Jahre'
ELSE '>3 Jahre'
END
ORDER BY a.yr, a.mth
Dafür nutze ich die SQL Abfrage hier oben. Dabei habe ich das Alter in 6 Gruppen eingeordnet.
Jünger als 1 Monat, 1-6 Monate, 6-12 Monate, 1-2 Jahre, 2-3 Jahre und mehr als 3 Jahre.
Wieder in 3 Aktivitätsstufen mit mind. 1 Post, mind. 3 Post und mind. 10 Posts.
Hier zu den Daten eine grafische Auswertung, wobei die älteren Accounts unten sind und die jüngeren Accounts oben.
mindestens 1 Post
mindestens 3 Posts
mindestens 10 Posts
Posted Using INLEO
Also je mehr Posts desto älter die Accounts?
Äh nein,
Ich habs nur in drei Gruppen. Min 1, 3 oder 10 Posts/Kommentare im Monat gemacht, also wie aktiv sind die Accounts.
Und dann wie alt diese sind.
👏🏾👏🏾