Skip to content

Commit

Permalink
5-minute interval for visits
Browse files Browse the repository at this point in the history
  • Loading branch information
vladsavelyev committed Feb 21, 2024
1 parent 09650ad commit 380e6ac
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 13 deletions.
8 changes: 4 additions & 4 deletions app/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@

class VisitStats(SQLModel, table=True):
"""
Table to record per-minute visit summaries.
Table to record per-interval visit summaries.
All keys describing the platform are primary, so we have a separate usage record
coming from each source.
Expand Down Expand Up @@ -73,7 +73,7 @@ def get_visit_stats(
end: datetime.datetime | None = None,
limit: int | None = None,
) -> Sequence[VisitStats]:
"""Return list of per-minute visit summary from the DB."""
"""Return list of per-interval visit summary from the DB."""
with Session(engine) as session:
statement = select(VisitStats)
if start:
Expand Down Expand Up @@ -105,9 +105,9 @@ def get_download_stats(
return session.exec(statement).all()


def insert_usage_stats(minute_summary: pd.DataFrame):
def insert_usage_stats(visit_stats: pd.DataFrame):
with Session(engine) as session:
for index, row in minute_summary.iterrows():
for index, row in visit_stats.iterrows():
existing_entry = session.exec(
select(VisitStats).where(
VisitStats.start == row["start"]
Expand Down
18 changes: 9 additions & 9 deletions app/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ async def persist_visits():
return _persist_visits()


def _summarize_visits() -> Response:
def _summarize_visits(interval="5min") -> Response:
"""
Summarize visits from the CSV file and write to the database
"""
Expand All @@ -179,30 +179,30 @@ def _summarize_visits() -> Response:
na_filter=False, # prevent empty strings from converting to nan or <NA>
)
df["start"] = pd.to_datetime(df["timestamp"])
df["end"] = df["start"] + pd.to_timedelta("1min")
df["end"] = df["start"] + pd.to_timedelta(interval)
df["start"] = df["start"].dt.strftime("%Y-%m-%d %H:%M")
df["end"] = df["end"].dt.strftime("%Y-%m-%d %H:%M")
df["ci_environment"] = df["ci_environment"].apply(lambda val: strtobool(val) if val else False)
df = df.drop(columns=["timestamp"])

# Summarize visits per user per minute
minute_summary = df.groupby(["start", "end"] + visit_fieldnames).size().reset_index(name="count")
if len(minute_summary) == 0:
# Summarize visits per user per time interval
interval_summary = df.groupby(["start", "end"] + visit_fieldnames).size().reset_index(name="count")
if len(interval_summary) == 0:
return PlainTextResponse(content="No new visits to summarize")

logger.info(f"Summarizing {len(df)} visits in {CSV_FILE_PATH} and writing {len(minute_summary)} rows to the DB")
logger.info(f"Summarizing {len(df)} visits in {CSV_FILE_PATH} and writing {len(interval_summary)} rows to the DB")
try:
db.insert_usage_stats(minute_summary)
db.insert_usage_stats(interval_summary)
except Exception as e:
return PlainTextResponse(
status_code=http.HTTPStatus.INTERNAL_SERVER_ERROR,
content=f"Failed to write to the database: {e}",
)
else:
logger.info(f"Successfully wrote {len(minute_summary)} rows to the DB")
logger.info(f"Successfully wrote {len(interval_summary)} rows to the DB")
open(CSV_FILE_PATH, "w").close() # Clear the CSV file on successful write
return PlainTextResponse(
content=f"Successfully summarized {len(df)} visits to {len(minute_summary)} per-minute entries",
content=f"Successfully summarized {len(df)} visits to {len(interval_summary)} per-interval entries",
)


Expand Down

0 comments on commit 380e6ac

Please sign in to comment.