Skip to content

Commit b393079

Browse files
committed
feat(toggl): convert notebook to reusable module func
1 parent cc8a3dc commit b393079

File tree

3 files changed

+229
-0
lines changed

3 files changed

+229
-0
lines changed

compiler_admin/services/files.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
import json
2+
3+
import pandas as pd
4+
5+
6+
def read_csv(file_path, **kwargs) -> pd.DataFrame:
    """Load CSV data from a file path or buffer into a pandas.DataFrame.

    Any extra keyword arguments are forwarded unchanged to pandas.read_csv.
    """
    frame = pd.read_csv(file_path, **kwargs)
    return frame
9+
10+
11+
def read_json(file_path: str):
    """Read a file path of JSON data into a python object.

    Args:
        file_path: Path to the JSON file to read.

    Returns:
        The decoded python object (whatever json.load produces for the file).
    """
    # Decode as UTF-8 explicitly so non-ASCII content does not depend on the
    # platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)
15+
16+
17+
def write_csv(file_path, data: pd.DataFrame, columns: list[str] = None):
    """Serialize a pandas.DataFrame as CSV to the given path or buffer.

    When columns is given, only those columns are written. The DataFrame
    index is never written.
    """
    csv_options = {"columns": columns, "index": False}
    data.to_csv(file_path, **csv_options)
20+
21+
22+
def write_json(file_path: str, data):
    """Serialize a python object as JSON (2-space indent) to the given path."""
    with open(file_path, "w") as out:
        json.dump(data, fp=out, indent=2)

compiler_admin/services/toggl.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
import os
2+
import sys
3+
from typing import TextIO
4+
5+
import pandas as pd
6+
7+
from compiler_admin.services.google import user_info as google_user_info
8+
import compiler_admin.services.files as files
9+
10+
# Project details seen so far, keyed on the Toggl project name.
PROJECT_INFO = {}

# User details seen so far, keyed on the user's email address.
USER_INFO = {}
NOT_FOUND = "NOT FOUND"

# Columns read from the Toggl input CSV for the conversion.
INPUT_COLUMNS = [
    "Email",
    "Task",
    "Client",
    "Start date",
    "Start time",
    "Duration",
    "Description",
]

# Columns written to the output CSV unless the caller supplies its own list.
OUTPUT_COLUMNS = [
    "Date",
    "Client",
    "Project",
    "Task",
    "Notes",
    "Hours",
    "First Name",
    "Last Name",
]
22+
23+
24+
def _harvest_client_name():
25+
"""Gets the value of the HARVEST_CLIENT_NAME env var."""
26+
return os.environ.get("HARVEST_CLIENT_NAME")
27+
28+
29+
def _get_info(obj: dict, key: str, env_key: str):
30+
"""Read key from obj, populating obj once from a file path at env_key."""
31+
if obj == {}:
32+
file_path = os.environ.get(env_key)
33+
if file_path:
34+
file_info = files.read_json(file_path)
35+
obj.update(file_info)
36+
return obj.get(key)
37+
38+
39+
def _toggl_project_info(project: str):
    """Look up project in the PROJECT_INFO cache (filled via TOGGL_PROJECT_INFO)."""
    return _get_info(obj=PROJECT_INFO, key=project, env_key="TOGGL_PROJECT_INFO")
42+
43+
44+
def _toggl_user_info(email: str):
    """Look up email in the USER_INFO cache (filled via TOGGL_USER_INFO)."""
    return _get_info(obj=USER_INFO, key=email, env_key="TOGGL_USER_INFO")
47+
48+
49+
def _get_first_name(email: str) -> str:
    """Return the first name for email, deriving and caching it when unknown.

    A cached "First Name" is returned as-is. Otherwise the part of the email
    before the "@" is capitalized, stored in USER_INFO under the email, and
    returned.
    """
    cached_user = _toggl_user_info(email)
    if cached_user:
        known = cached_user.get("First Name")
        if known is not None:
            return known

    local_part = email.split("@")[0]
    derived = local_part.capitalize()
    # Merge into any existing entry, or start a new one for this email.
    USER_INFO.setdefault(email, {}).update({"First Name": derived})
    return derived
62+
63+
64+
def _get_last_name(email: str):
    """Get the cached last name for email, or look it up via google_user_info.

    Args:
        email: The user's email address, used as the cache key.

    Returns:
        The "Last Name" value from the cache or from google_user_info, or None
        when neither provides one.
    """
    user = _toggl_user_info(email)
    last_name = user.get("Last Name") if user else None
    if last_name is None:
        # Normalise a falsy lookup result (None or empty) to a dict so the
        # cache update below cannot raise and never stores None for a user.
        user = google_user_info(email) or {}
        last_name = user.get("Last Name")
        # Merge into any existing entry, or start a new one for this email.
        USER_INFO.setdefault(email, {}).update(user)
    return last_name
76+
77+
78+
def _str_timedelta(td):
79+
"""Convert a string formatted duration (e.g. 01:30) to a timedelta."""
80+
return pd.to_timedelta(pd.to_datetime(td, format="%H:%M:%S").strftime("%H:%M:%S"))
81+
82+
83+
def convert_to_harvest(
    source_path: str | TextIO = sys.stdin,
    output_path: str | TextIO = sys.stdout,
    client_name: str | None = None,
    output_cols: list[str] = OUTPUT_COLUMNS,
):
    """Convert Toggl formatted entries in source_path to equivalent Harvest formatted entries.

    Args:
        source_path: The path to a readable CSV file of Toggl time entries; or a readable buffer of the same.
            None falls back to sys.stdin.

        output_path: The path to a CSV file where Harvest time entries will be written; or a writeable buffer
            for the same. None falls back to sys.stdout.

        client_name (str): The value to assign in the output "Client" field. When None, the value of the
            HARVEST_CLIENT_NAME env var is used.

        output_cols (list[str]): A list of column names for the output

    Returns:
        None. Either prints the resulting CSV data or writes to output_path.
    """
    # treat an explicit None like the defaults, for both ends of the conversion
    if source_path is None:
        source_path = sys.stdin
    if output_path is None:
        output_path = sys.stdout

    if client_name is None:
        client_name = _harvest_client_name()

    # read CSV file, parsing dates and times
    source = files.read_csv(source_path, usecols=INPUT_COLUMNS, parse_dates=["Start date"], cache_dates=True)
    source["Start time"] = source["Start time"].apply(_str_timedelta)
    source["Duration"] = source["Duration"].apply(_str_timedelta)
    source.sort_values(["Start date", "Start time", "Email"], inplace=True)

    # rename columns that can be imported as-is
    source.rename(columns={"Task": "Project", "Description": "Notes", "Start date": "Date"}, inplace=True)

    # update static calculated columns
    source["Client"] = client_name
    source["Task"] = "Project Consulting"

    # get cached project name if any, otherwise keep the Toggl value
    source["Project"] = source["Project"].apply(lambda x: _toggl_project_info(x) or x)

    # assign First and Last Name
    source["First Name"] = source["Email"].apply(_get_first_name)
    source["Last Name"] = source["Email"].apply(_get_last_name)

    # calculate hours as a decimal from duration timedelta
    source["Hours"] = (source["Duration"].dt.total_seconds() / 3600).round(2)

    files.write_csv(output_path, source, columns=output_cols)

tests/services/test_toggl.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
from io import StringIO
2+
import sys
3+
4+
import pandas as pd
5+
import pytest
6+
7+
from compiler_admin.services.toggl import INPUT_COLUMNS, files, OUTPUT_COLUMNS, convert_to_harvest, __name__ as MODULE
8+
9+
10+
@pytest.fixture(autouse=True)
def mock_environment(monkeypatch):
    """Set the environment variables the toggl module reads, for every test."""
    env_overrides = {
        "HARVEST_CLIENT_NAME": "Test_Client",
        "TOGGL_PROJECT_INFO": "notebooks/data/toggl-project-info-sample.json",
        "TOGGL_USER_INFO": "notebooks/data/toggl-user-info-sample.json",
    }
    for name, value in env_overrides.items():
        monkeypatch.setenv(name, value)
15+
16+
17+
@pytest.fixture
def mock_files(mocker):
    """Replace the toggl module's `files` reference with a spec'd mock."""
    target = f"{MODULE}.files"
    return mocker.patch(target, spec=files)
20+
21+
22+
@pytest.fixture
def mock_google_user_info(mocker):
    """Replace the toggl module's `google_user_info` reference with a mock."""
    target = f"{MODULE}.google_user_info"
    return mocker.patch(target)
25+
26+
27+
@pytest.fixture
def source_data():
    """Path to the sample Toggl CSV used as conversion input."""
    toggl_sample_path = "notebooks/data/toggl-sample.csv"
    return toggl_sample_path
30+
31+
32+
@pytest.fixture
def sample_transformed_data():
    """Path to the sample Harvest CSV used as the reference output."""
    harvest_sample_path = "notebooks/data/harvest-sample.csv"
    return harvest_sample_path
35+
36+
37+
def test_convert_to_harvest_mocked(source_data, mock_files, mock_google_user_info):
    """convert_to_harvest reads via files.read_csv and writes via files.write_csv."""
    mock_google_user_info.return_value = {}

    convert_to_harvest(source_data)

    # the source is read exactly once, with the expected parsing options
    mock_files.read_csv.assert_called_once()
    read_call = mock_files.read_csv.call_args
    assert read_call.args == (source_data,)
    expected_read_kwargs = {
        "usecols": INPUT_COLUMNS,
        "parse_dates": ["Start date"],
    }
    for option, expected in expected_read_kwargs.items():
        assert read_call.kwargs[option] == expected
    assert read_call.kwargs["cache_dates"] is True

    # the frame returned by read_csv is written once, to stdout by default
    mock_files.write_csv.assert_called_once()
    write_call = mock_files.write_csv.call_args
    assert write_call.args == (sys.stdout, mock_files.read_csv.return_value)
    assert write_call.kwargs["columns"] == OUTPUT_COLUMNS
53+
54+
55+
def test_convert_to_harvest_sample(source_data, sample_transformed_data, mock_google_user_info):
    """Converting the sample Toggl CSV yields CSV text with the expected header and client."""
    mock_google_user_info.return_value = {}

    with StringIO() as output_data:
        convert_to_harvest(source_data, output_data)
        output = output_data.getvalue()

    assert output
    assert isinstance(output, str)
    assert ",".join(OUTPUT_COLUMNS) in output

    order = ["Date", "First Name", "Hours"]
    sample_output_df = pd.read_csv(sample_transformed_data).sort_values(order)
    output_df = pd.read_csv(StringIO(output)).sort_values(order)

    # every output column must exist in the sample; `<=` (not strict `<`) so the
    # check still passes when the sample has exactly the same columns
    assert set(output_df.columns.to_list()) <= set(sample_output_df.columns.to_list())
    assert output_df["Client"].eq("Test_Client").all()

0 commit comments

Comments
 (0)