Skip to content

Commit 623f9a5

Browse files
authored
chore: reduce token spend, filter out mentions at (#173)
1 parent adedee6 commit 623f9a5

File tree

6 files changed

+163
-57
lines changed

6 files changed

+163
-57
lines changed

.github/workflows/ci.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@ jobs:
114114
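uses: golangci/golangci-lint-action@v3 # NOTE(review): exact tag was lost to e-mail obfuscation in this scrape; confirm against the repository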
115115
with:
116116
# Optional: golangci-lint command line arguments.
117-
args: '--timeout=10m'
117+
version: v1.54.1
118+
args: "--timeout=10m"
118119

119120
unittest:
120121
name: Unit Test
@@ -127,7 +128,7 @@ jobs:
127128
image: postgres
128129
# Provide the password for postgres
129130
env:
130-
POSTGRES_PASSWORD: '123456'
131+
POSTGRES_PASSWORD: "123456"
131132
# Set health checks to wait until postgres has started
132133
options: >-
133134
--health-cmd pg_isready

internal/models/chathistories/chat_histories.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,10 +363,43 @@ func formatFullNameAndUsername(fullName, username string) string {
363363
return strings.ReplaceAll(fullName, "#", "")
364364
}
365365

366+
func (m *Model) encodeMessageIDIntoVirtualMessageID(histories []*ent.ChatHistories) map[int64]int64 {
367+
virtualMessageID := int64(1)
368+
mMessageIDToVirtualMessageID := make(map[int64]int64)
369+
370+
for _, message := range histories {
371+
mMessageIDToVirtualMessageID[virtualMessageID] = message.MessageID
372+
message.MessageID = virtualMessageID
373+
virtualMessageID++
374+
375+
if message.RepliedToMessageID != 0 {
376+
mMessageIDToVirtualMessageID[virtualMessageID] = message.RepliedToMessageID
377+
message.RepliedToMessageID = virtualMessageID
378+
virtualMessageID++
379+
}
380+
}
381+
382+
return mMessageIDToVirtualMessageID
383+
}
384+
385+
func (m *Model) decodeMessageIDFromVirtualMessageID(mMessageIDToVirtualMessageID map[int64]int64, outputs []*openai.ChatHistorySummarizationOutputs) {
386+
for _, o := range outputs {
387+
for _, d := range o.Discussion {
388+
d.KeyIDs = lo.Map(d.KeyIDs, func(virtualMessageID int64, i int) int64 {
389+
return mMessageIDToVirtualMessageID[virtualMessageID]
390+
})
391+
}
392+
393+
o.SinceID = mMessageIDToVirtualMessageID[o.SinceID]
394+
}
395+
}
396+
366397
func (m *Model) SummarizeChatHistories(chatID int64, chatType telegram.ChatType, histories []*ent.ChatHistories) (uuid.UUID, []string, error) {
367398
historiesLLMFriendly := make([]string, 0, len(histories))
368399
historiesIncludedMessageIDs := make([]int64, 0)
369400

401+
mMessageIDToVirtualMessageID := m.encodeMessageIDIntoVirtualMessageID(histories)
402+
370403
for _, message := range histories {
371404
if message.RepliedToMessageID == 0 {
372405
historiesLLMFriendly = append(historiesLLMFriendly, fmt.Sprintf(
@@ -402,6 +435,9 @@ func (m *Model) SummarizeChatHistories(chatID int64, chatType telegram.ChatType,
402435
return uuid.Nil, make([]string, 0), err
403436
}
404437

438+
// reverse virtual message id to real message id
439+
m.decodeMessageIDFromVirtualMessageID(mMessageIDToVirtualMessageID, summarizations)
440+
405441
ss, err := m.renderRecapTemplates(chatID, chatType, summarizations)
406442
if err != nil {
407443
return uuid.Nil, make([]string, 0), err

internal/models/chathistories/chat_histories_test.go

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/nekomeowww/insights-bot/internal/configs"
1818
"github.com/nekomeowww/insights-bot/internal/datastore"
1919
"github.com/nekomeowww/insights-bot/internal/lib"
20+
"github.com/nekomeowww/insights-bot/internal/thirdparty/openai"
2021
"github.com/nekomeowww/insights-bot/internal/thirdparty/openai/openaimock"
2122
"github.com/nekomeowww/insights-bot/pkg/tutils"
2223
"github.com/nekomeowww/xo"
@@ -233,3 +234,63 @@ func TestFindLastOneHourChatHistories(t *testing.T) {
233234
return item.MessageID
234235
}))
235236
}
237+
238+
func TestEncodeMessageIDIntoVirtualMessageID(t *testing.T) {
239+
messageID1 := xo.RandomInt64()
240+
messageID2 := xo.RandomInt64()
241+
messageID3 := xo.RandomInt64()
242+
replyToMessageID1 := xo.RandomInt64()
243+
244+
mVirtualIDs := model.encodeMessageIDIntoVirtualMessageID([]*ent.ChatHistories{
245+
{MessageID: messageID1, RepliedToMessageID: replyToMessageID1},
246+
{MessageID: messageID2},
247+
{MessageID: messageID3},
248+
})
249+
250+
assert.Equal(t, map[int64]int64{
251+
1: messageID1,
252+
2: replyToMessageID1,
253+
3: messageID2,
254+
4: messageID3,
255+
}, mVirtualIDs)
256+
}
257+
258+
func TestDecodeMessageIDFromVirtualMessageID(t *testing.T) {
259+
messageID1 := xo.RandomInt64()
260+
messageID2 := xo.RandomInt64()
261+
messageID3 := xo.RandomInt64()
262+
replyToMessageID1 := xo.RandomInt64()
263+
264+
mVirtualIDs := map[int64]int64{
265+
1: messageID1,
266+
2: replyToMessageID1,
267+
3: messageID2,
268+
4: messageID3,
269+
}
270+
271+
outputs := []*openai.ChatHistorySummarizationOutputs{
272+
{SinceID: 1, Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
273+
{KeyIDs: []int64{1, 2}},
274+
}},
275+
{SinceID: 3, Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
276+
{KeyIDs: []int64{3}},
277+
}},
278+
{SinceID: 4, Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
279+
{KeyIDs: []int64{4}},
280+
}},
281+
}
282+
283+
model.decodeMessageIDFromVirtualMessageID(mVirtualIDs, outputs)
284+
285+
assert.Equal(t, []*openai.ChatHistorySummarizationOutputs{
286+
{SinceID: messageID1, Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
287+
{KeyIDs: []int64{messageID1, replyToMessageID1}},
288+
}},
289+
{SinceID: messageID2, Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
290+
{KeyIDs: []int64{messageID2}},
291+
}},
292+
{SinceID: messageID3, Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
293+
{KeyIDs: []int64{messageID3}},
294+
}},
295+
}, outputs)
296+
}

internal/models/chathistories/recap.go

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -109,13 +109,13 @@ func (m *Model) summarizeChatHistoriesSlice(chatID int64, s string) ([]*openai.C
109109
return outputs, resp.Usage, nil
110110
}
111111

112-
func filterOutInvalidFields(messageIDs []int64, outputs []*openai.ChatHistorySummarizationOutputs) []*openai.ChatHistorySummarizationOutputs {
113-
for i := range outputs {
112+
func filterOutInvalidFields(messageIDs []int64) func(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
113+
return func(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
114114
// limit key ids to 5
115-
outputs[i].Participants = lo.Uniq(outputs[i].Participants)
115+
output.Participants = lo.Uniq(output.Participants)
116116

117117
// filter out non-exist message ids
118-
for _, d := range outputs[i].Discussion {
118+
for _, d := range output.Discussion {
119119
d.KeyIDs = lo.Filter(d.KeyIDs, func(item int64, _ int) bool {
120120
return lo.Contains(messageIDs, item) && item != 0
121121
})
@@ -128,12 +128,12 @@ func filterOutInvalidFields(messageIDs []int64, outputs []*openai.ChatHistorySum
128128
}
129129
}
130130

131-
outputs[i].Discussion = lo.Filter(outputs[i].Discussion, func(item *openai.ChatHistorySummarizationOutputsDiscussion, _ int) bool {
131+
output.Discussion = lo.Filter(output.Discussion, func(item *openai.ChatHistorySummarizationOutputsDiscussion, _ int) bool {
132132
return len(item.KeyIDs) > 0 && item.Point != ""
133133
})
134-
}
135134

136-
return outputs
135+
return output
136+
}
137137
}
138138

139139
func filterOutInvalidOutputFilterFunc(output *openai.ChatHistorySummarizationOutputs, _ int) bool {
@@ -144,6 +144,14 @@ func filterOutInvalidOutputFilterFunc(output *openai.ChatHistorySummarizationOut
144144
len(output.Discussion) > 0 // filter out empty discussion
145145
}
146146

147+
func filterOutMention(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
148+
output.Participants = lo.Map(output.Participants, func(item string, _ int) string {
149+
return strings.TrimPrefix(item, "@")
150+
})
151+
152+
return output
153+
}
154+
147155
func (m *Model) summarizeChatHistories(chatID int64, messageIDs []int64, llmFriendlyChatHistories string) ([]*openai.ChatHistorySummarizationOutputs, goopenai.Usage, error) {
148156
chatHistoriesSlices := m.openAI.SplitContentBasedByTokenLimitations(llmFriendlyChatHistories, 15000)
149157
chatHistoriesSummarizations := make([]*openai.ChatHistorySummarizationOutputs, 0, len(chatHistoriesSlices))
@@ -168,9 +176,11 @@ func (m *Model) summarizeChatHistories(chatID int64, messageIDs []int64, llmFrie
168176
}
169177

170178
// filter out invalid fields
171-
o = filterOutInvalidFields(messageIDs, o)
179+
o = lo.Map(o, filterOutInvalidFields(messageIDs))
172180
// filter out empty outputs
173181
o = lo.Filter(o, filterOutInvalidOutputFilterFunc)
182+
// filter out mentions
183+
o = lo.Map(o, filterOutMention)
174184

175185
if len(o) == 0 {
176186
m.logger.Error(fmt.Sprintf("no valid outputs from chat histories slice: %s, tried %d...", s, tried),

internal/models/chathistories/recap_test.go

Lines changed: 38 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -11,84 +11,82 @@ import (
1111

1212
func TestFilterOutInvalidFields(t *testing.T) {
1313
t.Run("UniqParticipants", func(t *testing.T) {
14-
outputs := filterOutInvalidFields([]int64{}, []*openai.ChatHistorySummarizationOutputs{
15-
{
16-
Participants: []string{"User 1", "User 1"},
17-
},
18-
})
14+
output := filterOutInvalidFields([]int64{})(&openai.ChatHistorySummarizationOutputs{
15+
Participants: []string{"User 1", "User 1"},
16+
}, 0)
1917

20-
assert.Equal(t, []string{"User 1"}, outputs[0].Participants)
18+
assert.Equal(t, []string{"User 1"}, output.Participants)
2119
})
2220

2321
t.Run("FilterOutNonExistMessageIDAndZeroMessageID", func(t *testing.T) {
24-
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
25-
{
26-
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
27-
{Point: "Point 1", KeyIDs: []int64{0, 1, 2}},
28-
{Point: "Point 2", KeyIDs: []int64{3, 4, 5}},
29-
},
22+
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
23+
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
24+
{Point: "Point 1", KeyIDs: []int64{0, 1, 2}},
25+
{Point: "Point 2", KeyIDs: []int64{3, 4, 5}},
3026
},
31-
})
27+
}, 0)
3228

3329
assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
3430
{Point: "Point 1", KeyIDs: []int64{1, 2}},
3531
{Point: "Point 2", KeyIDs: []int64{3, 4}},
36-
}, outputs[0].Discussion)
32+
}, output.Discussion)
3733
})
3834

3935
t.Run("UniqKeyID", func(t *testing.T) {
40-
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
41-
{
42-
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
43-
{Point: "Point 1", KeyIDs: []int64{1, 2, 2}},
44-
{Point: "Point 2", KeyIDs: []int64{3, 4, 4}},
45-
},
36+
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
37+
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
38+
{Point: "Point 1", KeyIDs: []int64{1, 2, 2}},
39+
{Point: "Point 2", KeyIDs: []int64{3, 4, 4}},
4640
},
47-
})
41+
}, 0)
4842

4943
assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
5044
{Point: "Point 1", KeyIDs: []int64{1, 2}},
5145
{Point: "Point 2", KeyIDs: []int64{3, 4}},
52-
}, outputs[0].Discussion)
46+
}, output.Discussion)
5347
})
5448

5549
t.Run("LimitKeyIDsTo5", func(t *testing.T) {
56-
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4, 5, 6, 7, 8, 9}, []*openai.ChatHistorySummarizationOutputs{
57-
{
58-
Participants: []string{"a"},
59-
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
60-
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}},
61-
},
50+
output := filterOutInvalidFields([]int64{1, 2, 3, 4, 5, 6, 7, 8, 9})(&openai.ChatHistorySummarizationOutputs{
51+
Participants: []string{"a"},
52+
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
53+
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}},
6254
},
63-
})
55+
}, 0)
6456

6557
assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
6658
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5}},
67-
}, outputs[0].Discussion)
59+
}, output.Discussion)
6860
})
6961

7062
t.Run("FilterOutEmptyKeyIDsAndEmptyPointFromDiscussion", func(t *testing.T) {
71-
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
72-
{
73-
Participants: []string{"a"},
74-
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
75-
{Point: "Point 1", KeyIDs: []int64{1, 2}},
76-
{Point: "", KeyIDs: []int64{}},
77-
{Point: "", KeyIDs: []int64{3, 4}},
78-
},
63+
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
64+
Participants: []string{"a"},
65+
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
66+
{Point: "Point 1", KeyIDs: []int64{1, 2}},
67+
{Point: "", KeyIDs: []int64{}},
68+
{Point: "", KeyIDs: []int64{3, 4}},
7969
},
80-
})
70+
}, 0)
8171

8272
assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
8373
{Point: "Point 1", KeyIDs: []int64{1, 2}},
84-
}, outputs[0].Discussion)
74+
}, output.Discussion)
8575
})
8676
}
8777

8878
func TestFilterOutInvalidOutputFilterFunc(t *testing.T) {
8979
assert.False(t, filterOutInvalidOutputFilterFunc(&openai.ChatHistorySummarizationOutputs{}, 0))
9080
}
9181

82+
func TestFilterOutMention(t *testing.T) {
83+
output := filterOutMention(&openai.ChatHistorySummarizationOutputs{
84+
Participants: []string{"@User 1", "@User 2"},
85+
}, 0)
86+
87+
assert.Equal(t, []string{"User 1", "User 2"}, output.Participants)
88+
}
89+
9290
func TestRecapOutputTemplateExecute(t *testing.T) { //nolint:dupl
9391
sb := new(strings.Builder)
9492
err := RecapOutputTemplate.Execute(sb, RecapOutputTemplateInputs{

pkg/linkprev/linkprev_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,13 +39,13 @@ func TestPreview(t *testing.T) {
3939
meta, err := NewClient().Preview(context.Background(), "https://twitter.com/GoogleDevEurope/status/1640667303158198272")
4040
require.NoError(t, err)
4141
assert.Equal(t, Meta{
42-
Title: "Google for Developers Europe on Twitter: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
42+
Title: "Google for Developers Europe on X: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
4343
OpenGraph: opengraph.OpenGraph{
44-
Title: "Google for Developers Europe on Twitter",
44+
Title: "Google for Developers Europe on X",
4545
Type: "article",
4646
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png:large",
4747
URL: "https://twitter.com/GoogleDevEurope/status/1640667303158198272",
48-
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
48+
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
4949
SiteName: "Twitter",
5050
},
5151
}, meta)
@@ -61,7 +61,7 @@ func TestPreview(t *testing.T) {
6161
Title: "Google for Developers Europe (@GoogleDevEurope)",
6262
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png",
6363
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://goo.gle/3zaGgRi\n🎁 Trying out the Go Playground → https://goo.gle/3zaGurC\n\nRT if you are a fellow Gopher!",
64-
SiteName: "FixTweet",
64+
SiteName: "FixTweet - 🆕 x.com link? Try fixupx.com",
6565
},
6666
}, meta)
6767
})
@@ -70,13 +70,13 @@ func TestPreview(t *testing.T) {
7070
meta, err := NewClient().Preview(context.Background(), "https://vxtwitter.com/GoogleDevEurope/status/1640667303158198272")
7171
require.NoError(t, err)
7272
assert.Equal(t, Meta{
73-
Title: "Google for Developers Europe on Twitter: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
73+
Title: "Google for Developers Europe on X: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
7474
OpenGraph: opengraph.OpenGraph{
75-
Title: "Google for Developers Europe on Twitter",
75+
Title: "Google for Developers Europe on X",
7676
Type: "article",
7777
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png:large",
7878
URL: "https://twitter.com/GoogleDevEurope/status/1640667303158198272",
79-
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
79+
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
8080
SiteName: "Twitter",
8181
},
8282
}, meta)

0 commit comments

Comments
 (0)