Skip to content

chore: reduce token spend, filter out mentions at #173

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,8 @@ jobs:
uses: golangci/[email protected]
with:
# Optional: golangci-lint command line arguments.
args: '--timeout=10m'
version: v1.54.1
args: "--timeout=10m"

unittest:
name: Unit Test
Expand All @@ -127,7 +128,7 @@ jobs:
image: postgres
# Provide the password for postgres
env:
POSTGRES_PASSWORD: '123456'
POSTGRES_PASSWORD: "123456"
# Set health checks to wait until postgres has started
options: >-
--health-cmd pg_isready
Expand Down
36 changes: 36 additions & 0 deletions internal/models/chathistories/chat_histories.go
Original file line number Diff line number Diff line change
Expand Up @@ -363,10 +363,43 @@ func formatFullNameAndUsername(fullName, username string) string {
return strings.ReplaceAll(fullName, "#", "")
}

// encodeMessageIDIntoVirtualMessageID replaces the MessageID and any non-zero
// RepliedToMessageID of every entry in histories, in place, with small
// sequential "virtual" IDs starting at 1.
//
// Each distinct real ID is given exactly one virtual ID, allocated the first
// time that real ID is seen. A reply that points at a message contained in
// histories therefore carries the same virtual ID as that message, so the
// reply relationship stays intact in the encoded data and no extra IDs are
// spent on duplicates.
//
// The returned map is keyed by virtual ID and yields the original (real)
// message ID; pass it to decodeMessageIDFromVirtualMessageID to restore the
// real IDs afterwards.
func (m *Model) encodeMessageIDIntoVirtualMessageID(histories []*ent.ChatHistories) map[int64]int64 {
	nextVirtualID := int64(1)
	virtualToReal := make(map[int64]int64, len(histories))
	realToVirtual := make(map[int64]int64, len(histories))

	// assign returns the virtual ID for realID, allocating the next free one
	// when realID has not been seen before.
	assign := func(realID int64) int64 {
		if virtualID, ok := realToVirtual[realID]; ok {
			return virtualID
		}

		virtualID := nextVirtualID
		nextVirtualID++

		realToVirtual[realID] = virtualID
		virtualToReal[virtualID] = realID

		return virtualID
	}

	for _, message := range histories {
		message.MessageID = assign(message.MessageID)

		// A zero RepliedToMessageID means "not a reply" and must stay zero.
		if message.RepliedToMessageID != 0 {
			message.RepliedToMessageID = assign(message.RepliedToMessageID)
		}
	}

	return virtualToReal
}

// decodeMessageIDFromVirtualMessageID translates the virtual IDs found in
// outputs back into real message IDs, modifying outputs in place.
//
// Despite its name, mMessageIDToVirtualMessageID is looked up by virtual ID
// and yields the real message ID. Both the KeyIDs of every discussion and the
// SinceID of every output are translated. A virtual ID that is missing from
// the map decodes to 0, the map's zero value.
func (m *Model) decodeMessageIDFromVirtualMessageID(mMessageIDToVirtualMessageID map[int64]int64, outputs []*openai.ChatHistorySummarizationOutputs) {
	for _, output := range outputs {
		for _, discussion := range output.Discussion {
			// Build a fresh slice rather than rewriting the existing backing
			// array, so the result is always non-nil and never aliases the input.
			realKeyIDs := make([]int64, len(discussion.KeyIDs))
			for idx, virtualID := range discussion.KeyIDs {
				realKeyIDs[idx] = mMessageIDToVirtualMessageID[virtualID]
			}

			discussion.KeyIDs = realKeyIDs
		}

		output.SinceID = mMessageIDToVirtualMessageID[output.SinceID]
	}
}

func (m *Model) SummarizeChatHistories(chatID int64, chatType telegram.ChatType, histories []*ent.ChatHistories) (uuid.UUID, []string, error) {
historiesLLMFriendly := make([]string, 0, len(histories))
historiesIncludedMessageIDs := make([]int64, 0)

mMessageIDToVirtualMessageID := m.encodeMessageIDIntoVirtualMessageID(histories)

for _, message := range histories {
if message.RepliedToMessageID == 0 {
historiesLLMFriendly = append(historiesLLMFriendly, fmt.Sprintf(
Expand Down Expand Up @@ -402,6 +435,9 @@ func (m *Model) SummarizeChatHistories(chatID int64, chatType telegram.ChatType,
return uuid.Nil, make([]string, 0), err
}

// map virtual message IDs back to real message IDs
m.decodeMessageIDFromVirtualMessageID(mMessageIDToVirtualMessageID, summarizations)

ss, err := m.renderRecapTemplates(chatID, chatType, summarizations)
if err != nil {
return uuid.Nil, make([]string, 0), err
Expand Down
61 changes: 61 additions & 0 deletions internal/models/chathistories/chat_histories_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/nekomeowww/insights-bot/internal/configs"
"github.com/nekomeowww/insights-bot/internal/datastore"
"github.com/nekomeowww/insights-bot/internal/lib"
"github.com/nekomeowww/insights-bot/internal/thirdparty/openai"
"github.com/nekomeowww/insights-bot/internal/thirdparty/openai/openaimock"
"github.com/nekomeowww/insights-bot/pkg/tutils"
"github.com/nekomeowww/xo"
Expand Down Expand Up @@ -233,3 +234,63 @@ func TestFindLastOneHourChatHistories(t *testing.T) {
return item.MessageID
}))
}

// TestEncodeMessageIDIntoVirtualMessageID checks that message IDs and
// replied-to IDs are numbered sequentially from 1 in encounter order, and that
// the returned map leads from each virtual ID back to the original ID.
func TestEncodeMessageIDIntoVirtualMessageID(t *testing.T) {
	firstID := xo.RandomInt64()
	secondID := xo.RandomInt64()
	thirdID := xo.RandomInt64()
	repliedToID := xo.RandomInt64()

	histories := []*ent.ChatHistories{
		{MessageID: firstID, RepliedToMessageID: repliedToID},
		{MessageID: secondID},
		{MessageID: thirdID},
	}

	// The reply target of the first message is numbered right after the
	// message itself, before the remaining messages.
	expected := map[int64]int64{
		1: firstID,
		2: repliedToID,
		3: secondID,
		4: thirdID,
	}

	assert.Equal(t, expected, model.encodeMessageIDIntoVirtualMessageID(histories))
}

// TestDecodeMessageIDFromVirtualMessageID checks that SinceID and every KeyID
// in the summarization outputs are replaced by the real IDs stored in the
// virtual-ID lookup table.
func TestDecodeMessageIDFromVirtualMessageID(t *testing.T) {
	firstID := xo.RandomInt64()
	secondID := xo.RandomInt64()
	thirdID := xo.RandomInt64()
	repliedToID := xo.RandomInt64()

	// Lookup table keyed by virtual ID.
	virtualToReal := map[int64]int64{
		1: firstID,
		2: repliedToID,
		3: secondID,
		4: thirdID,
	}

	// newOutput builds a single-discussion output with the given IDs.
	newOutput := func(sinceID int64, keyIDs ...int64) *openai.ChatHistorySummarizationOutputs {
		return &openai.ChatHistorySummarizationOutputs{
			SinceID: sinceID,
			Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
				{KeyIDs: keyIDs},
			},
		}
	}

	actual := []*openai.ChatHistorySummarizationOutputs{
		newOutput(1, 1, 2),
		newOutput(3, 3),
		newOutput(4, 4),
	}

	model.decodeMessageIDFromVirtualMessageID(virtualToReal, actual)

	expected := []*openai.ChatHistorySummarizationOutputs{
		newOutput(firstID, firstID, repliedToID),
		newOutput(secondID, secondID),
		newOutput(thirdID, thirdID),
	}

	assert.Equal(t, expected, actual)
}
26 changes: 18 additions & 8 deletions internal/models/chathistories/recap.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,13 +109,13 @@ func (m *Model) summarizeChatHistoriesSlice(chatID int64, s string) ([]*openai.C
return outputs, resp.Usage, nil
}

func filterOutInvalidFields(messageIDs []int64, outputs []*openai.ChatHistorySummarizationOutputs) []*openai.ChatHistorySummarizationOutputs {
for i := range outputs {
func filterOutInvalidFields(messageIDs []int64) func(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
return func(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
// remove duplicate participants (key ids are limited to 5 further below)
outputs[i].Participants = lo.Uniq(outputs[i].Participants)
output.Participants = lo.Uniq(output.Participants)

// filter out non-existent message ids
for _, d := range outputs[i].Discussion {
for _, d := range output.Discussion {
d.KeyIDs = lo.Filter(d.KeyIDs, func(item int64, _ int) bool {
return lo.Contains(messageIDs, item) && item != 0
})
Expand All @@ -128,12 +128,12 @@ func filterOutInvalidFields(messageIDs []int64, outputs []*openai.ChatHistorySum
}
}

outputs[i].Discussion = lo.Filter(outputs[i].Discussion, func(item *openai.ChatHistorySummarizationOutputsDiscussion, _ int) bool {
output.Discussion = lo.Filter(output.Discussion, func(item *openai.ChatHistorySummarizationOutputsDiscussion, _ int) bool {
return len(item.KeyIDs) > 0 && item.Point != ""
})
}

return outputs
return output
}
}

func filterOutInvalidOutputFilterFunc(output *openai.ChatHistorySummarizationOutputs, _ int) bool {
Expand All @@ -144,6 +144,14 @@ func filterOutInvalidOutputFilterFunc(output *openai.ChatHistorySummarizationOut
len(output.Discussion) > 0 // filter out empty discussion
}

// filterOutMention strips a single leading "@" from every participant name of
// output and returns the same output pointer. The unused int parameter gives
// it the (item, index) shape expected by lo.Map.
func filterOutMention(output *openai.ChatHistorySummarizationOutputs, _ int) *openai.ChatHistorySummarizationOutputs {
	// Write into a new slice so the previous Participants backing array is
	// left untouched.
	participants := make([]string, len(output.Participants))
	for idx, name := range output.Participants {
		participants[idx] = strings.TrimPrefix(name, "@")
	}

	output.Participants = participants

	return output
}

func (m *Model) summarizeChatHistories(chatID int64, messageIDs []int64, llmFriendlyChatHistories string) ([]*openai.ChatHistorySummarizationOutputs, goopenai.Usage, error) {
chatHistoriesSlices := m.openAI.SplitContentBasedByTokenLimitations(llmFriendlyChatHistories, 15000)
chatHistoriesSummarizations := make([]*openai.ChatHistorySummarizationOutputs, 0, len(chatHistoriesSlices))
Expand All @@ -168,9 +176,11 @@ func (m *Model) summarizeChatHistories(chatID int64, messageIDs []int64, llmFrie
}

// filter out invalid fields
o = filterOutInvalidFields(messageIDs, o)
o = lo.Map(o, filterOutInvalidFields(messageIDs))
// filter out empty outputs
o = lo.Filter(o, filterOutInvalidOutputFilterFunc)
// filter out mentions
o = lo.Map(o, filterOutMention)

if len(o) == 0 {
m.logger.Error(fmt.Sprintf("no valid outputs from chat histories slice: %s, tried %d...", s, tried),
Expand Down
78 changes: 38 additions & 40 deletions internal/models/chathistories/recap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,84 +11,82 @@ import (

func TestFilterOutInvalidFields(t *testing.T) {
t.Run("UniqParticipants", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{}, []*openai.ChatHistorySummarizationOutputs{
{
Participants: []string{"User 1", "User 1"},
},
})
output := filterOutInvalidFields([]int64{})(&openai.ChatHistorySummarizationOutputs{
Participants: []string{"User 1", "User 1"},
}, 0)

assert.Equal(t, []string{"User 1"}, outputs[0].Participants)
assert.Equal(t, []string{"User 1"}, output.Participants)
})

t.Run("FilterOutNonExistMessageIDAndZeroMessageID", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
{
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{0, 1, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4, 5}},
},
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{0, 1, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4, 5}},
},
})
}, 0)

assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4}},
}, outputs[0].Discussion)
}, output.Discussion)
})

t.Run("UniqKeyID", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
{
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4, 4}},
},
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4, 4}},
},
})
}, 0)

assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
{Point: "Point 2", KeyIDs: []int64{3, 4}},
}, outputs[0].Discussion)
}, output.Discussion)
})

t.Run("LimitKeyIDsTo5", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4, 5, 6, 7, 8, 9}, []*openai.ChatHistorySummarizationOutputs{
{
Participants: []string{"a"},
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}},
},
output := filterOutInvalidFields([]int64{1, 2, 3, 4, 5, 6, 7, 8, 9})(&openai.ChatHistorySummarizationOutputs{
Participants: []string{"a"},
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5, 6, 7, 8, 9}},
},
})
}, 0)

assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2, 3, 4, 5}},
}, outputs[0].Discussion)
}, output.Discussion)
})

t.Run("FilterOutEmptyKeyIDsAndEmptyPointFromDiscussion", func(t *testing.T) {
outputs := filterOutInvalidFields([]int64{1, 2, 3, 4}, []*openai.ChatHistorySummarizationOutputs{
{
Participants: []string{"a"},
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
{Point: "", KeyIDs: []int64{}},
{Point: "", KeyIDs: []int64{3, 4}},
},
output := filterOutInvalidFields([]int64{1, 2, 3, 4})(&openai.ChatHistorySummarizationOutputs{
Participants: []string{"a"},
Discussion: []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
{Point: "", KeyIDs: []int64{}},
{Point: "", KeyIDs: []int64{3, 4}},
},
})
}, 0)

assert.Equal(t, []*openai.ChatHistorySummarizationOutputsDiscussion{
{Point: "Point 1", KeyIDs: []int64{1, 2}},
}, outputs[0].Discussion)
}, output.Discussion)
})
}

func TestFilterOutInvalidOutputFilterFunc(t *testing.T) {
assert.False(t, filterOutInvalidOutputFilterFunc(&openai.ChatHistorySummarizationOutputs{}, 0))
}

func TestFilterOutMention(t *testing.T) {
output := filterOutMention(&openai.ChatHistorySummarizationOutputs{
Participants: []string{"@User 1", "@User 2"},
}, 0)

assert.Equal(t, []string{"User 1", "User 2"}, output.Participants)
}

func TestRecapOutputTemplateExecute(t *testing.T) { //nolint:dupl
sb := new(strings.Builder)
err := RecapOutputTemplate.Execute(sb, RecapOutputTemplateInputs{
Expand Down
14 changes: 7 additions & 7 deletions pkg/linkprev/linkprev_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ func TestPreview(t *testing.T) {
meta, err := NewClient().Preview(context.Background(), "https://twitter.com/GoogleDevEurope/status/1640667303158198272")
require.NoError(t, err)
assert.Equal(t, Meta{
Title: "Google for Developers Europe on Twitter: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
Title: "Google for Developers Europe on X: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
OpenGraph: opengraph.OpenGraph{
Title: "Google for Developers Europe on Twitter",
Title: "Google for Developers Europe on X",
Type: "article",
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png:large",
URL: "https://twitter.com/GoogleDevEurope/status/1640667303158198272",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
SiteName: "Twitter",
},
}, meta)
Expand All @@ -61,7 +61,7 @@ func TestPreview(t *testing.T) {
Title: "Google for Developers Europe (@GoogleDevEurope)",
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://goo.gle/3zaGgRi\n🎁 Trying out the Go Playground → https://goo.gle/3zaGurC\n\nRT if you are a fellow Gopher!",
SiteName: "FixTweet",
SiteName: "FixTweet - 🆕 x.com link? Try fixupx.com",
},
}, meta)
})
Expand All @@ -70,13 +70,13 @@ func TestPreview(t *testing.T) {
meta, err := NewClient().Preview(context.Background(), "https://vxtwitter.com/GoogleDevEurope/status/1640667303158198272")
require.NoError(t, err)
assert.Equal(t, Meta{
Title: "Google for Developers Europe on Twitter: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
Title: "Google for Developers Europe on X: \"🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher! https://t.co/jiE7UTMHll\" / X",
OpenGraph: opengraph.OpenGraph{
Title: "Google for Developers Europe on Twitter",
Title: "Google for Developers Europe on X",
Type: "article",
Image: "https://pbs.twimg.com/media/FsTSN8nWwAA278D.png:large",
URL: "https://twitter.com/GoogleDevEurope/status/1640667303158198272",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
Description: "🎉 Happy Birthday @golang!\n\nDid you know that 11 years ago today Go 1 was publicly released? Join us in celebrating this day by:\n\n🎁 Checking out local meetups → https://t.co/TCNAZL0oOj\n🎁 Trying out the Go Playground → https://t.co/nnkaugz32x\n\nRT if you are a fellow Gopher!",
SiteName: "Twitter",
},
}, meta)
Expand Down