-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathclient.go
More file actions
194 lines (159 loc) · 4.68 KB
/
client.go
File metadata and controls
194 lines (159 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
package magictext
import (
"fmt"
"log"
"strings"
"time"
"unicode/utf8"
"github.com/martinlindhe/subtitles"
"github.com/pkoukk/tiktoken-go"
"github.com/sashabaranov/go-openai"
)
// Token limits that request text is validated against before a completion is
// requested.
const (
// MaxReqTokens512 is the limit GenerateTitle validates its input against.
MaxReqTokens512 = 512
// MaxReqTokens2048 is the limit ExtractNouns validates its input against.
MaxReqTokens2048 = 2048
)
// Package-level switches and shared handles.
//
// NOTE(review): none of these are referenced in the part of the file shown
// here; the per-variable notes below are inferred from names and types only
// and should be confirmed against the code that reads them.
var (
// Debug defaults to false; presumably enables extra diagnostics — TODO confirm.
Debug = false
// MockOpenAI defaults to false; presumably swaps real OpenAI calls for
// mocked responses — TODO confirm.
MockOpenAI = false
// OpenAIClient is a go-openai client handle. It has no initializer here, so
// it is nil until assigned elsewhere.
OpenAIClient *openai.Client
// TikToken is a tiktoken-go handle, presumably used for token counting —
// TODO confirm. It has no initializer here, so it is nil until assigned
// elsewhere.
TikToken *tiktoken.Tiktoken
)
// Summary is one summarized piece of text, identified by ID and ordered by Seq.
type Summary struct {
	ID   string
	Seq  int
	Text string
}

// CaptionSummary is a Summary annotated with the caption time range it covers.
type CaptionSummary struct {
	// From is the start of the covered range.
	From time.Time
	// To is the end of the covered range.
	To time.Time
	Summary
}

// FromInString formats the start time as a 24-hour "HH:MM:SS" clock string.
func (cs *CaptionSummary) FromInString() string {
	const clockLayout = "15:04:05"
	return cs.From.Format(clockLayout)
}

// ToInString formats the end time as a 24-hour "HH:MM:SS" clock string.
func (cs *CaptionSummary) ToInString() string {
	const clockLayout = "15:04:05"
	return cs.To.Format(clockLayout)
}

// FromInSeconds converts the clock reading of the start time (hour, minute,
// second) into a number of seconds; the date part of the value is ignored.
func (cs *CaptionSummary) FromInSeconds() int {
	hours, minutes, seconds := cs.From.Clock()
	return hours*3600 + minutes*60 + seconds
}

// ToInSeconds converts the clock reading of the end time (hour, minute,
// second) into a number of seconds; the date part of the value is ignored.
func (cs *CaptionSummary) ToInSeconds() int {
	hours, minutes, seconds := cs.To.Clock()
	return hours*3600 + minutes*60 + seconds
}
// TextChunk is a piece of text with its identifier, sequence number and token
// count.
type TextChunk struct {
	ID     string `json:"id"`
	Seq    int    `json:"seq"`
	Text   string `json:"text"`
	Tokens int    `json:"tokens"`
}

// CaptionChunk is a TextChunk annotated with the caption time range it covers.
type CaptionChunk struct {
	From time.Time `json:"from"`
	To   time.Time `json:"to"`
	TextChunk
}

// NewCaptionChunk builds a CaptionChunk for text covering the range from..to.
//
// The text is trimmed of surrounding whitespace first; the ID is the hash of
// the trimmed text (via hashString) and Tokens is its CountTokens result, so
// two chunks with the same trimmed text share the same ID.
func NewCaptionChunk(seq int, text string, from, to time.Time) *CaptionChunk {
	text = strings.TrimSpace(text)
	return &CaptionChunk{
		From: from,
		To:   to,
		TextChunk: TextChunk{
			ID:     hashString(text),
			Seq:    seq,
			Text:   text,
			Tokens: CountTokens(text),
		},
	}
}

// String renders a one-line description of the chunk:
//
//	<first 8 bytes of ID> <zero-padded token count> <start HH:MM:SS> <text>
//
// Text longer than 60 runes is cut to 57 runes followed by "...". An ID
// shorter than 8 bytes is printed in full instead of being sliced, so chunks
// that were not built by NewCaptionChunk (zero values, decoded JSON) do not
// cause a panic.
func (c *CaptionChunk) String() string {
	const (
		maxLength   = 60
		idPrefixLen = 8
	)

	id := c.ID
	if len(id) > idPrefixLen {
		id = id[:idPrefixLen]
	}

	text := c.Text
	if utf8.RuneCountInString(text) > maxLength {
		// Truncate on rune boundaries so multi-byte characters stay intact.
		text = fmt.Sprintf("%s...", string([]rune(text)[:maxLength-3]))
	}

	return fmt.Sprintf("%s <%04d> %s %s", id, c.Tokens, c.From.Format("15:04:05"), text)
}
// GenerateSummaryBySubtitle generates a summary for the given subtitles.
//
// The subtitle is split into caption chunks with SplitSubtitle, each chunk's
// text is handed to generateSummary together with topic, and the resulting
// chunk tree is turned into the return values:
//
//   - the string result is the Text of the root chunk;
//   - the slice holds one CaptionSummary per grandchild of the root, whose
//     From/To are taken from the first and last leaf chunk beneath it, looked
//     up by ID among the original caption chunks.
//
// On failure of SplitSubtitle or generateSummary the error is returned
// unchanged together with an empty (non-nil) slice and an empty string.
//
// As a side effect three JSON dumps (caption chunks, text chunks, root chunk)
// are written under /tmp with a random file name prefix. Dumping is best
// effort: a failure is logged and does not affect the result.
func GenerateSummaryBySubtitle(topic string, subtitle subtitles.Subtitle) ([]*CaptionSummary, string, error) {
	subtitleSummaries := make([]*CaptionSummary, 0, 10)

	// Split subtitle into caption chunks
	captionChunks, err := SplitSubtitle(subtitle)
	if err != nil {
		return subtitleSummaries, "", err
	}

	// Save caption chunks into a map, so we can get start time
	// by content hash id.
	// NOTE(review): chunks with identical text share an ID, so a later chunk
	// replaces an earlier one in this map — confirm that is acceptable.
	captionChunksMap := make(map[string]*CaptionChunk, len(captionChunks))
	chunks := make(ChunkSlice, 0, len(captionChunks))
	for i, cc := range captionChunks {
		chunks = append(chunks, NewChunk(i, cc.Text))
		captionChunksMap[cc.ID] = cc
	}
	log.Println("Total chunks: ", len(chunks))

	rootChunk, err := generateSummary(topic, chunks)
	if err != nil {
		return subtitleSummaries, "", err
	}

	// Best-effort dumps of the intermediate data; report failures instead of
	// dropping them silently, but never fail the request because of them.
	randomFile := randFilename()
	if dumpErr := DumpChunksToJSON("/tmp/"+randomFile+"_1.json", captionChunks); dumpErr != nil {
		log.Printf("dumping caption chunks: %v", dumpErr)
	}
	if dumpErr := DumpChunksToJSON("/tmp/"+randomFile+"_2.json", chunks); dumpErr != nil {
		log.Printf("dumping text chunks: %v", dumpErr)
	}
	if dumpErr := DumpChunksToJSON("/tmp/"+randomFile+"_3.json", rootChunk); dumpErr != nil {
		log.Printf("dumping root chunk: %v", dumpErr)
	}

	summary := rootChunk.Text
	for _, child := range rootChunk.Children {
		for _, grandchild := range child.Children {
			ss := &CaptionSummary{}
			ss.ID = grandchild.ID
			ss.Seq = grandchild.Seq
			ss.Text = grandchild.Text

			// The time range spans from the first leaf's start to the last
			// leaf's end. A leaf whose ID is not among the caption chunks
			// leaves the corresponding bound at its zero value.
			leafFrom, leafTo := getLeafChunk(grandchild, true), getLeafChunk(grandchild, false)
			if cc, ok := captionChunksMap[leafFrom.ID]; ok {
				ss.From = cc.From
			}
			if cc, ok := captionChunksMap[leafTo.ID]; ok {
				ss.To = cc.To
			}
			subtitleSummaries = append(subtitleSummaries, ss)
		}
	}
	return subtitleSummaries, summary, nil
}
// getLeafChunk walks down from target until it reaches a chunk without
// children and returns that chunk. At every level it follows the first child
// when isFirst is true and the last child otherwise. A target that has no
// children is returned as is.
func getLeafChunk(target *Chunk, isFirst bool) *Chunk {
	node := target
	for len(node.Children) > 0 {
		next := 0
		if !isFirst {
			next = len(node.Children) - 1
		}
		node = node.Children[next]
	}
	return node
}
// GenerateTitle generates a title for the given text.
//
// The text is first checked with ValidateTokens against MaxReqTokens512; text
// that does not pass is rejected with an error and no completion is requested.
// Otherwise the text is substituted into GenerateTitlePrompt and sent through
// completionWithRetry. If that call fails, its error is returned unchanged
// together with an empty string.
func GenerateTitle(text string) (string, error) {
	tokens, ok := ValidateTokens(text, MaxReqTokens512)
	if !ok {
		return "", fmt.Errorf("The maximum tokens supported is %d, got %d", MaxReqTokens512, tokens)
	}

	prompt := fmt.Sprintf(GenerateTitlePrompt, text)
	title, err := completionWithRetry(prompt)
	if err != nil {
		return "", err
	}
	return title, nil
}
// ExtractNouns extracts nouns from text and returns them as a JSON string.
//
// The text is first checked with ValidateTokens against MaxReqTokens2048; text
// that does not pass is rejected with an error and no completion is requested.
// Otherwise the text is substituted into ExtractNounsPrompt and sent through
// completionWithRetry. If that call fails, its error is returned unchanged
// together with an empty string.
//
// Example output string:
//
//	{
//	  "usernames": ["吴三桂", "皇太极", "弘历"],
//	  "company_names": ["得到"],
//	  "product_names": [],
//	  "course_names": ["硅谷来信"],
//	  "book_names": ["万历十五年", "湘行散记", "货币未来"]
//	}
func ExtractNouns(text string) (string, error) {
	tokens, ok := ValidateTokens(text, MaxReqTokens2048)
	if !ok {
		return "", fmt.Errorf("The maximum tokens supported is %d, got %d", MaxReqTokens2048, tokens)
	}

	prompt := fmt.Sprintf(ExtractNounsPrompt, text)
	nouns, err := completionWithRetry(prompt)
	if err != nil {
		return "", err
	}
	return nouns, nil
}