@@ -141,15 +141,15 @@ func (indexer *Indexer) Lookup(
141141 }
142142 numDocs = 0
143143
144- // 合并关键词和标签为搜索键
145- keywords := make ([] string , len ( tokens ) + len ( labels ))
146- copy ( keywords , tokens )
147- copy ( keywords [ len ( tokens ):], labels )
144+ mustKeywords , mustTokensLength , mustNotKeywords , isValid := getProssedQueries ( tokens , labels )
145+ if ! isValid {
146+ return
147+ }
148148
149149 indexer .tableLock .RLock ()
150150 defer indexer .tableLock .RUnlock ()
151- table := make ([]* KeywordIndices , len (keywords ))
152- for i , keyword := range keywords {
151+ table := make ([]* KeywordIndices , len (mustKeywords ))
152+ for i , keyword := range mustKeywords {
153153 indices , found := indexer .tableLock .table [keyword ]
154154 if ! found {
155155 // 当反向索引表中无此搜索键时直接返回
@@ -160,6 +160,15 @@ func (indexer *Indexer) Lookup(
160160 }
161161 }
162162
163+ // 保存must not搜索键
164+ mustNotTable := make ([]* KeywordIndices , 0 )
165+ for _ , keyword := range mustNotKeywords {
166+ indices , found := indexer .tableLock .table [keyword ]
167+ if found {
168+ mustNotTable = append (mustNotTable , indices )
169+ }
170+ }
171+
163172 // 当没有找到时直接返回
164173 if len (table ) == 0 {
165174 return
@@ -171,6 +180,7 @@ func (indexer *Indexer) Lookup(
171180 for iTable := 0 ; iTable < len (table ); iTable ++ {
172181 indexPointers [iTable ] = indexer .getIndexLength (table [iTable ]) - 1
173182 }
183+
174184 // 平均文本关键词长度,用于计算BM25
175185 avgDocLength := indexer .totalTokenLength / float32 (indexer .numDocuments )
176186 for ; indexPointers [0 ] >= 0 ; indexPointers [0 ]-- {
@@ -186,7 +196,9 @@ func (indexer *Indexer) Lookup(
186196 continue
187197 }
188198 }
199+
189200 iTable := 1
201+
190202 found := true
191203 for ; iTable < len (table ); iTable ++ {
192204 // 二分法比简单的顺序归并效率高,也有更高效率的算法,
@@ -196,7 +208,11 @@ func (indexer *Indexer) Lookup(
196208 position , foundBaseDocId := indexer .searchIndex (table [iTable ],
197209 0 , indexPointers [iTable ], baseDocId )
198210 if foundBaseDocId {
199- indexPointers [iTable ] = position
211+ if ! indexer .findInMustNotTable (mustNotTable , baseDocId ) {
212+ indexPointers [iTable ] = position
213+ } else {
214+ found = false
215+ }
200216 } else {
201217 if position == 0 {
202218 // 该搜索键中所有的文档ID都比baseDocId大,因此已经没有
@@ -211,19 +227,27 @@ func (indexer *Indexer) Lookup(
211227 }
212228 }
213229
230+ // 如果搜索键只返回一个反向表, 并且存在逻辑非搜索键
231+ // 则需要判断baseDocId是不是在逻辑非反向表中
232+ if len (table ) == 1 && len (mustNotTable ) > 0 {
233+ if indexer .findInMustNotTable (mustNotTable , baseDocId ) {
234+ found = false
235+ }
236+ }
237+
214238 if found {
215239 indexedDoc := types.IndexedDocument {}
216240
217241 // 当为LocationsIndex时计算关键词紧邻距离
218242 if indexer .initOptions .IndexType == types .LocationsIndex {
219243 // 计算有多少关键词是带有距离信息的
220244 numTokensWithLocations := 0
221- for i , t := range table [:len ( tokens ) ] {
245+ for i , t := range table [:mustTokensLength ] {
222246 if len (t.locations [indexPointers [i ]]) > 0 {
223247 numTokensWithLocations ++
224248 }
225249 }
226- if numTokensWithLocations != len ( tokens ) {
250+ if numTokensWithLocations != mustTokensLength {
227251 if ! countDocsOnly {
228252 docs = append (docs , types.IndexedDocument {
229253 DocId : baseDocId ,
@@ -234,13 +258,13 @@ func (indexer *Indexer) Lookup(
234258 }
235259
236260 // 计算搜索键在文档中的紧邻距离
237- tokenProximity , tokenLocations := computeTokenProximity (table [:len ( tokens ) ], indexPointers , tokens )
261+ tokenProximity , tokenLocations := computeTokenProximity (table [:mustTokensLength ], indexPointers , mustKeywords [: mustTokensLength ] )
238262 indexedDoc .TokenProximity = int32 (tokenProximity )
239263 indexedDoc .TokenSnippetLocations = tokenLocations
240264
241265 // 添加TokenLocations
242- indexedDoc .TokenLocations = make ([][]int , len ( tokens ) )
243- for i , t := range table [:len ( tokens ) ] {
266+ indexedDoc .TokenLocations = make ([][]int , mustTokensLength )
267+ for i , t := range table [:mustTokensLength ] {
244268 indexedDoc .TokenLocations [i ] = t.locations [indexPointers [i ]]
245269 }
246270 }
@@ -250,7 +274,7 @@ func (indexer *Indexer) Lookup(
250274 indexer .initOptions .IndexType == types .FrequenciesIndex {
251275 bm25 := float32 (0 )
252276 d := indexer .docTokenLengths [baseDocId ]
253- for i , t := range table [:len ( tokens ) ] {
277+ for i , t := range table [:mustTokensLength ] {
254278 var frequency float32
255279 if indexer .initOptions .IndexType == types .LocationsIndex {
256280 frequency = float32 (len (t.locations [indexPointers [i ]]))
@@ -423,3 +447,55 @@ func (indexer *Indexer) RemoveDoc(docId uint64) {
423447 indexer .numDocuments --
424448 indexer .tableLock .Unlock ()
425449}
450+
// getProssedQueries splits the raw search tokens and labels into required
// ("must") and excluded ("must not") search keys.
//
// Classification is by the first byte of each key:
//   - "-" prefix: the prefix is stripped and the rest becomes a must-not key;
//   - "+" prefix: the prefix is stripped and the rest becomes a must key;
//   - anything else: the key is a must key as-is.
//
// Empty strings are ignored. A bare "+" or "-" yields an empty key, exactly
// as the prefix stripping produces it.
//
// The four results are, in order:
//  1. the must keys, token-derived keys first, then label-derived keys,
//     each group in input order;
//  2. the number of must keys that came from tokens, i.e. the token-derived
//     keys are result1[:result2];
//  3. the must-not keys, token-derived first, then label-derived;
//  4. whether the query is usable: false only when there are must-not keys
//     but not a single must key (a query cannot consist solely of
//     exclusions). Required keys coming from labels count here too.
func getProssedQueries(tokens []string, labels []string) (
	[]string, int, []string, bool) {
	mustKeywords := make([]string, 0, len(tokens)+len(labels))
	mustNotKeywords := make([]string, 0)

	// classify appends every non-empty key to the matching list.
	classify := func(keys []string) {
		for _, key := range keys {
			if len(key) == 0 {
				continue
			}
			switch key[0] {
			case '-':
				mustNotKeywords = append(mustNotKeywords, key[1:])
			case '+':
				mustKeywords = append(mustKeywords, key[1:])
			default:
				mustKeywords = append(mustKeywords, key)
			}
		}
	}

	// Tokens go first so that the token-derived must keys form a prefix of
	// mustKeywords; its length is captured before labels are appended.
	classify(tokens)
	mustTokensLength := len(mustKeywords)
	classify(labels)

	// A query made up only of exclusions has nothing to match against.
	isValid := len(mustKeywords) > 0 || len(mustNotKeywords) == 0
	return mustKeywords, mustTokensLength, mustNotKeywords, isValid
}
489+
490+ // 在must not table中查找docId
491+ // 返回: 找到: true, 未找到: false
492+ func (indexer * Indexer ) findInMustNotTable (table []* KeywordIndices , docId uint64 ) bool {
493+ for i := 0 ; i < len (table ); i ++ {
494+ _ , foundDocId := indexer .searchIndex (table [i ],
495+ 0 , indexer .getIndexLength (table [i ])- 1 , docId )
496+ if foundDocId {
497+ return true
498+ }
499+ }
500+ return false
501+ }
0 commit comments