-
-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathnsrl.go
More file actions
151 lines (130 loc) · 3.79 KB
/
nsrl.go
File metadata and controls
151 lines (130 loc) · 3.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
package filetrove
import (
"bufio"
"encoding/gob"
"errors"
"fmt"
"io"
"os"
"strings"
"github.com/bits-and-blooms/bloom/v3"
)
// NSRLFilter wraps a Bloom filter of NSRL hashes together with the metadata
// describing how the filter was built. CreateNSRLBloom gob-encodes a value of
// this type to disk and LoadNSRL decodes it again.
type NSRLFilter struct {
// Filter is the underlying Bloom filter; LoadNSRL rejects a decoded value
// in which it is nil.
Filter *bloom.BloomFilter
// Version is the NSRL RDS version label (e.g., "2026.03.1-modern").
Version string
// HashType names the hash algorithm of the inserted values. CreateNSRLBloom
// currently always writes "sha1" (future: "sha256").
HashType string
// FPR is the target false positive rate the filter was sized for.
FPR float64
// Items is the number of hashes inserted when the filter was built.
Items uint
// Subsets lists the NSRL subsets, e.g., ["modern"] or
// ["modern", "android", "ios"]. CreateNSRLBloom currently writes it empty.
Subsets []string
}
// Contains reports whether sha1hash is present in the NSRL Bloom filter.
//
// The hash is trimmed of surrounding whitespace and lowercased before the
// lookup, which is the same normalization CreateNSRLBloom applies when it
// inserts hashes; without the trim, a value carrying a stray newline or space
// could never match. As with any Bloom filter, a true result may be a false
// positive (see the FPR field); a false result is definitive.
//
// A nil receiver, or a receiver whose Filter is nil, reports false instead of
// panicking.
func (nf *NSRLFilter) Contains(sha1hash string) bool {
	if nf == nil || nf.Filter == nil {
		return false
	}
	return nf.Filter.TestString(strings.ToLower(strings.TrimSpace(sha1hash)))
}
// CreateNSRLBloom reads newline-delimited SHA1 hashes and writes a gob-encoded
// NSRLFilter holding a Bloom filter of them to nsrloutfile.
//
// Parameters:
//   - nsrlsourcefile: path of the hash list, or "-" to read from stdin.
//   - nsrlversion: stored verbatim in the resulting filter's Version field.
//   - nsrloutfile: path of the file to create (an existing file is truncated).
//   - estimatedItems: sizing hint for the filter. If 0, the source file is
//     pre-scanned to count its non-empty lines so the filter is sized for the
//     actual number of hashes. Stdin cannot be read twice, so "-" requires
//     estimatedItems > 0.
//   - fpr: target false positive rate (e.g., 0.0001 for 0.01%). It must be
//     strictly between 0 and 1.
//
// Every line is trimmed and lowercased before insertion and blank lines are
// skipped. Filter statistics are printed to stdout, followed by a warning if
// more hashes were inserted than estimated.
//
// An error is returned if fpr is out of range, if stdin is used without an
// estimate, or if reading the source or writing the output file fails.
func CreateNSRLBloom(nsrlsourcefile string, nsrlversion string, nsrloutfile string, estimatedItems uint, fpr float64) error {
	// Written as a negated conjunction so that NaN is rejected as well. An
	// out-of-range rate cannot produce a meaningfully sized filter.
	if !(fpr > 0 && fpr < 1) {
		return fmt.Errorf("invalid target false positive rate %v: must be greater than 0 and less than 1", fpr)
	}

	var r io.Reader
	if nsrlsourcefile == "-" {
		if estimatedItems == 0 {
			return fmt.Errorf("--nsrl-estimate must be provided when reading from stdin")
		}
		r = os.Stdin
	} else {
		// If no estimate provided, count actual lines first so the filter is correctly sized.
		if estimatedItems == 0 {
			n, err := countNonEmptyLines(nsrlsourcefile)
			if err != nil {
				return fmt.Errorf("counting hashes: %w", err)
			}
			estimatedItems = n
		}
		f, err := os.Open(nsrlsourcefile)
		if err != nil {
			return fmt.Errorf("opening NSRL hash file: %w", err)
		}
		// Read-only handle: a Close error here cannot lose data.
		defer f.Close()
		r = f
	}

	// Ensure at least 1 to avoid zero-size filter
	if estimatedItems == 0 {
		estimatedItems = 1
	}
	filter := bloom.NewWithEstimates(estimatedItems, fpr)

	var count uint
	scanner := bufio.NewScanner(r)
	for scanner.Scan() {
		hash := strings.TrimSpace(scanner.Text())
		if len(hash) == 0 {
			continue
		}
		filter.AddString(strings.ToLower(hash))
		count++
	}
	if err := scanner.Err(); err != nil {
		return fmt.Errorf("reading NSRL hashes: %w", err)
	}

	fmt.Printf("Bloom filter stats: estimated items: %d, actual items inserted: %d, target FPR: %.6f\n",
		estimatedItems, count, fpr)
	if count > estimatedItems {
		fmt.Printf("WARNING: actual item count (%d) exceeds estimated items (%d). "+
			"The real false positive rate will be significantly higher than the target %.6f. "+
			"Re-create the filter with --nsrl-estimate >= %d.\n",
			count, estimatedItems, fpr, count)
	}

	nf := NSRLFilter{
		Filter:   filter,
		Version:  nsrlversion,
		HashType: "sha1",
		FPR:      fpr,
		Items:    count,
		Subsets:  []string{},
	}

	outFile, err := os.Create(nsrloutfile)
	if err != nil {
		return fmt.Errorf("creating NSRL bloom file: %w", err)
	}
	if err := gob.NewEncoder(outFile).Encode(&nf); err != nil {
		// The encode failure is the error worth reporting; closing is best-effort.
		_ = outFile.Close()
		return fmt.Errorf("encoding NSRL bloom filter: %w", err)
	}
	// Close explicitly rather than via defer: for a file that was written to,
	// Close can be the call that reports a failed write, and ignoring it could
	// report success for a truncated filter file.
	if err := outFile.Close(); err != nil {
		return fmt.Errorf("closing NSRL bloom file: %w", err)
	}
	return nil
}
// countNonEmptyLines reports how many lines of the file at path contain at
// least one non-whitespace character. The file is read once, line by line.
// If the file cannot be opened the count is 0; if scanning stops early, the
// lines counted so far are returned together with the scanner's error.
func countNonEmptyLines(path string) (uint, error) {
	src, openErr := os.Open(path)
	if openErr != nil {
		return 0, openErr
	}
	defer src.Close()

	lines := bufio.NewScanner(src)
	total := uint(0)
	for lines.Scan() {
		// Whitespace-only lines are treated as empty.
		if len(strings.TrimSpace(lines.Text())) == 0 {
			continue
		}
		total++
	}
	return total, lines.Err()
}
// LoadNSRL reads a gob-encoded NSRLFilter from the .bloom file at
// nsrlbloomfile and returns it in memory.
//
// It returns an error if the file cannot be opened, if its contents cannot be
// decoded, or if the decoded value carries no Bloom filter. The decode error
// is wrapped, so callers can still inspect the underlying cause with
// errors.Is or errors.As.
func LoadNSRL(nsrlbloomfile string) (*NSRLFilter, error) {
	file, err := os.Open(nsrlbloomfile)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	var nf NSRLFilter
	if err := gob.NewDecoder(file).Decode(&nf); err != nil {
		// %w preserves the error chain; the message text is unchanged.
		return nil, fmt.Errorf("could not decode NSRL bloom filter: %w", err)
	}
	// A successful decode can still leave Filter unset; reject that here so
	// callers never receive a filter that cannot be queried.
	if nf.Filter == nil {
		return nil, errors.New("NSRL bloom filter is empty or corrupt")
	}
	return &nf, nil
}