fix: correctly ignore class values in HTML

jdkato · jdkato · commit 41f3b223e8cc · 2026-03-17T09:39:45.000-07:00
Signed-off-by: Joseph Kato <joseph@jdkato.io> #1086
diff --git a/internal/lint/walk.go b/internal/lint/walk.go
@@ -34,13 +34,18 @@ type walker struct {
 
 	begin int
 	end   int
+
+	// ext holds the normalized extension (f.NormedExt) of the file being walked.
+	ext string
 }
 
 func newWalker(f *core.File, raw []byte, offset int) *walker {
 	return &walker{
 		lines:   len(f.Lines) + offset,
 		context: string2ByteSlice(f.Content),
-		z:       html.NewTokenizer(bytes.NewReader(raw))}
+		z:       html.NewTokenizer(bytes.NewReader(raw)),
+		ext:     f.NormedExt,
+	}
 }
 
 func (w *walker) sub(sub string, char rune) bool {
@@ -137,10 +142,20 @@ func (w *walker) walk() (html.TokenType, html.Token, string) {
 
 func (w *walker) replaceToks(tok html.Token) {
 	tags := core.StringInSlice(tok.Data, []string{
-		"img", "a", "p", "script", "h1", "h2", "h3", "h4", "h5", "h6"})
+		"img", "a", "p", "script", "h1", "h2", "h3", "h4", "h5", "h6", "span"})
 	if tags {
+		names := []string{"href", "id", "src", "alt"}
+		if w.ext == ".html" {
+			// Inline tags may carry `class` attributes whose values contain substrings that match
+			// our actual findings, so those values need to be handled here as well. We only do this
+			// for `.html` input: many of our other supported formats inject classes *after*
+			// converting to HTML, so their values can't be found in the original text.
+			//
+			// See testdata/fixtures/patterns/{test2.rst, test3.html} for examples.
+			names = append(names, "class")
+		}
 		for _, a := range tok.Attr {
-			if core.StringInSlice(a.Key, []string{"href", "id", "src", "alt"}) {
+			if core.StringInSlice(a.Key, names) {
 				if a.Key == "href" {
 					a.Val, _ = url.QueryUnescape(a.Val)
 				}
diff --git a/testdata/features/patterns.feature b/testdata/features/patterns.feature
@@ -53,7 +53,17 @@ Feature: IgnorePatterns
         When I test patterns for "test.html"
         Then the output should contain exactly:
             """
-            test.html:22:11:Vale.Repetition:'bye' is repeated!
+            test.html:22:13:Vale.Repetition:'bye' is repeated!
+            """
+        And the exit status should be 1
+
+    Scenario: HTML with Ignored Classes
+        When I test patterns for "test3.html"
+        Then the output should contain exactly:
+            """
+            test3.html:30:75:write-good.We:Try to avoid using first-person plural like 'us'.
+            test3.html:32:98:Vale.Spelling:Did you really mean 'hasChildren'?
+            test3.html:33:64:write-good.We:Try to avoid using first-person plural like 'us'.
             """
         And the exit status should be 1
 
diff --git a/testdata/fixtures/patterns/.vale.ini b/testdata/fixtures/patterns/.vale.ini
@@ -4,10 +4,14 @@ MinAlertLevel = suggestion
 IgnoredScopes = code, tt, strong, text.comment.line
 IgnoredClasses = metrics, blurb, alignedsummary
 
-[*]
+[*.{md,rst,adoc,org,html,py}]
 BasedOnStyles = Vale
 
-[*.{md,rst,adoc,org,mdx}]
+[test3.html]
+write-good.We = YES
+Vale.Repetition = NO
+
+[*.{md,rst,adoc,org,mdx,py}]
 TokenIgnores = (\$+?[^\d][^\n$]+\$+?), (<http[^\n]+>+?)
 IgnorePatterns = (?s) *({{< ?file(?:-excerpt)? [^>]* ?>}}.*?{{< ?/file(?:-excerpt)? ?>}})
 
diff --git a/testdata/fixtures/patterns/test.html b/testdata/fixtures/patterns/test.html
@@ -13,29 +13,29 @@
 </div>
 
 <div class="blurb">
-  <svg>...</svg>
-  Thanks! bye bye
+    <svg>...</svg>
+    Thanks! bye bye
 </div>
 
 <div class="okay">
-  <svg>...</svg>
-  Thanks! bye bye
+    <svg>...</svg>
+    Thanks! bye bye
 </div>
 
 <table class="blurb">
-  <tr>
-    <th>Company</th>
-    <th>Contact</th>
-    <th>Country</th>
-  </tr>
-  <tr>
-    <td>Alfreds Futterkiste</td>
-    <td>Maria Anders</td>
-    <td>Germany</td>
-  </tr>
-  <tr>
-    <td>Centro comercial Moctezuma</td>
-    <td>Francisco Chang</td>
-    <td>Mexico</td>
-  </tr>
-</table>
+    <tr>
+        <th>Company</th>
+        <th>Contact</th>
+        <th>Country</th>
+    </tr>
+    <tr>
+        <td>Alfreds Futterkiste</td>
+        <td>Maria Anders</td>
+        <td>Germany</td>
+    </tr>
+    <tr>
+        <td>Centro comercial Moctezuma</td>
+        <td>Francisco Chang</td>
+        <td>Mexico</td>
+    </tr>
+</table>
diff --git a/testdata/fixtures/patterns/test3.html b/testdata/fixtures/patterns/test3.html
@@ -0,0 +1,47 @@
+<html lang="en">
+
+<body class="qt-design-system" style="background:var(--content-bg-color)" onload="prettyPrint()">
+    <div data-global-resource-path="qt-design-system/components/b-sidebar.html">
+        <div class="b-sidebar b-sidebar--full-width">
+            <div class="b-sidebar__content">
+                <div class="b-sidebar__content__wrapper">
+                    <article class="b-sidebar__content__left">
+                        <div class="context mainContent" style="margin: 0;padding: 0; background: 0;">
+
+
+                            <div class="context">
+
+                                <div class="table">
+                                    <table class="alignedsummary requisites" translate="no">
+                                        <tr>
+                                            <td class="memItemRight bottomAlign"><span
+                                                    class="status preliminary"></span></td>
+                                        </tr>
+                                    </table>
+                                </div>
+
+                                <div>
+                                    <p><a href="">data</a>
+                                        <a href="">at</a> <a href="">data</a>, <a href="">at</a>
+                                        <a href="">at</a>
+                                    </p>
+                                </div>
+
+                                <p>Iterating over the mutable rows allows us to modify individual items.</p>
+                                <p>When iterating over a tree, the row wrapper has two additional member functions,
+                                    <a href="qrangemodeladapter.html#hasChildren" translate="no">hasChildren</a>()
+                                    and children(), that allow us to traverse the entire tree using iterators.
+                                </p>
+
+                            </div>
+
+                    </article>
+
+                </div>
+            </div>
+        </div>
+    </div>
+
+</body>
+
+</html>
diff --git a/testdata/fixtures/patterns/test4.html b/testdata/fixtures/patterns/test4.html
diff --git a/testdata/styles/write-good/We.yml b/testdata/styles/write-good/We.yml