2222// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2323
2424using System ;
25+ using System . Collections . Generic ;
26+ using System . ComponentModel ;
2527using System . Data . SqlClient ;
28+ using System . Linq ;
2629using EventFlow . Core ;
2730using EventFlow . Logs ;
2831
@@ -32,7 +35,6 @@ public class MsSqlErrorRetryStrategy : IMsSqlErrorRetryStrategy
3235 {
3336 private readonly ILog _log ;
3437 private readonly IMsSqlConfiguration _msSqlConfiguration ;
35- private static readonly Random Random = new Random ( ) ;
3638
3739 public MsSqlErrorRetryStrategy (
3840 ILog log ,
@@ -42,73 +44,124 @@ public MsSqlErrorRetryStrategy(
4244 _msSqlConfiguration = msSqlConfiguration ;
4345 }
4446
/// <summary>
/// Decides whether a failed MSSQL operation should be attempted again.
/// </summary>
/// <param name="exception">The exception that was thrown; anything that is not a <see cref="SqlException"/> is never retried.</param>
/// <param name="totalExecutionTime">Total time spent on the operation so far; not consulted by this implementation.</param>
/// <param name="currentRetryCount">Number of retries so far; once it exceeds the configured <c>TransientRetryCount</c> no further retry is requested.</param>
/// <returns>
/// The first retry decision produced by the SQL error code check, falling back to the
/// inner exception check, or <c>Retry.No</c> when neither produces one.
/// </returns>
public virtual Retry ShouldThisBeRetried(
    Exception exception,
    TimeSpan totalExecutionTime,
    int currentRetryCount)
{
    // The list of errors considered transient is inspired by Azure's
    // SqlDatabaseTransientErrorDetectionStrategy.

    var sqlException = exception as SqlException;
    if (sqlException == null)
    {
        return Retry.No;
    }

    // Only consult the configuration once we know we are dealing with a SQL error.
    if (currentRetryCount > _msSqlConfiguration.TransientRetryCount)
    {
        return Retry.No;
    }

    // Both checks are lazy iterators: the inner exception check only runs
    // when the error code check yields nothing.
    var decision = CheckErrorCode(sqlException)
        .Concat(CheckInnerException(sqlException))
        .FirstOrDefault();

    return decision ?? Retry.No;
}
67+
/// <summary>
/// Inspects the individual errors attached to a <see cref="SqlException"/> and yields
/// a retry decision for the first one whose number is a known transient error.
/// </summary>
/// <param name="sqlException">The SQL exception whose <c>Errors</c> collection is examined.</param>
/// <returns>
/// A sequence with at most one element: a retry after the configured
/// <c>ServerBusyRetryDelay</c> for error 40501 (also logged as a warning), or a retry after
/// the configured <c>TransientRetryDelay</c> for the other listed codes. The sequence is
/// empty when no error number matches.
/// </returns>
private IEnumerable<Retry> CheckErrorCode(SqlException sqlException)
{
    foreach (SqlError sqlExceptionError in sqlException.Errors)
    {
        // Unlisted error numbers intentionally fall through to the next error.
        // ReSharper disable once SwitchStatementMissingSomeCases
        switch (sqlExceptionError.Number)
        {
            // SQL Error Code: 40501
            // The service is currently busy. Retry the request after 10 seconds.
            case 40501:
            {
                var delay = _msSqlConfiguration.ServerBusyRetryDelay.PickDelay();
                _log.Warning(
                    "MSSQL server returned error 40501 which means it is too busy and asked us to wait 10 seconds! Trying to wait {0:0.###} seconds.",
                    delay.TotalSeconds);
                yield return Retry.YesAfter(delay);
                yield break;
            }

            // SQL Error Code: 40613
            // Database XXXX on server YYYY is not currently available. Please retry the connection later. If the problem persists, contact customer
            // support, and provide them the session tracing ID of ZZZZZ.
            case 40613:

            // SQL Error Code: 40540
            // The service has encountered an error processing your request. Please try again.
            case 40540:

            // SQL Error Code: 40197
            // The service has encountered an error processing your request. Please try again.
            case 40197:

            // SQL Error Code: 40143
            // The service has encountered an error processing your request. Please try again.
            case 40143:

            // SQL Error Code: 18401
            // Login failed for user '%s'. Reason: Server is in script upgrade mode. Only administrator can connect at this time.
            // Devnote: this can happen when SQL is going through recovery (e.g. after failover)
            case 18401:

            // SQL Error Code: 10929
            // Resource ID: %d. The %s minimum guarantee is %d, maximum limit is %d and the current usage for the database is %d.
            // However, the server is currently too busy to support requests greater than %d for this database.
            case 10929:

            // SQL Error Code: 10928
            // Resource ID: %d. The %s limit for the database is %d and has been reached.
            case 10928:

            // SQL Error Code: 10060
            // A network-related or instance-specific error occurred while establishing a connection to SQL Server.
            // The server was not found or was not accessible. Verify that the instance name is correct and that SQL Server
            // is configured to allow remote connections. (provider: TCP Provider, error: 0 - A connection attempt failed
            // because the connected party did not properly respond after a period of time, or established connection failed
            // because connected host has failed to respond.)
            case 10060:

            // SQL Error Code: 10054
            // A transport-level error has occurred when sending the request to the server.
            // (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
            case 10054:

            // SQL Error Code: 10053
            // A transport-level error has occurred when receiving results from the server.
            // An established connection was aborted by the software in your host machine.
            case 10053:

            // SQL Error Code: 233
            // The client was unable to establish a connection because of an error during connection initialization process before login.
            // Possible causes include the following: the client tried to connect to an unsupported version of SQL Server; the server was too busy
            // to accept new connections; or there was a resource limitation (insufficient memory or maximum allowed connections) on the server.
            // (provider: TCP Provider, error: 0 - An existing connection was forcibly closed by the remote host.)
            case 233:

            // SQL Error Code: 64
            // A connection was successfully established with the server, but then an error occurred during the login process.
            // (provider: TCP Provider, error: 0 - The specified network name is no longer available.)
            case 64:
                yield return Retry.YesAfter(_msSqlConfiguration.TransientRetryDelay.PickDelay());
                yield break;
        }
    }
}
152+
/// <summary>
/// Looks at the inner exception of a <see cref="SqlException"/> and yields a retry decision
/// when it is a <see cref="Win32Exception"/> carrying one of the recognised native error codes.
/// </summary>
/// <param name="sqlException">The SQL exception whose <c>InnerException</c> is examined.</param>
/// <returns>
/// A sequence with a single retry after the configured <c>TransientRetryDelay</c> when the
/// native error code is 0x102 or 0x121; otherwise an empty sequence.
/// </returns>
private IEnumerable<Retry> CheckInnerException(SqlException sqlException)
{
    // Prelogin failure can happen due to waits expiring on windows handles. Or
    // due to bugs in the gateway code, a dropped database with a pooled connection
    // when reset results in a timeout error instead of immediate failure.

    var nativeFailure = sqlException.InnerException as Win32Exception;
    if (nativeFailure == null)
    {
        yield break;
    }

    switch (nativeFailure.NativeErrorCode)
    {
        // 0x102 (258) is the Win32 WAIT_TIMEOUT code.
        case 0x102:

        // NOTE(review): presumably meant as "semaphore timeout expired"; Win32 ERROR_SEM_TIMEOUT
        // is 121 decimal (0x79), not 0x121 - confirm the intended value before changing it.
        case 0x121:
            yield return Retry.YesAfter(_msSqlConfiguration.TransientRetryDelay.PickDelay());
            yield break;
    }
}
114167 }
0 commit comments