From 5d9039df1cab9ffe1c8a08183cbee77f5c0d5bce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 10:16:05 +0100 Subject: [PATCH 01/19] Update to .net8 --- src/TextMateSharp.Demo/TextMateSharp.Demo.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TextMateSharp.Demo/TextMateSharp.Demo.csproj b/src/TextMateSharp.Demo/TextMateSharp.Demo.csproj index 1edf6a8..f648298 100644 --- a/src/TextMateSharp.Demo/TextMateSharp.Demo.csproj +++ b/src/TextMateSharp.Demo/TextMateSharp.Demo.csproj @@ -2,7 +2,7 @@ Exe - net6.0 + net8.0 False enable From 3b6f3575922746a6350eed71d4a324016174c26b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 10:16:16 +0100 Subject: [PATCH 02/19] Update to Onigwrap 1.0.9 --- build/Directory.Build.props | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build/Directory.Build.props b/build/Directory.Build.props index d2bc446..7d9da74 100644 --- a/build/Directory.Build.props +++ b/build/Directory.Build.props @@ -3,6 +3,6 @@ latest true 8.0.5 - 1.0.8 + 1.0.9 From dba4dfccc1fc67e247d31792d2c6f2e7f943956d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 11:28:56 +0100 Subject: [PATCH 03/19] Target .net8 --- .../TextMateSharp.Grammars.Tests.csproj | 2 +- src/TextMateSharp.Tests/TextMateSharp.Tests.csproj | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TextMateSharp.Grammars.Tests/TextMateSharp.Grammars.Tests.csproj b/src/TextMateSharp.Grammars.Tests/TextMateSharp.Grammars.Tests.csproj index 449f714..3db4421 100644 --- a/src/TextMateSharp.Grammars.Tests/TextMateSharp.Grammars.Tests.csproj +++ b/src/TextMateSharp.Grammars.Tests/TextMateSharp.Grammars.Tests.csproj @@ -1,7 +1,7 @@ - net6.0 + net8.0 false True ..\TextMateSharp.snk diff --git a/src/TextMateSharp.Tests/TextMateSharp.Tests.csproj b/src/TextMateSharp.Tests/TextMateSharp.Tests.csproj index ad51fbf..61499ce 100644 --- a/src/TextMateSharp.Tests/TextMateSharp.Tests.csproj +++ b/src/TextMateSharp.Tests/TextMateSharp.Tests.csproj @@ -1,7 +1,7 @@  - net6.0 + net8.0 False True ..\TextMateSharp.snk From 08903063f7e493b85c631f3935d2999250ec5317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 14:11:12 +0100 Subject: [PATCH 04/19] Change public APIs to get LineText instead string Line text can be implicitly converted from string and from ReadonlyMemory --- src/TextMateSharp.Tests/Model/TMModelTests.cs | 2 +- src/TextMateSharp/Grammar/IGrammar.cs | 8 ++-- src/TextMateSharp/Grammar/LineText.cs | 33 +++++++++++++++++ .../Internal/Grammars/Grammar.cs | 37 ++++++++++++------- .../Internal/Grammars/LineTokenizer.cs | 18 ++++----- .../Internal/Grammars/LineTokens.cs | 5 ++- .../Internal/Rules/BeginEndRule.cs | 3 +- .../Internal/Rules/BeginWhileRule.cs | 3 +- .../Internal/Rules/RegExpSource.cs | 4 +- src/TextMateSharp/Internal/Rules/Rule.cs | 5 ++- .../Internal/Utils/RegexSource.cs | 6 +-- .../Internal/Utils/StringUtils.cs | 18 ++++++++- src/TextMateSharp/Model/AbstractLineList.cs | 4 +- src/TextMateSharp/Model/IModelLines.cs | 4 +- .../Model/ITokenizationSupport.cs | 9 ++--- src/TextMateSharp/Model/TMModel.cs | 4 +- src/TextMateSharp/Model/Tokenizer.cs | 13 ++++--- 17 files changed, 122 insertions(+), 54 deletions(-) create mode 100644 src/TextMateSharp/Grammar/LineText.cs diff --git a/src/TextMateSharp.Tests/Model/TMModelTests.cs b/src/TextMateSharp.Tests/Model/TMModelTests.cs index e02e1c8..5e6f901 100644 --- a/src/TextMateSharp.Tests/Model/TMModelTests.cs +++ b/src/TextMateSharp.Tests/Model/TMModelTests.cs @@ -125,7 +125,7 @@ public override int GetLineLength(int lineIndex) { return _lines[lineIndex].Length; } - public override string GetLineText(int lineIndex) + public override LineText GetLineText(int lineIndex) { return _lines[lineIndex]; } diff --git a/src/TextMateSharp/Grammar/IGrammar.cs b/src/TextMateSharp/Grammar/IGrammar.cs index e82df51..05e0d03 100644 --- a/src/TextMateSharp/Grammar/IGrammar.cs +++ b/src/TextMateSharp/Grammar/IGrammar.cs @@ -9,9 +9,9 @@ public interface IGrammar string GetName(); string GetScopeName(); ICollection GetFileTypes(); - ITokenizeLineResult TokenizeLine(string lineText); - ITokenizeLineResult TokenizeLine(string lineText, IStateStack prevState, TimeSpan timeLimit); - ITokenizeLineResult2 TokenizeLine2(string lineText); - ITokenizeLineResult2 TokenizeLine2(string lineText, IStateStack prevState, TimeSpan timeLimit); + ITokenizeLineResult TokenizeLine(LineText lineText); + ITokenizeLineResult TokenizeLine(LineText lineText, IStateStack prevState, TimeSpan timeLimit); + ITokenizeLineResult2 TokenizeLine2(LineText lineText); + ITokenizeLineResult2 TokenizeLine2(LineText lineText, IStateStack prevState, TimeSpan timeLimit); } } \ No newline at end of file diff --git a/src/TextMateSharp/Grammar/LineText.cs b/src/TextMateSharp/Grammar/LineText.cs new file mode 100644 index 0000000..e78c1c0 --- /dev/null +++ b/src/TextMateSharp/Grammar/LineText.cs @@ -0,0 +1,33 @@ +using System; + +namespace TextMateSharp.Grammars +{ + public readonly struct LineText + { + private readonly ReadOnlyMemory _memory; + + public LineText(ReadOnlyMemory memory) + { + _memory = memory; + } + + public LineText(string text) + { + _memory = text?.AsMemory() ?? ReadOnlyMemory.Empty; + } + + public ReadOnlyMemory Memory => _memory; + + public int Length => _memory.Length; + + public bool IsEmpty => _memory.IsEmpty; + + public static implicit operator LineText(string text) => new LineText(text); + + public static implicit operator LineText(ReadOnlyMemory memory) => new LineText(memory); + + public static implicit operator ReadOnlyMemory(LineText lineText) => lineText._memory; + + public override string ToString() => _memory.Span.ToString(); + } +} diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index 8700f84..4651538 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -200,27 +200,27 @@ private IRawGrammar Clone(IRawGrammar grammar) return (IRawGrammar)((Raw)grammar).Clone(); } - public ITokenizeLineResult TokenizeLine(string lineText) + public ITokenizeLineResult TokenizeLine(LineText lineText) { return TokenizeLine(lineText, null, TimeSpan.MaxValue); } - public ITokenizeLineResult TokenizeLine(string lineText, IStateStack prevState, TimeSpan timeLimit) + public ITokenizeLineResult TokenizeLine(LineText lineText, IStateStack prevState, TimeSpan timeLimit) { - return (ITokenizeLineResult)Tokenize(lineText, (StateStack)prevState, false, timeLimit); + return (ITokenizeLineResult)Tokenize(lineText.Memory, (StateStack)prevState, false, timeLimit); } - public ITokenizeLineResult2 TokenizeLine2(string lineText) + public ITokenizeLineResult2 TokenizeLine2(LineText lineText) { return TokenizeLine2(lineText, null, TimeSpan.MaxValue); } - public ITokenizeLineResult2 TokenizeLine2(string lineText, IStateStack prevState, TimeSpan timeLimit) + public ITokenizeLineResult2 TokenizeLine2(LineText lineText, IStateStack prevState, TimeSpan timeLimit) { - return (ITokenizeLineResult2)Tokenize(lineText, (StateStack)prevState, true, timeLimit); + return (ITokenizeLineResult2)Tokenize(lineText.Memory, (StateStack)prevState, true, timeLimit); } - private object Tokenize(string lineText, StateStack prevState, bool emitBinaryTokens, TimeSpan timeLimit) + private object Tokenize(ReadOnlyMemory lineText, StateStack prevState, bool emitBinaryTokens, TimeSpan timeLimit) { if (this._rootId == null) { @@ -237,7 +237,7 @@ private object Tokenize(string lineText, StateStack prevState, bool emitBinaryTo rawDefaultMetadata.TokenType, null, defaultTheme.fontStyle, defaultTheme.foreground, defaultTheme.background); - string rootScopeName = this.GetRule(this._rootId)?.GetName(null, null); + string rootScopeName = this.GetRule(this._rootId)?.GetName(ReadOnlyMemory.Empty, null); if (rootScopeName == null) return null; BasicScopeAttributes rawRootMetadata = this._basicScopeAttributesProvider.GetBasicScopeAttributes(rootScopeName); @@ -253,14 +253,25 @@ private object Tokenize(string lineText, StateStack prevState, bool emitBinaryTo prevState.Reset(); } - if (string.IsNullOrEmpty(lineText) || lineText[lineText.Length - 1] != '\n') + // Check if we need to append newline + ReadOnlyMemory effectiveLineText; + if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') { // Only add \n if the passed lineText didn't have it. - lineText += '\n'; + // We need to allocate a new buffer with the newline + char[] buffer = new char[lineText.Length + 1]; + lineText.Span.CopyTo(buffer); + buffer[lineText.Length] = '\n'; + effectiveLineText = buffer.AsMemory(); } - int lineLength = lineText.Length; - LineTokens lineTokens = new LineTokens(emitBinaryTokens, lineText, _tokenTypeMatchers, _balancedBracketSelectors); - TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, lineText, isFirstLine, 0, prevState, + else + { + effectiveLineText = lineText; + } + + int lineLength = effectiveLineText.Length; + LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); + TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, lineTokens, true, timeLimit); if (emitBinaryTokens) diff --git a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs index 05095e0..33ed2bf 100644 --- a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs +++ b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs @@ -13,7 +13,7 @@ namespace TextMateSharp.Internal.Grammars class LineTokenizer { private Grammar _grammar; - private string _lineText; + private ReadOnlyMemory _lineText; private bool _isFirstLine; private int _linePos; private StateStack _stack; @@ -22,7 +22,7 @@ class LineTokenizer private bool _stop; private int _lineLength; - public LineTokenizer(Grammar grammar, string lineText, bool isFirstLine, int linePos, StateStack stack, + public LineTokenizer(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, int linePos, StateStack stack, LineTokens lineTokens) { this._grammar = grammar; @@ -253,7 +253,7 @@ private void ScanNext() } } - private MatchResult MatchRule(Grammar grammar, string lineText, in bool isFirstLine, in int linePos, + private MatchResult MatchRule(Grammar grammar, ReadOnlyMemory lineText, in bool isFirstLine, in int linePos, StateStack stack, in int anchorPosition) { Rule rule = stack.GetRule(grammar); @@ -277,7 +277,7 @@ private MatchResult MatchRule(Grammar grammar, string lineText, in bool isFirstL return null; } - private MatchResult MatchRuleOrInjections(Grammar grammar, string lineText, bool isFirstLine, + private MatchResult MatchRuleOrInjections(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, in int linePos, StateStack stack, in int anchorPosition) { // Look for normal grammar rule @@ -319,7 +319,7 @@ private MatchResult MatchRuleOrInjections(Grammar grammar, string lineText, bool return matchResult; } - private MatchInjectionsResult MatchInjections(List injections, Grammar grammar, string lineText, + private MatchInjectionsResult MatchInjections(List injections, Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, in int linePos, StateStack stack, in int anchorPosition) { // The lower the better @@ -383,7 +383,7 @@ private MatchInjectionsResult MatchInjections(List injections, Gramma return null; } - private void HandleCaptures(Grammar grammar, string lineText, bool isFirstLine, StateStack stack, + private void HandleCaptures(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, StateStack stack, LineTokens lineTokens, List captures, IOnigCaptureIndex[] captureIndices) { if (captures.Count == 0) @@ -457,7 +457,7 @@ private void HandleCaptures(Grammar grammar, string lineText, bool isFirstLine, contentNameScopesList); TokenizeString(grammar, - lineText.SubstringAtIndexes(0, captureIndex.End), + lineText.SliceAtIndexes(0, captureIndex.End), (isFirstLine && captureIndex.Start == 0), captureIndex.Start, stackClone, lineTokens, false, TimeSpan.MaxValue); continue; } @@ -488,7 +488,7 @@ private void HandleCaptures(Grammar grammar, string lineText, bool isFirstLine, * order. If any fails, cut off the entire stack above the failed while * condition. While conditions may also advance the linePosition. */ - private WhileCheckResult CheckWhileConditions(Grammar grammar, string lineText, bool isFirstLine, + private WhileCheckResult CheckWhileConditions(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, int linePos, StateStack stack, LineTokens lineTokens) { int anchorPosition = stack.BeginRuleCapturedEOL ? 0 : -1; @@ -541,7 +541,7 @@ private WhileCheckResult CheckWhileConditions(Grammar grammar, string lineText, return new WhileCheckResult(stack, linePos, anchorPosition, isFirstLine); } - public static TokenizeStringResult TokenizeString(Grammar grammar, string lineText, bool isFirstLine, int linePos, + public static TokenizeStringResult TokenizeString(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, int linePos, StateStack stack, LineTokens lineTokens, bool checkWhileConditions, TimeSpan timeLimit) { return new LineTokenizer(grammar, lineText, isFirstLine, linePos, stack, lineTokens).Scan(checkWhileConditions, timeLimit); diff --git a/src/TextMateSharp/Internal/Grammars/LineTokens.cs b/src/TextMateSharp/Internal/Grammars/LineTokens.cs index d535d46..dcf9f34 100644 --- a/src/TextMateSharp/Internal/Grammars/LineTokens.cs +++ b/src/TextMateSharp/Internal/Grammars/LineTokens.cs @@ -1,3 +1,4 @@ +using System; using System.Collections.Generic; using TextMateSharp.Grammars; using TextMateSharp.Themes; @@ -6,7 +7,7 @@ namespace TextMateSharp.Internal.Grammars { internal class LineTokens { - private string _lineText; + private ReadOnlyMemory _lineText; // used only if `_emitBinaryTokens` is false. private List _tokens; @@ -23,7 +24,7 @@ internal class LineTokens internal LineTokens( bool emitBinaryTokens, - string lineText, + ReadOnlyMemory lineText, List tokenTypeOverrides, BalancedBracketSelectors balancedBracketSelectors) { diff --git a/src/TextMateSharp/Internal/Rules/BeginEndRule.cs b/src/TextMateSharp/Internal/Rules/BeginEndRule.cs index a925a71..38d0259 100644 --- a/src/TextMateSharp/Internal/Rules/BeginEndRule.cs +++ b/src/TextMateSharp/Internal/Rules/BeginEndRule.cs @@ -1,3 +1,4 @@ +using System; using System.Collections.Generic; using Onigwrap; @@ -33,7 +34,7 @@ public BeginEndRule(RuleId id, string name, string contentName, string begin, Li _cachedCompiledPatterns = null; } - public string GetEndWithResolvedBackReferences(string lineText, IOnigCaptureIndex[] captureIndices) + public string GetEndWithResolvedBackReferences(ReadOnlyMemory lineText, IOnigCaptureIndex[] captureIndices) { return this._end.ResolveBackReferences(lineText, captureIndices); } diff --git a/src/TextMateSharp/Internal/Rules/BeginWhileRule.cs b/src/TextMateSharp/Internal/Rules/BeginWhileRule.cs index f3d52c5..99290e2 100644 --- a/src/TextMateSharp/Internal/Rules/BeginWhileRule.cs +++ b/src/TextMateSharp/Internal/Rules/BeginWhileRule.cs @@ -1,3 +1,4 @@ +using System; using System.Collections.Generic; using Onigwrap; @@ -33,7 +34,7 @@ public BeginWhileRule(RuleId id, string name, string contentName, string begin, _cachedCompiledWhilePatterns = null; } - public string getWhileWithResolvedBackReferences(string lineText, IOnigCaptureIndex[] captureIndices) + public string getWhileWithResolvedBackReferences(ReadOnlyMemory lineText, IOnigCaptureIndex[] captureIndices) { return this._while.ResolveBackReferences(lineText, captureIndices); } diff --git a/src/TextMateSharp/Internal/Rules/RegExpSource.cs b/src/TextMateSharp/Internal/Rules/RegExpSource.cs index faef2ca..a3dee4a 100644 --- a/src/TextMateSharp/Internal/Rules/RegExpSource.cs +++ b/src/TextMateSharp/Internal/Rules/RegExpSource.cs @@ -119,7 +119,7 @@ private void HandleAnchors(string regExpSource) } } - public string ResolveBackReferences(string lineText, IOnigCaptureIndex[] captureIndices) + public string ResolveBackReferences(ReadOnlyMemory lineText, IOnigCaptureIndex[] captureIndices) { List capturedValues = new List(); @@ -151,7 +151,7 @@ public string ResolveBackReferences(string lineText, IOnigCaptureIndex[] capture System.Diagnostics.Debug.WriteLine(ex.Message); } - return lineText; + return lineText.Span.ToString(); } private string EscapeRegExpCharacters(string value) diff --git a/src/TextMateSharp/Internal/Rules/Rule.cs b/src/TextMateSharp/Internal/Rules/Rule.cs index 0641e23..a238361 100644 --- a/src/TextMateSharp/Internal/Rules/Rule.cs +++ b/src/TextMateSharp/Internal/Rules/Rule.cs @@ -1,3 +1,4 @@ +using System; using Onigwrap; using TextMateSharp.Internal.Utils; @@ -24,7 +25,7 @@ public Rule(RuleId id, string name, string contentName) _contentNameIsCapturing = RegexSource.HasCaptures(this._contentName); } - public string GetName(string lineText, IOnigCaptureIndex[] captureIndices) + public string GetName(ReadOnlyMemory lineText, IOnigCaptureIndex[] captureIndices) { if (!this._nameIsCapturing) { @@ -34,7 +35,7 @@ public string GetName(string lineText, IOnigCaptureIndex[] captureIndices) return RegexSource.ReplaceCaptures(this._name, lineText, captureIndices); } - public string GetContentName(string lineText, IOnigCaptureIndex[] captureIndices) + public string GetContentName(ReadOnlyMemory lineText, IOnigCaptureIndex[] captureIndices) { if (!this._contentNameIsCapturing) { diff --git a/src/TextMateSharp/Internal/Utils/RegexSource.cs b/src/TextMateSharp/Internal/Utils/RegexSource.cs index b8c26c3..762948b 100644 --- a/src/TextMateSharp/Internal/Utils/RegexSource.cs +++ b/src/TextMateSharp/Internal/Utils/RegexSource.cs @@ -62,13 +62,13 @@ public static bool HasCaptures(string regexSource) return CAPTURING_REGEX_SOURCE.Match(regexSource).Success; } - public static string ReplaceCaptures(string regexSource, string captureSource, IOnigCaptureIndex[] captureIndices) + public static string ReplaceCaptures(string regexSource, ReadOnlyMemory captureSource, IOnigCaptureIndex[] captureIndices) { return CAPTURING_REGEX_SOURCE.Replace( regexSource, m => GetReplacement(m.Value, captureSource, captureIndices)); } - private static string GetReplacement(string match, string captureSource, IOnigCaptureIndex[] captureIndices) + private static string GetReplacement(string match, ReadOnlyMemory captureSource, IOnigCaptureIndex[] captureIndices) { int index = -1; string command = null; @@ -82,7 +82,7 @@ private static string GetReplacement(string match, string captureSource, IOnigCa { index = int.Parse(match.SubstringAtIndexes(1, match.Length)); } - IOnigCaptureIndex capture = captureIndices.Length > index ? captureIndices[index] : null; + IOnigCaptureIndex capture = captureIndices != null && captureIndices.Length > index ? captureIndices[index] : null; if (capture != null) { string result = captureSource.SubstringAtIndexes(capture.Start, capture.End); diff --git a/src/TextMateSharp/Internal/Utils/StringUtils.cs b/src/TextMateSharp/Internal/Utils/StringUtils.cs index 991d221..0047324 100644 --- a/src/TextMateSharp/Internal/Utils/StringUtils.cs +++ b/src/TextMateSharp/Internal/Utils/StringUtils.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Text.RegularExpressions; namespace TextMateSharp.Internal.Utils @@ -15,6 +16,21 @@ internal static string SubstringAtIndexes(this string str, int startIndex, int e return str.Substring(startIndex, endIndex - startIndex); } + internal static ReadOnlyMemory SliceAtIndexes(this ReadOnlyMemory memory, int startIndex, int endIndex) + { + return memory.Slice(startIndex, endIndex - startIndex); + } + + internal static ReadOnlySpan SliceAtIndexes(this ReadOnlySpan span, int startIndex, int endIndex) + { + return span.Slice(startIndex, endIndex - startIndex); + } + + internal static string SubstringAtIndexes(this ReadOnlyMemory memory, int startIndex, int endIndex) + { + return memory.Slice(startIndex, endIndex - startIndex).Span.ToString(); + } + internal static bool IsValidHexColor(string hex) { if (hex == null || hex.Length < 1) diff --git a/src/TextMateSharp/Model/AbstractLineList.cs b/src/TextMateSharp/Model/AbstractLineList.cs index 56d2c57..fb255da 100644 --- a/src/TextMateSharp/Model/AbstractLineList.cs +++ b/src/TextMateSharp/Model/AbstractLineList.cs @@ -1,6 +1,8 @@ using System; using System.Collections.Generic; +using TextMateSharp.Grammars; + namespace TextMateSharp.Model { public abstract class AbstractLineList : IModelLines @@ -94,7 +96,7 @@ public int GetSize() public abstract int GetNumberOfLines(); - public abstract string GetLineText(int lineIndex); + public abstract LineText GetLineText(int lineIndex); public abstract int GetLineLength(int lineIndex); diff --git a/src/TextMateSharp/Model/IModelLines.cs b/src/TextMateSharp/Model/IModelLines.cs index 593592e..80f7568 100644 --- a/src/TextMateSharp/Model/IModelLines.cs +++ b/src/TextMateSharp/Model/IModelLines.cs @@ -1,5 +1,7 @@ using System; +using TextMateSharp.Grammars; + namespace TextMateSharp.Model { public interface IModelLines @@ -11,7 +13,7 @@ public interface IModelLines ModelLine Get(int lineIndex); void ForEach(Action action); int GetNumberOfLines(); - string GetLineText(int lineIndex); + LineText GetLineText(int lineIndex); int GetLineLength(int lineIndex); void Dispose(); } diff --git a/src/TextMateSharp/Model/ITokenizationSupport.cs b/src/TextMateSharp/Model/ITokenizationSupport.cs index 81cb75a..813bb0f 100644 --- a/src/TextMateSharp/Model/ITokenizationSupport.cs +++ b/src/TextMateSharp/Model/ITokenizationSupport.cs @@ -1,14 +1,13 @@ using System; +using TextMateSharp.Grammars; + namespace TextMateSharp.Model { public interface ITokenizationSupport { TMState GetInitialState(); - - LineTokens Tokenize(string line, TMState state, TimeSpan timeLimit); - - LineTokens Tokenize(string line, TMState state, int offsetDelta, int maxLen, TimeSpan timeLimit); - + LineTokens Tokenize(LineText line, TMState state, TimeSpan timeLimit); + LineTokens Tokenize(LineText line, TMState state, int offsetDelta, int maxLen, TimeSpan timeLimit); } } \ No newline at end of file diff --git a/src/TextMateSharp/Model/TMModel.cs b/src/TextMateSharp/Model/TMModel.cs index 5550583..b76199b 100644 --- a/src/TextMateSharp/Model/TMModel.cs +++ b/src/TextMateSharp/Model/TMModel.cs @@ -190,12 +190,12 @@ public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int int endStateIndex = lineIndex + 1; LineTokens r = null; - string text = null; + LineText text = default; ModelLine modeLine = model._lines.Get(lineIndex); try { text = model._lines.GetLineText(lineIndex); - if (text == null) + if (text.IsEmpty) continue; // Tokenize only the first X characters r = model._tokenizer.Tokenize(text, modeLine.State, 0, MAX_LEN_TO_TOKENIZE, stopLineTokenizationAfter); diff --git a/src/TextMateSharp/Model/Tokenizer.cs b/src/TextMateSharp/Model/Tokenizer.cs index 2859b92..d0caeda 100644 --- a/src/TextMateSharp/Model/Tokenizer.cs +++ b/src/TextMateSharp/Model/Tokenizer.cs @@ -21,22 +21,23 @@ public TMState GetInitialState() return new TMState(null, null); } - public LineTokens Tokenize(string line, TMState state, TimeSpan timeLimit) + public LineTokens Tokenize(LineText line, TMState state, TimeSpan timeLimit) { return Tokenize(line, state, 0, 0, timeLimit); } - public LineTokens Tokenize(string line, TMState state, int offsetDelta, int maxLen, TimeSpan timeLimit) + public LineTokens Tokenize(LineText line, TMState state, int offsetDelta, int maxLen, TimeSpan timeLimit) { if (_grammar == null) return null; TMState freshState = state != null ? state.Clone() : GetInitialState(); - if (line.Length > 0 && line.Length > maxLen) - line = line.Substring(0, maxLen); + ReadOnlyMemory effectiveLine = line.Memory; + if (maxLen > 0 && effectiveLine.Length > maxLen) + effectiveLine = effectiveLine.Slice(0, maxLen); - ITokenizeLineResult textMateResult = _grammar.TokenizeLine(line, freshState.GetRuleStack(), timeLimit); + ITokenizeLineResult textMateResult = _grammar.TokenizeLine(effectiveLine, freshState.GetRuleStack(), timeLimit); freshState.SetRuleStack(textMateResult.RuleStack); // Create the result early and fill in the tokens later @@ -57,7 +58,7 @@ public LineTokens Tokenize(string line, TMState state, int offsetDelta, int maxL lastTokenType = tokenType; } } - return new LineTokens(tokens, offsetDelta + line.Length, freshState); + return new LineTokens(tokens, offsetDelta + effectiveLine.Length, freshState); } private string DecodeTextMateToken(DecodeMap decodeMap, List scopes) From a99cc2f943583803839e42df8ffa93328839167c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 14:11:15 +0100 Subject: [PATCH 05/19] Update .gitignore --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 75f0aa7..e773ddd 100644 --- a/.gitignore +++ b/.gitignore @@ -372,3 +372,7 @@ src/.vscode/launch.json .idea/.idea.TextMateSharp.dir/.idea/indexLayout.xml .idea/.idea.TextMateSharp.dir/.idea/vcs.xml .idea/.idea.TextMateSharp/.idea/riderMarkupCache.xml +.idea/.idea.TextMateSharp/.idea/copilot.data.migration.agent.xml +.idea/.idea.TextMateSharp/.idea/copilot.data.migration.ask.xml +.idea/.idea.TextMateSharp/.idea/copilot.data.migration.ask2agent.xml +.idea/.idea.TextMateSharp/.idea/copilot.data.migration.edit.xml From bd57ea0735f77276f9625d0113f549dc9f167ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 14:17:05 +0100 Subject: [PATCH 06/19] Add unit tests for LineText --- .../Grammar/LineTextTests.cs | 153 ++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 src/TextMateSharp.Tests/Grammar/LineTextTests.cs diff --git a/src/TextMateSharp.Tests/Grammar/LineTextTests.cs b/src/TextMateSharp.Tests/Grammar/LineTextTests.cs new file mode 100644 index 0000000..23f2f0e --- /dev/null +++ b/src/TextMateSharp.Tests/Grammar/LineTextTests.cs @@ -0,0 +1,153 @@ +using System; + +using NUnit.Framework; + +using TextMateSharp.Grammars; + +namespace TextMateSharp.Tests.Grammar +{ + [TestFixture] + public class LineTextTests + { + [Test] + public void Constructor_WithString_ShouldStoreText() + { + LineText lineText = new LineText("hello world"); + + Assert.AreEqual(11, lineText.Length); + Assert.AreEqual("hello world", lineText.ToString()); + } + + [Test] + public void Constructor_WithNullString_ShouldBeEmpty() + { + LineText lineText = new LineText((string)null); + + Assert.IsTrue(lineText.IsEmpty); + Assert.AreEqual(0, lineText.Length); + } + + [Test] + public void Constructor_WithReadOnlyMemory_ShouldStoreText() + { + ReadOnlyMemory memory = "hello world".AsMemory(); + LineText lineText = new LineText(memory); + + Assert.AreEqual(11, lineText.Length); + Assert.AreEqual("hello world", lineText.ToString()); + } + + [Test] + public void Constructor_WithEmptyMemory_ShouldBeEmpty() + { + LineText lineText = new LineText(ReadOnlyMemory.Empty); + + Assert.IsTrue(lineText.IsEmpty); + Assert.AreEqual(0, lineText.Length); + } + + [Test] + public void ImplicitConversion_FromString_ShouldWork() + { + LineText lineText = "test string"; + + Assert.AreEqual("test string", lineText.ToString()); + Assert.AreEqual(11, lineText.Length); + } + + [Test] + public void ImplicitConversion_FromReadOnlyMemory_ShouldWork() + { + ReadOnlyMemory memory = "test memory".AsMemory(); + LineText lineText = memory; + + Assert.AreEqual("test memory", lineText.ToString()); + Assert.AreEqual(11, lineText.Length); + } + + [Test] + public void ImplicitConversion_ToReadOnlyMemory_ShouldWork() + { + LineText lineText = "test"; + ReadOnlyMemory memory = lineText; + + Assert.AreEqual(4, memory.Length); + Assert.AreEqual("test", memory.Span.ToString()); + } + + [Test] + public void Memory_Property_ShouldReturnUnderlyingMemory() + { + LineText lineText = "hello"; + + ReadOnlyMemory memory = lineText.Memory; + + Assert.AreEqual(5, memory.Length); + Assert.AreEqual('h', memory.Span[0]); + Assert.AreEqual('o', memory.Span[4]); + } + + [Test] + public void IsEmpty_WithEmptyString_ShouldReturnTrue() + { + LineText lineText = ""; + + Assert.IsTrue(lineText.IsEmpty); + } + + [Test] + public void IsEmpty_WithNonEmptyString_ShouldReturnFalse() + { + LineText lineText = "x"; + + Assert.IsFalse(lineText.IsEmpty); + } + + [Test] + public void Default_LineText_ShouldBeEmpty() + { + LineText lineText = default; + + Assert.IsTrue(lineText.IsEmpty); + Assert.AreEqual(0, lineText.Length); + } + + [Test] + public void ToString_ShouldReturnStringRepresentation() + { + LineText lineText = "hello world"; + + Assert.AreEqual("hello world", lineText.ToString()); + } + + [Test] + public void SlicedMemory_ShouldWorkCorrectly() + { + char[] buffer = "hello world".ToCharArray(); + ReadOnlyMemory sliced = buffer.AsMemory().Slice(6, 5); + LineText lineText = sliced; + + Assert.AreEqual("world", lineText.ToString()); + Assert.AreEqual(5, lineText.Length); + } + + [Test] + public void UnicodeText_ShouldBeHandledCorrectly() + { + LineText lineText = "안녕하세요"; + + Assert.AreEqual(5, lineText.Length); + Assert.AreEqual("안녕하세요", lineText.ToString()); + } + + [Test] + public void CharArrayMemory_ShouldWorkWithLineText() + { + char[] buffer = new char[] { 'a', 'b', 'c', 'd', 'e' }; + LineText lineText = (ReadOnlyMemory)buffer.AsMemory(); + + Assert.AreEqual(5, lineText.Length); + Assert.AreEqual("abcde", lineText.ToString()); + } + } +} From 7109ac7711606898e9bd18a321a804edd1b40ad6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 14:26:26 +0100 Subject: [PATCH 07/19] Fixing failing test TMModel_Should_Parse_Until_Last_Document_Line --- src/TextMateSharp/Model/TMModel.cs | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/TextMateSharp/Model/TMModel.cs b/src/TextMateSharp/Model/TMModel.cs index b76199b..977cb35 100644 --- a/src/TextMateSharp/Model/TMModel.cs +++ b/src/TextMateSharp/Model/TMModel.cs @@ -195,8 +195,6 @@ public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int try { text = model._lines.GetLineText(lineIndex); - if (text.IsEmpty) - continue; // Tokenize only the first X characters r = model._tokenizer.Tokenize(text, modeLine.State, 0, MAX_LEN_TO_TOKENIZE, stopLineTokenizationAfter); } From 81a865c8958b27a9a476e387d0c22ff767296284 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 14:58:28 +0100 Subject: [PATCH 08/19] Add benchmark tests --- TextMateSharp.sln | 61 ++++++++++++++++ .../BigFileTokenizationBenchmark.cs | 73 +++++++++++++++++++ src/TextMateSharp.Benchmarks/Program.cs | 12 +++ .../TextMateSharp.Benchmarks.csproj | 19 +++++ 4 files changed, 165 insertions(+) create mode 100644 src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs create mode 100644 src/TextMateSharp.Benchmarks/Program.cs create mode 100644 src/TextMateSharp.Benchmarks/TextMateSharp.Benchmarks.csproj diff --git a/TextMateSharp.sln b/TextMateSharp.sln index d23b39d..ce32781 100644 --- a/TextMateSharp.sln +++ b/TextMateSharp.sln @@ -31,32 +31,92 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Workflows", "Workflows", "{ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TextMateSharp.Grammars.Tests", "src\TextMateSharp.Grammars.Tests\TextMateSharp.Grammars.Tests.csproj", "{B9194474-83A7-47E6-B5E6-6CE360B1189B}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{827E0CD3-B72D-47B6-A68D-7590B98EB39B}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TextMateSharp.Benchmarks", "src\TextMateSharp.Benchmarks\TextMateSharp.Benchmarks.csproj", "{C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 EndGlobalSection GlobalSection(ProjectConfigurationPlatforms) = postSolution {664F185F-961B-496E-9159-3CC8F05DBBE5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {664F185F-961B-496E-9159-3CC8F05DBBE5}.Debug|Any CPU.Build.0 = Debug|Any CPU + {664F185F-961B-496E-9159-3CC8F05DBBE5}.Debug|x64.ActiveCfg = Debug|Any CPU + {664F185F-961B-496E-9159-3CC8F05DBBE5}.Debug|x64.Build.0 = Debug|Any CPU + {664F185F-961B-496E-9159-3CC8F05DBBE5}.Debug|x86.ActiveCfg = Debug|Any CPU + {664F185F-961B-496E-9159-3CC8F05DBBE5}.Debug|x86.Build.0 = Debug|Any CPU {664F185F-961B-496E-9159-3CC8F05DBBE5}.Release|Any CPU.ActiveCfg = Release|Any CPU {664F185F-961B-496E-9159-3CC8F05DBBE5}.Release|Any CPU.Build.0 = Release|Any CPU + {664F185F-961B-496E-9159-3CC8F05DBBE5}.Release|x64.ActiveCfg = Release|Any CPU + {664F185F-961B-496E-9159-3CC8F05DBBE5}.Release|x64.Build.0 = Release|Any CPU + {664F185F-961B-496E-9159-3CC8F05DBBE5}.Release|x86.ActiveCfg = Release|Any CPU + {664F185F-961B-496E-9159-3CC8F05DBBE5}.Release|x86.Build.0 = Release|Any CPU {DB75EFF5-4248-4679-9C59-9533998936B3}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {DB75EFF5-4248-4679-9C59-9533998936B3}.Debug|Any CPU.Build.0 = Debug|Any CPU + {DB75EFF5-4248-4679-9C59-9533998936B3}.Debug|x64.ActiveCfg = Debug|Any CPU + {DB75EFF5-4248-4679-9C59-9533998936B3}.Debug|x64.Build.0 = Debug|Any CPU + {DB75EFF5-4248-4679-9C59-9533998936B3}.Debug|x86.ActiveCfg = Debug|Any CPU + {DB75EFF5-4248-4679-9C59-9533998936B3}.Debug|x86.Build.0 = Debug|Any CPU {DB75EFF5-4248-4679-9C59-9533998936B3}.Release|Any CPU.ActiveCfg = Release|Any CPU {DB75EFF5-4248-4679-9C59-9533998936B3}.Release|Any CPU.Build.0 = Release|Any CPU + {DB75EFF5-4248-4679-9C59-9533998936B3}.Release|x64.ActiveCfg = Release|Any CPU + {DB75EFF5-4248-4679-9C59-9533998936B3}.Release|x64.Build.0 = Release|Any CPU + {DB75EFF5-4248-4679-9C59-9533998936B3}.Release|x86.ActiveCfg = Release|Any CPU + {DB75EFF5-4248-4679-9C59-9533998936B3}.Release|x86.Build.0 = Release|Any CPU {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Debug|x64.ActiveCfg = Debug|Any CPU + {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Debug|x64.Build.0 = Debug|Any CPU + {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Debug|x86.ActiveCfg = Debug|Any CPU + {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Debug|x86.Build.0 = Debug|Any CPU {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Release|Any CPU.ActiveCfg = Release|Any CPU {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Release|Any CPU.Build.0 = Release|Any CPU + {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Release|x64.ActiveCfg = Release|Any CPU + {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Release|x64.Build.0 = Release|Any CPU + {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Release|x86.ActiveCfg = Release|Any CPU + {B49D3C2E-6C4E-45B3-A645-592994B7B94D}.Release|x86.Build.0 = Release|Any CPU {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Debug|Any CPU.Build.0 = Debug|Any CPU + {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Debug|x64.ActiveCfg = Debug|Any CPU + {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Debug|x64.Build.0 = Debug|Any CPU + {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Debug|x86.ActiveCfg = Debug|Any CPU + {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Debug|x86.Build.0 = Debug|Any CPU {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Release|Any CPU.ActiveCfg = Release|Any CPU {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Release|Any CPU.Build.0 = Release|Any CPU + {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Release|x64.ActiveCfg = Release|Any CPU + {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Release|x64.Build.0 = Release|Any CPU + {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Release|x86.ActiveCfg = Release|Any CPU + {DDB3D93D-BFAA-4CE6-B98D-74497DDE0D62}.Release|x86.Build.0 = Release|Any CPU {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Debug|x64.ActiveCfg = Debug|Any CPU + {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Debug|x64.Build.0 = Debug|Any CPU + {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Debug|x86.ActiveCfg = Debug|Any CPU + {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Debug|x86.Build.0 = Debug|Any CPU {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Release|Any CPU.ActiveCfg = Release|Any CPU {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Release|Any CPU.Build.0 = Release|Any CPU + {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Release|x64.ActiveCfg = Release|Any CPU + {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Release|x64.Build.0 = Release|Any CPU + {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Release|x86.ActiveCfg = Release|Any CPU + {B9194474-83A7-47E6-B5E6-6CE360B1189B}.Release|x86.Build.0 = Release|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Debug|x64.ActiveCfg = Debug|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Debug|x64.Build.0 = Debug|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Debug|x86.ActiveCfg = Debug|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Debug|x86.Build.0 = Debug|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Release|Any CPU.Build.0 = Release|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Release|x64.ActiveCfg = Release|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Release|x64.Build.0 = Release|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Release|x86.ActiveCfg = Release|Any CPU + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -64,6 +124,7 @@ Global GlobalSection(NestedProjects) = preSolution {C4A8E28E-70B0-4184-B62B-7286CB2F5756} = {FB55729C-1952-4D20-BFE7-C3202B160A0B} {46BA508A-D22E-4F76-AD27-68AC62725952} = {FB55729C-1952-4D20-BFE7-C3202B160A0B} + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54} = {827E0CD3-B72D-47B6-A68D-7590B98EB39B} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {D82FE2B4-7A75-444B-AB90-DC50F82D89A8} diff --git a/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs b/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs new file mode 100644 index 0000000..c77c547 --- /dev/null +++ b/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs @@ -0,0 +1,73 @@ +using System; +using System.IO; + +using BenchmarkDotNet.Attributes; + +using TextMateSharp.Grammars; + +namespace TextMateSharp.Benchmarks +{ + [MemoryDiagnoser] + public class BigFileTokenizationBenchmark + { + private IGrammar _grammar = null!; + private string[] _lines = null!; + + [GlobalSetup] + public void Setup() + { + // Walk up directories to find the solution root + string? dir = AppDomain.CurrentDomain.BaseDirectory; + string bigFilePath = ""; + + while (dir != null) + { + string candidate = Path.Combine(dir, "src", "TextMateSharp.Demo", + "testdata", "samplefiles", "bigfile.cs"); + if (File.Exists(candidate)) + { + bigFilePath = candidate; + break; + } + dir = Path.GetDirectoryName(dir); + } + + if (string.IsNullOrEmpty(bigFilePath) || !File.Exists(bigFilePath)) + { + throw new FileNotFoundException( + "Could not find bigfile.cs. Make sure you're running from the TextMateSharp solution directory."); + } + + + // Load the file into memory + _lines = File.ReadAllLines(bigFilePath); + Console.WriteLine($"Loaded {_lines.Length} lines from bigfile.cs"); + + // Load the C# grammar + RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus); + Registry.Registry registry = new Registry.Registry(options); + _grammar = registry.LoadGrammar("source.cs"); + + if (_grammar == null) + { + throw new InvalidOperationException("Failed to load C# grammar"); + } + } + + [Benchmark] + public int TokenizeAllLines() + { + int totalTokens = 0; + IStateStack? ruleStack = null; + + for (int i = 0; i < _lines.Length; i++) + { + ITokenizeLineResult result = _grammar.TokenizeLine(_lines[i], ruleStack, TimeSpan.MaxValue); + ruleStack = result.RuleStack; + totalTokens += result.Tokens.Length; + } + + return totalTokens; + } + } +} diff --git a/src/TextMateSharp.Benchmarks/Program.cs b/src/TextMateSharp.Benchmarks/Program.cs new file mode 100644 index 0000000..38c7edf --- /dev/null +++ b/src/TextMateSharp.Benchmarks/Program.cs @@ -0,0 +1,12 @@ +using BenchmarkDotNet.Running; + +namespace TextMateSharp.Benchmarks +{ + public class Program + { + public static void Main(string[] args) + { + BenchmarkRunner.Run(); + } + } +} diff --git a/src/TextMateSharp.Benchmarks/TextMateSharp.Benchmarks.csproj b/src/TextMateSharp.Benchmarks/TextMateSharp.Benchmarks.csproj new file mode 100644 index 0000000..403cf07 --- /dev/null +++ b/src/TextMateSharp.Benchmarks/TextMateSharp.Benchmarks.csproj @@ -0,0 +1,19 @@ + + + + Exe + net8.0 + False + enable + + + + + + + + + + + + From 0c1c0aae0a9be62110b7fa50bf78b9404c0f0671 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 18:11:30 +0100 Subject: [PATCH 09/19] perf: Reduce allocations and improve tokenization performance by 39% MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimizations applied: - Use ArrayPool instead of allocating new char[] per line in Grammar.Tokenize() - Replace new Stopwatch() with Stopwatch.GetTimestamp() in LineTokenizer.Scan() - Pool List and List in LineTokenizer - Cache GetScopeNames() result in AttributedScopeStack - Avoid List allocation for single-scope PushAtributed() Benchmark results (133K line file): - Execution time: 4.75s → 2.89s (39% faster) - Memory allocated: 658 MB → 488 MB (26% less) - Gen0 collections: 82K → 61K (26% fewer) - Gen1 collections: 8K → 4K (50% fewer) --- .../Internal/Grammars/AttributedScopeStack.cs | 23 ++++++-- .../Internal/Grammars/Grammar.cs | 57 ++++++++++++------- .../Internal/Grammars/LineTokenizer.cs | 17 ++++-- 3 files changed, 65 insertions(+), 32 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs b/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs index 3d869ab..4546068 100644 --- a/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs +++ b/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs @@ -10,6 +10,7 @@ public class AttributedScopeStack public AttributedScopeStack Parent { get; private set; } public string ScopePath { get; private set; } public int TokenAttributes { get; private set; } + private List _cachedScopeNames; public AttributedScopeStack(AttributedScopeStack parent, string scopePath, int tokenAttributes) { @@ -157,13 +158,18 @@ private static AttributedScopeStack Push(AttributedScopeStack target, Grammar gr { foreach (string scope in scopes) { - BasicScopeAttributes rawMetadata = grammar.GetMetadataForScope(scope); - int metadata = AttributedScopeStack.MergeAttributes(target.TokenAttributes, target, rawMetadata); - target = new AttributedScopeStack(target, scope, metadata); + target = PushSingleScope(target, grammar, scope); } return target; } + private static AttributedScopeStack PushSingleScope(AttributedScopeStack target, Grammar grammar, string scope) + { + BasicScopeAttributes rawMetadata = grammar.GetMetadataForScope(scope); + int metadata = AttributedScopeStack.MergeAttributes(target.TokenAttributes, target, rawMetadata); + return new AttributedScopeStack(target, scope, metadata); + } + public AttributedScopeStack PushAtributed(string scopePath, Grammar grammar) { if (scopePath == null) @@ -175,13 +181,18 @@ public AttributedScopeStack PushAtributed(string scopePath, Grammar grammar) // there are multiple scopes to push return Push(this, grammar, new List(scopePath.Split(new[] {" "}, StringSplitOptions.None))); } - // there is a single scope to push - return Push(this, grammar, new List() { scopePath }); + // there is a single scope to push - avoid List allocation + return PushSingleScope(this, grammar, scopePath); } + public List GetScopeNames() { - return AttributedScopeStack.GenerateScopes(this); + if (_cachedScopeNames == null) + { + _cachedScopeNames = GenerateScopes(this); + } + return _cachedScopeNames; } private static List GenerateScopes(AttributedScopeStack scopesList) diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index 4651538..11a0ee0 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.Collections.Generic; using TextMateSharp.Grammars; @@ -254,35 +255,49 @@ private object Tokenize(ReadOnlyMemory lineText, StateStack prevState, boo } // Check if we need to append newline + char[] rentedBuffer = null; ReadOnlyMemory effectiveLineText; - if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') - { - // Only add \n if the passed lineText didn't have it. - // We need to allocate a new buffer with the newline - char[] buffer = new char[lineText.Length + 1]; - lineText.Span.CopyTo(buffer); - buffer[lineText.Length] = '\n'; - effectiveLineText = buffer.AsMemory(); - } - else + + try { - effectiveLineText = lineText; - } + if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') + { + // Only add \n if the passed lineText didn't have it. + // Use ArrayPool to avoid per-line allocation + int requiredLength = lineText.Length + 1; + rentedBuffer = ArrayPool.Shared.Rent(requiredLength); + lineText.Span.CopyTo(rentedBuffer); + rentedBuffer[lineText.Length] = '\n'; + effectiveLineText = rentedBuffer.AsMemory(0, requiredLength); + } + else + { + effectiveLineText = lineText; + } - int lineLength = effectiveLineText.Length; - LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); - TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, - lineTokens, true, timeLimit); + int lineLength = effectiveLineText.Length; + LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); + TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, + lineTokens, true, timeLimit); - if (emitBinaryTokens) - { - return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + if (emitBinaryTokens) + { + return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); + } + return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), tokenizeResult.Stack, tokenizeResult.StoppedEarly); } - return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); + finally + { + if (rentedBuffer != null) + { + ArrayPool.Shared.Return(rentedBuffer); + } + } } + private void GenerateRootId() { _isCompiling = true; diff --git a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs index 33ed2bf..f79735a 100644 --- a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs +++ b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs @@ -21,6 +21,8 @@ class LineTokenizer private int _anchorPosition = -1; private bool _stop; private int _lineLength; + private readonly List _localStackBuffer = new List(); + private readonly List _whileRulesBuffer = new List(); public LineTokenizer(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, int linePos, StateStack stack, LineTokens lineTokens) @@ -48,11 +50,13 @@ public TokenizeStringResult Scan(bool checkWhileConditions, TimeSpan timeLimit) _anchorPosition = whileCheckResult.AnchorPosition; } - var stopWatch = new Stopwatch(); - stopWatch.Start(); + // Use Stopwatch.GetTimestamp() instead of new Stopwatch() to avoid allocation + long startTimestamp = Stopwatch.GetTimestamp(); + long timeoutTicks = (long)(timeLimit.TotalSeconds * Stopwatch.Frequency); + while (!_stop) { - if (stopWatch.Elapsed > timeLimit) + if (Stopwatch.GetTimestamp() - startTimestamp > timeoutTicks) { return new TokenizeStringResult(_stack, true); } @@ -62,6 +66,7 @@ public TokenizeStringResult Scan(bool checkWhileConditions, TimeSpan timeLimit) return new TokenizeStringResult(_stack, false); } + private void ScanNext() { MatchResult r = MatchRuleOrInjections(_grammar, _lineText, _isFirstLine, _linePos, _stack, _anchorPosition); @@ -392,7 +397,8 @@ private void HandleCaptures(Grammar grammar, ReadOnlyMemory lineText, bool } int len = Math.Min(captures.Count, captureIndices.Length); - List localStack = new List(); + _localStackBuffer.Clear(); + var localStack = _localStackBuffer; int maxEnd = captureIndices[0].End; IOnigCaptureIndex captureIndex; @@ -492,7 +498,8 @@ private WhileCheckResult CheckWhileConditions(Grammar grammar, ReadOnlyMemory whileRules = new List(); + _whileRulesBuffer.Clear(); + var whileRules = _whileRulesBuffer; for (StateStack node = stack; node != null; node = node.Pop()) { Rule nodeRule = node.GetRule(grammar); From 8e20d42f816e89a808b9faebf04fd07095d3a475 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 19:04:51 +0100 Subject: [PATCH 10/19] fix: Revert ArrayPool optimization that caused cross-platform test failures The ArrayPool optimization in Grammar.Tokenize() caused test failures on x64 Linux/Windows CI while passing on ARM64 macOS locally. Root cause: The rented buffer was returned to the pool in the finally block while LineTokens still held a ReadOnlyMemory reference to it. On x64 platforms with aggressive buffer reuse, subsequent tokenize calls would reuse and overwrite the buffer, corrupting previous results. The other performance optimizations from commit 0c1c0aa remain intact: - Stopwatch.GetTimestamp() instead of new Stopwatch() - Pooled List and List in LineTokenizer - Cached GetScopeNames() in AttributedScopeStack - Single-scope PushAtributed() optimization --- .../Internal/Grammars/Grammar.cs | 58 ++++++++----------- 1 file changed, 23 insertions(+), 35 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index 11a0ee0..ff78267 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -1,5 +1,4 @@ using System; -using System.Buffers; using System.Collections.Generic; using TextMateSharp.Grammars; @@ -255,46 +254,35 @@ private object Tokenize(ReadOnlyMemory lineText, StateStack prevState, boo } // Check if we need to append newline - char[] rentedBuffer = null; ReadOnlyMemory effectiveLineText; - - try + if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') { - if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') - { - // Only add \n if the passed lineText didn't have it. - // Use ArrayPool to avoid per-line allocation - int requiredLength = lineText.Length + 1; - rentedBuffer = ArrayPool.Shared.Rent(requiredLength); - lineText.Span.CopyTo(rentedBuffer); - rentedBuffer[lineText.Length] = '\n'; - effectiveLineText = rentedBuffer.AsMemory(0, requiredLength); - } - else - { - effectiveLineText = lineText; - } + // Only add \n if the passed lineText didn't have it. + // Note: We cannot use ArrayPool here because the LineTokens/tokens may hold + // references to this memory after this method returns. Using ArrayPool would + // cause memory corruption when the buffer is returned and reused. + char[] buffer = new char[lineText.Length + 1]; + lineText.Span.CopyTo(buffer); + buffer[lineText.Length] = '\n'; + effectiveLineText = buffer.AsMemory(); + } + else + { + effectiveLineText = lineText; + } - int lineLength = effectiveLineText.Length; - LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); - TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, - lineTokens, true, timeLimit); + int lineLength = effectiveLineText.Length; + LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); + TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, + lineTokens, true, timeLimit); - if (emitBinaryTokens) - { - return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); - } - return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); - } - finally + if (emitBinaryTokens) { - if (rentedBuffer != null) - { - ArrayPool.Shared.Return(rentedBuffer); - } + return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); } + return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); } From 059062ce36edb2224fa93b2000caf4d3a5134af6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 20:19:42 +0100 Subject: [PATCH 11/19] Revert "fix: Revert ArrayPool optimization that caused cross-platform test failures" This reverts commit 8e20d42f816e89a808b9faebf04fd07095d3a475. --- .../Internal/Grammars/Grammar.cs | 58 +++++++++++-------- 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index ff78267..11a0ee0 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.Collections.Generic; using TextMateSharp.Grammars; @@ -254,35 +255,46 @@ private object Tokenize(ReadOnlyMemory lineText, StateStack prevState, boo } // Check if we need to append newline + char[] rentedBuffer = null; ReadOnlyMemory effectiveLineText; - if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') - { - // Only add \n if the passed lineText didn't have it. - // Note: We cannot use ArrayPool here because the LineTokens/tokens may hold - // references to this memory after this method returns. Using ArrayPool would - // cause memory corruption when the buffer is returned and reused. - char[] buffer = new char[lineText.Length + 1]; - lineText.Span.CopyTo(buffer); - buffer[lineText.Length] = '\n'; - effectiveLineText = buffer.AsMemory(); - } - else + + try { - effectiveLineText = lineText; - } + if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') + { + // Only add \n if the passed lineText didn't have it. + // Use ArrayPool to avoid per-line allocation + int requiredLength = lineText.Length + 1; + rentedBuffer = ArrayPool.Shared.Rent(requiredLength); + lineText.Span.CopyTo(rentedBuffer); + rentedBuffer[lineText.Length] = '\n'; + effectiveLineText = rentedBuffer.AsMemory(0, requiredLength); + } + else + { + effectiveLineText = lineText; + } - int lineLength = effectiveLineText.Length; - LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); - TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, - lineTokens, true, timeLimit); + int lineLength = effectiveLineText.Length; + LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); + TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, + lineTokens, true, timeLimit); - if (emitBinaryTokens) - { - return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + if (emitBinaryTokens) + { + return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); + } + return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), tokenizeResult.Stack, tokenizeResult.StoppedEarly); } - return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); + finally + { + if (rentedBuffer != null) + { + ArrayPool.Shared.Return(rentedBuffer); + } + } } From 32624a956a76cc45247f2ad2733ff14e597f9445 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 20:19:51 +0100 Subject: [PATCH 12/19] Revert "perf: Reduce allocations and improve tokenization performance by 39%" This reverts commit 0c1c0aae0a9be62110b7fa50bf78b9404c0f0671. --- .../Internal/Grammars/AttributedScopeStack.cs | 23 ++------ .../Internal/Grammars/Grammar.cs | 57 +++++++------------ .../Internal/Grammars/LineTokenizer.cs | 17 ++---- 3 files changed, 32 insertions(+), 65 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs b/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs index 4546068..3d869ab 100644 --- a/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs +++ b/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs @@ -10,7 +10,6 @@ public class AttributedScopeStack public AttributedScopeStack Parent { get; private set; } public string ScopePath { get; private set; } public int TokenAttributes { get; private set; } - private List _cachedScopeNames; public AttributedScopeStack(AttributedScopeStack parent, string scopePath, int tokenAttributes) { @@ -158,18 +157,13 @@ private static AttributedScopeStack Push(AttributedScopeStack target, Grammar gr { foreach (string scope in scopes) { - target = PushSingleScope(target, grammar, scope); + BasicScopeAttributes rawMetadata = grammar.GetMetadataForScope(scope); + int metadata = AttributedScopeStack.MergeAttributes(target.TokenAttributes, target, rawMetadata); + target = new AttributedScopeStack(target, scope, metadata); } return target; } - private static AttributedScopeStack PushSingleScope(AttributedScopeStack target, Grammar grammar, string scope) - { - BasicScopeAttributes rawMetadata = grammar.GetMetadataForScope(scope); - int metadata = AttributedScopeStack.MergeAttributes(target.TokenAttributes, target, rawMetadata); - return new AttributedScopeStack(target, scope, metadata); - } - public AttributedScopeStack PushAtributed(string scopePath, Grammar grammar) { if (scopePath == null) @@ -181,18 +175,13 @@ public AttributedScopeStack PushAtributed(string scopePath, Grammar grammar) // there are multiple scopes to push return Push(this, grammar, new List(scopePath.Split(new[] {" "}, StringSplitOptions.None))); } - // there is a single scope to push - avoid List allocation - return PushSingleScope(this, grammar, scopePath); + // there is a single scope to push + return Push(this, grammar, new List() { scopePath }); } - public List GetScopeNames() { - if (_cachedScopeNames == null) - { - _cachedScopeNames = GenerateScopes(this); - } - return _cachedScopeNames; + return AttributedScopeStack.GenerateScopes(this); } private static List GenerateScopes(AttributedScopeStack scopesList) diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index 11a0ee0..4651538 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -1,5 +1,4 @@ using System; -using System.Buffers; using System.Collections.Generic; using TextMateSharp.Grammars; @@ -255,49 +254,35 @@ private object Tokenize(ReadOnlyMemory lineText, StateStack prevState, boo } // Check if we need to append newline - char[] rentedBuffer = null; ReadOnlyMemory effectiveLineText; - - try + if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') { - if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') - { - // Only add \n if the passed lineText didn't have it. - // Use ArrayPool to avoid per-line allocation - int requiredLength = lineText.Length + 1; - rentedBuffer = ArrayPool.Shared.Rent(requiredLength); - lineText.Span.CopyTo(rentedBuffer); - rentedBuffer[lineText.Length] = '\n'; - effectiveLineText = rentedBuffer.AsMemory(0, requiredLength); - } - else - { - effectiveLineText = lineText; - } + // Only add \n if the passed lineText didn't have it. + // We need to allocate a new buffer with the newline + char[] buffer = new char[lineText.Length + 1]; + lineText.Span.CopyTo(buffer); + buffer[lineText.Length] = '\n'; + effectiveLineText = buffer.AsMemory(); + } + else + { + effectiveLineText = lineText; + } - int lineLength = effectiveLineText.Length; - LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); - TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, - lineTokens, true, timeLimit); + int lineLength = effectiveLineText.Length; + LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); + TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, + lineTokens, true, timeLimit); - if (emitBinaryTokens) - { - return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); - } - return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); - } - finally + if (emitBinaryTokens) { - if (rentedBuffer != null) - { - ArrayPool.Shared.Return(rentedBuffer); - } + return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); } + return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); } - private void GenerateRootId() { _isCompiling = true; diff --git a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs index f79735a..33ed2bf 100644 --- a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs +++ b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs @@ -21,8 +21,6 @@ class LineTokenizer private int _anchorPosition = -1; private bool _stop; private int _lineLength; - private readonly List _localStackBuffer = new List(); - private readonly List _whileRulesBuffer = new List(); public LineTokenizer(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, int linePos, StateStack stack, LineTokens lineTokens) @@ -50,13 +48,11 @@ public TokenizeStringResult Scan(bool checkWhileConditions, TimeSpan timeLimit) _anchorPosition = whileCheckResult.AnchorPosition; } - // Use Stopwatch.GetTimestamp() instead of new Stopwatch() to avoid allocation - long startTimestamp = Stopwatch.GetTimestamp(); - long timeoutTicks = (long)(timeLimit.TotalSeconds * Stopwatch.Frequency); - + var stopWatch = new Stopwatch(); + stopWatch.Start(); while (!_stop) { - if (Stopwatch.GetTimestamp() - startTimestamp > timeoutTicks) + if (stopWatch.Elapsed > timeLimit) { return new TokenizeStringResult(_stack, true); } @@ -66,7 +62,6 @@ public TokenizeStringResult Scan(bool checkWhileConditions, TimeSpan timeLimit) return new TokenizeStringResult(_stack, false); } - private void ScanNext() { MatchResult r = MatchRuleOrInjections(_grammar, _lineText, _isFirstLine, _linePos, _stack, _anchorPosition); @@ -397,8 +392,7 @@ private void HandleCaptures(Grammar grammar, ReadOnlyMemory lineText, bool } int len = Math.Min(captures.Count, captureIndices.Length); - _localStackBuffer.Clear(); - var localStack = _localStackBuffer; + List localStack = new List(); int maxEnd = captureIndices[0].End; IOnigCaptureIndex captureIndex; @@ -498,8 +492,7 @@ private WhileCheckResult CheckWhileConditions(Grammar grammar, ReadOnlyMemory whileRules = new List(); for (StateStack node = stack; node != null; node = node.Pop()) { Rule nodeRule = node.GetRule(grammar); From 9f702267ce0199429b63a6250a7865dd1f5c5004 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 20:26:34 +0100 Subject: [PATCH 13/19] Cache GetScopeNames() result in AttributedScopeStack --- .../Internal/Grammars/AttributedScopeStack.cs | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs b/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs index 3d869ab..c79ffe8 100644 --- a/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs +++ b/src/TextMateSharp/Internal/Grammars/AttributedScopeStack.cs @@ -10,6 +10,7 @@ public class AttributedScopeStack public AttributedScopeStack Parent { get; private set; } public string ScopePath { get; private set; } public int TokenAttributes { get; private set; } + private List _cachedScopeNames; public AttributedScopeStack(AttributedScopeStack parent, string scopePath, int tokenAttributes) { @@ -157,13 +158,18 @@ private static AttributedScopeStack Push(AttributedScopeStack target, Grammar gr { foreach (string scope in scopes) { - BasicScopeAttributes rawMetadata = grammar.GetMetadataForScope(scope); - int metadata = AttributedScopeStack.MergeAttributes(target.TokenAttributes, target, rawMetadata); - target = new AttributedScopeStack(target, scope, metadata); + target = PushSingleScope(target, grammar, scope); } return target; } + private static AttributedScopeStack PushSingleScope(AttributedScopeStack target, Grammar grammar, string scope) + { + BasicScopeAttributes rawMetadata = grammar.GetMetadataForScope(scope); + int metadata = AttributedScopeStack.MergeAttributes(target.TokenAttributes, target, rawMetadata); + return new AttributedScopeStack(target, scope, metadata); + } + public AttributedScopeStack PushAtributed(string scopePath, Grammar grammar) { if (scopePath == null) @@ -175,13 +181,17 @@ public AttributedScopeStack PushAtributed(string scopePath, Grammar grammar) // there are multiple scopes to push return Push(this, grammar, new List(scopePath.Split(new[] {" "}, StringSplitOptions.None))); } - // there is a single scope to push - return Push(this, grammar, new List() { scopePath }); + // there is a single scope to push - avoid List allocation + return PushSingleScope(this, grammar, scopePath); } public List GetScopeNames() { - return AttributedScopeStack.GenerateScopes(this); + if (_cachedScopeNames == null) + { + _cachedScopeNames = GenerateScopes(this); + } + return _cachedScopeNames; } private static List GenerateScopes(AttributedScopeStack scopesList) From 54a330cffb14eb08bf8211604145e90add582fc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 20:30:22 +0100 Subject: [PATCH 14/19] Replace new Stopwatch() with Stopwatch.GetTimestamp() in LineTokenizer.Scan() --- src/TextMateSharp/Internal/Grammars/LineTokenizer.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs index 33ed2bf..629e73d 100644 --- a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs +++ b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs @@ -48,11 +48,12 @@ public TokenizeStringResult Scan(bool checkWhileConditions, TimeSpan timeLimit) _anchorPosition = whileCheckResult.AnchorPosition; } - var stopWatch = new Stopwatch(); - stopWatch.Start(); + long startTimestamp = Stopwatch.GetTimestamp(); + long timeoutTicks = (long)(timeLimit.TotalSeconds * Stopwatch.Frequency); + while (!_stop) { - if (stopWatch.Elapsed > timeLimit) + if (Stopwatch.GetTimestamp() - startTimestamp > timeoutTicks) { return new TokenizeStringResult(_stack, true); } From dec2d8c30c62e159410ffdfb2a4158dc14e5e9d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 20:34:00 +0100 Subject: [PATCH 15/19] Revert "Replace new Stopwatch() with Stopwatch.GetTimestamp() in LineTokenizer.Scan()" This reverts commit 54a330cffb14eb08bf8211604145e90add582fc6. --- src/TextMateSharp/Internal/Grammars/LineTokenizer.cs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs index 629e73d..33ed2bf 100644 --- a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs +++ b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs @@ -48,12 +48,11 @@ public TokenizeStringResult Scan(bool checkWhileConditions, TimeSpan timeLimit) _anchorPosition = whileCheckResult.AnchorPosition; } - long startTimestamp = Stopwatch.GetTimestamp(); - long timeoutTicks = (long)(timeLimit.TotalSeconds * Stopwatch.Frequency); - + var stopWatch = new Stopwatch(); + stopWatch.Start(); while (!_stop) { - if (Stopwatch.GetTimestamp() - startTimestamp > timeoutTicks) + if (stopWatch.Elapsed > timeLimit) { return new TokenizeStringResult(_stack, true); } From 4708f1de43465ec50b0beed0683330e5c2f027b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Thu, 11 Dec 2025 23:21:00 +0100 Subject: [PATCH 16/19] Organize the projects in the solution --- TextMateSharp.sln | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/TextMateSharp.sln b/TextMateSharp.sln index ce32781..f6b02e7 100644 --- a/TextMateSharp.sln +++ b/TextMateSharp.sln @@ -31,10 +31,12 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Workflows", "Workflows", "{ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TextMateSharp.Grammars.Tests", "src\TextMateSharp.Grammars.Tests\TextMateSharp.Grammars.Tests.csproj", "{B9194474-83A7-47E6-B5E6-6CE360B1189B}" EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "src", "src", "{827E0CD3-B72D-47B6-A68D-7590B98EB39B}" -EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TextMateSharp.Benchmarks", "src\TextMateSharp.Benchmarks\TextMateSharp.Benchmarks.csproj", "{C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54}" EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Benchmarks", "Benchmarks", "{0D367332-B489-41A1-AD22-3F8D07F627C1}" +EndProject +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Tests", "Tests", "{F84B0BEF-53D7-43AD-93AB-2025127B6D84}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -124,7 +126,9 @@ Global GlobalSection(NestedProjects) = preSolution {C4A8E28E-70B0-4184-B62B-7286CB2F5756} = {FB55729C-1952-4D20-BFE7-C3202B160A0B} {46BA508A-D22E-4F76-AD27-68AC62725952} = {FB55729C-1952-4D20-BFE7-C3202B160A0B} - {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54} = {827E0CD3-B72D-47B6-A68D-7590B98EB39B} + {B9194474-83A7-47E6-B5E6-6CE360B1189B} = {F84B0BEF-53D7-43AD-93AB-2025127B6D84} + {B49D3C2E-6C4E-45B3-A645-592994B7B94D} = {F84B0BEF-53D7-43AD-93AB-2025127B6D84} + {C1F336BA-0CAD-4A76-8C83-E0CA2DB9DA54} = {0D367332-B489-41A1-AD22-3F8D07F627C1} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {D82FE2B4-7A75-444B-AB90-DC50F82D89A8} From b5a6b7898636341a5c4f360a1a3acdd94a816cb1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Fri, 12 Dec 2025 09:42:47 +0100 Subject: [PATCH 17/19] Reuse Stopwatch instances in LineTokenizer and TMModel to reduce allocations --- src/TextMateSharp/Internal/Grammars/LineTokenizer.cs | 6 +++--- src/TextMateSharp/Model/TMModel.cs | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs index 33ed2bf..4c0bb9d 100644 --- a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs +++ b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs @@ -20,6 +20,7 @@ class LineTokenizer private LineTokens _lineTokens; private int _anchorPosition = -1; private bool _stop; + private Stopwatch _stopwatch = new Stopwatch(); private int _lineLength; public LineTokenizer(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, int linePos, StateStack stack, @@ -48,11 +49,10 @@ public TokenizeStringResult Scan(bool checkWhileConditions, TimeSpan timeLimit) _anchorPosition = whileCheckResult.AnchorPosition; } - var stopWatch = new Stopwatch(); - stopWatch.Start(); + _stopwatch.Restart(); while (!_stop) { - if (stopWatch.Elapsed > timeLimit) + if (_stopwatch.Elapsed > timeLimit) { return new TokenizeStringResult(_stack, true); } diff --git a/src/TextMateSharp/Model/TMModel.cs b/src/TextMateSharp/Model/TMModel.cs index 977cb35..bafe0eb 100644 --- a/src/TextMateSharp/Model/TMModel.cs +++ b/src/TextMateSharp/Model/TMModel.cs @@ -112,6 +112,8 @@ void ThreadWorker(object state) } while (!IsStopped && model._thread != null); } + Stopwatch _stopwatch = new Stopwatch(); + private void RevalidateTokens(int startLine, int? toLineIndexOrNull) { if (model._tokenizer == null) @@ -130,8 +132,7 @@ private void RevalidateTokens(int startLine, int? toLineIndexOrNull) long MAX_ALLOWED_TIME = 5; long currentEstimatedTimeToTokenize = 0; long elapsedTime; - Stopwatch stopwatch = new Stopwatch(); - stopwatch.Start(); + _stopwatch.Restart(); // Tokenize at most 1000 lines. Estimate the tokenization speed per // character and stop when: // - MAX_ALLOWED_TIME is reached @@ -140,7 +141,7 @@ private void RevalidateTokens(int startLine, int? toLineIndexOrNull) int lineIndex = startLine; while (lineIndex <= toLineIndex && lineIndex < model.GetLines().GetNumberOfLines()) { - elapsedTime = stopwatch.ElapsedMilliseconds; + elapsedTime = _stopwatch.ElapsedMilliseconds; if (elapsedTime > MAX_ALLOWED_TIME) { // Stop if MAX_ALLOWED_TIME is reached From 6d145c3226ac051b2aa490f032ee27ed32af0b9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Fri, 12 Dec 2025 09:45:26 +0100 Subject: [PATCH 18/19] Reuse local stack and while rules buffers in LineTokenizer to reduce allocations --- src/TextMateSharp/Internal/Grammars/LineTokenizer.cs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs index 4c0bb9d..c6b4948 100644 --- a/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs +++ b/src/TextMateSharp/Internal/Grammars/LineTokenizer.cs @@ -22,6 +22,8 @@ class LineTokenizer private bool _stop; private Stopwatch _stopwatch = new Stopwatch(); private int _lineLength; + private readonly List _localStackBuffer = new List(); + private readonly List _whileRulesBuffer = new List(); public LineTokenizer(Grammar grammar, ReadOnlyMemory lineText, bool isFirstLine, int linePos, StateStack stack, LineTokens lineTokens) @@ -392,7 +394,8 @@ private void HandleCaptures(Grammar grammar, ReadOnlyMemory lineText, bool } int len = Math.Min(captures.Count, captureIndices.Length); - List localStack = new List(); + _localStackBuffer.Clear(); + var localStack = _localStackBuffer; int maxEnd = captureIndices[0].End; IOnigCaptureIndex captureIndex; @@ -492,7 +495,8 @@ private WhileCheckResult CheckWhileConditions(Grammar grammar, ReadOnlyMemory whileRules = new List(); + _whileRulesBuffer.Clear(); + var whileRules = _whileRulesBuffer; for (StateStack node = stack; node != null; node = node.Pop()) { Rule nodeRule = node.GetRule(grammar); From fe73d4476a3a0d78ed3f18b8493c4300abe886d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Daniel=20Pen=CC=83alba?= Date: Fri, 12 Dec 2025 09:47:27 +0100 Subject: [PATCH 19/19] Use ArrayPool to reduce allocations when appending newline in Grammar tokenization --- .../Internal/Grammars/Grammar.cs | 56 ++++++++++++------- 1 file changed, 35 insertions(+), 21 deletions(-) diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index 4651538..ff277ea 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -1,4 +1,5 @@ using System; +using System.Buffers; using System.Collections.Generic; using TextMateSharp.Grammars; @@ -254,33 +255,46 @@ private object Tokenize(ReadOnlyMemory lineText, StateStack prevState, boo } // Check if we need to append newline + char[] rentedBuffer = null; ReadOnlyMemory effectiveLineText; - if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') - { - // Only add \n if the passed lineText didn't have it. - // We need to allocate a new buffer with the newline - char[] buffer = new char[lineText.Length + 1]; - lineText.Span.CopyTo(buffer); - buffer[lineText.Length] = '\n'; - effectiveLineText = buffer.AsMemory(); - } - else + + try { - effectiveLineText = lineText; - } + if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') + { + // Only add \n if the passed lineText didn't have it. + // Use ArrayPool to avoid per-line allocation + int requiredLength = lineText.Length + 1; + rentedBuffer = ArrayPool.Shared.Rent(requiredLength); + lineText.Span.CopyTo(rentedBuffer); + rentedBuffer[lineText.Length] = '\n'; + effectiveLineText = rentedBuffer.AsMemory(0, requiredLength); + } + else + { + effectiveLineText = lineText; + } - int lineLength = effectiveLineText.Length; - LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); - TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, - lineTokens, true, timeLimit); + int lineLength = effectiveLineText.Length; + LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); + TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, + lineTokens, true, timeLimit); - if (emitBinaryTokens) - { - return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + if (emitBinaryTokens) + { + return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); + } + return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), tokenizeResult.Stack, tokenizeResult.StoppedEarly); } - return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); + finally + { + if (rentedBuffer != null) + { + ArrayPool.Shared.Return(rentedBuffer); + } + } } private void GenerateRootId()