#if UNITY_EDITOR || BURST_INTERNAL
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;

namespace Unity.Burst.Editor
{
    internal partial class BurstDisassembler
    {
        /// <summary>
        /// Base class for providing extended information of an identifier
        /// (whether a piece of text is an instruction, a register, ...).
        /// </summary>
        internal abstract class AsmTokenKindProvider
        {
            // Internally using string slice instead of string
            // to support faster lookup from AsmToken
            private readonly Dictionary<StringSlice, AsmTokenKind> _tokenKinds;

            // Length of the longest registered text; lets FindTokenKind skip the
            // dictionary lookup (and the hashing of the slice) for longer slices.
            private int _maximumLength;

            /// <summary>
            /// Creates the provider with a lookup table pre-sized to <paramref name="capacity"/> entries.
            /// </summary>
            protected AsmTokenKindProvider(int capacity)
            {
                _tokenKinds = new Dictionary<StringSlice, AsmTokenKind>(capacity);
            }

            /// <summary>
            /// Registers <paramref name="text"/> as being of kind <paramref name="kind"/>.
            /// Throws (via <c>Dictionary.Add</c>) if the same text is registered twice.
            /// </summary>
            protected void AddTokenKind(string text, AsmTokenKind kind)
            {
                _tokenKinds.Add(new StringSlice(text), kind);
                if (text.Length > _maximumLength)
                {
                    _maximumLength = text.Length;
                }
            }

            /// <summary>
            /// Returns the kind registered for <paramref name="slice"/>, or
            /// <see cref="AsmTokenKind.Identifier"/> when the slice is not registered.
            /// </summary>
            public virtual AsmTokenKind FindTokenKind(StringSlice slice)
            {
                return slice.Length <= _maximumLength && _tokenKinds.TryGetValue(slice, out var tokenKind)
                    ? tokenKind
                    : AsmTokenKind.Identifier;
            }

            /// <summary>
            /// Whether <paramref name="c"/> is accepted as one extra trailing character of an
            /// identifier or register. The base implementation accepts none.
            /// </summary>
            public virtual bool AcceptsCharAsIdentifierOrRegisterEnd(char c)
            {
                return false;
            }

            /// <summary>
            /// Whether <paramref name="c"/> can be part of an instruction, register or identifier.
            /// </summary>
            public virtual bool IsInstructionOrRegisterOrIdentifier(char c)
            {
                // we include . because we have instructions like `b.le` or `f32.const`
                return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' || c == '@' || c == '.';
            }

            /// <summary>
            /// Checks whether regA == regB. This function assumes the given strings are proper registers.
            /// </summary>
            public virtual bool RegisterEqual(string regA, string regB) => regA == regB;

            /// <summary>
            /// Returns the SIMD classification of the given instruction.
            /// </summary>
            public abstract SIMDkind SimdKind(StringSlice instruction);
        }

        /// <summary>
        /// The ASM tokenizer
        /// </summary>
        private struct AsmTokenizer
        {
            private readonly string _text;
            private readonly AsmKind _asmKind;
            private readonly AsmTokenKindProvider _tokenKindProvider;

            // Index of the current character in _text, and of the next one to read.
            private int _position;
            private int _nextPosition;

            // Same as above, but shifted by the padding inserted after instructions.
            private int _alignedPosition;
            private int _nextAlignedPosition;

            // Current character, or (char)0 once the end of the text has been reached.
            private char _c;
            private readonly char _commentStartChar;

            // Pending padding request computed by PrepareAlignment and consumed by
            // the next Misc token.
            private bool _doPad;
            private int _padding;

            public AsmTokenizer(string text, AsmKind asmKind, AsmTokenKindProvider tokenKindProvider, char commentStart)
            {
                _text = text;
                _asmKind = asmKind;
                _tokenKindProvider = tokenKindProvider;
                _position = 0;
                _nextPosition = 0;
                _alignedPosition = 0;
                _nextAlignedPosition = 0;
                _commentStartChar = commentStart;
                _doPad = false;
                _padding = 0;
                _c = (char)0;
                NextChar();
            }

            /// <summary>
            /// Reads the next token. Returns <c>false</c> (with a default token) when the end
            /// of the text has been reached.
            /// </summary>
            public bool TryGetNextToken(out AsmToken token)
            {
                token = new AsmToken();

                var startPosition = _position;
                var startAlignedPosition = _alignedPosition;

                if (_c == 0)
                {
                    return false;
                }

                if (_c == '.')
                {
                    token = ParseDirective(startPosition, startAlignedPosition);
                    return true;
                }

                // Like everywhere else in this file, we are inlining the matching characters instead
                // of using helper functions, as Mono might not be enough good at inlining by itself
                if (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@')
                {
                    token = ParseInstructionOrIdentifierOrRegister(startPosition, startAlignedPosition);
                    PrepareAlignment(token);
                    return true;
                }

                if (_c >= '0' && _c <= '9' || _c == '-')
                {
                    token = ParseNumber(startPosition, startAlignedPosition);
                    return true;
                }

                if (_c == '"')
                {
                    token = ParseString(startPosition, startAlignedPosition);
                    return true;
                }

                if (_c == _commentStartChar)
                {
                    token = ParseComment(startPosition, startAlignedPosition);
                    return true;
                }

                if (_c == '\r')
                {
                    if (PreviewChar() == '\n')
                    {
                        NextChar(); // skip \r so that \r\n forms a single new-line token
                    }
                    token = ParseNewLine(startPosition, startAlignedPosition);
                    return true;
                }

                if (_c == '\n')
                {
                    token = ParseNewLine(startPosition, startAlignedPosition);
                    return true;
                }

                // Apply the padding requested by the preceding instruction: everything
                // following this Misc token is shifted in aligned coordinates.
                if (_doPad)
                {
                    _nextAlignedPosition += _padding;
                    _doPad = false;
                }

                token = ParseMisc(startPosition, startAlignedPosition);
                return true;
            }

            /// <summary>
            /// Computes the padding needed after <paramref name="token"/> so that what follows an
            /// instruction starts at column <c>InstructionAlignment</c>. Padding is only requested
            /// for Intel syntax, for instruction-like tokens, and when something follows on the line.
            /// </summary>
            private void PrepareAlignment(AsmToken token)
            {
                var kind = token.Kind;
                _padding = InstructionAlignment - token.Length;
                _doPad = _asmKind == AsmKind.Intel
                    && (kind == AsmTokenKind.Instruction
                        || kind == AsmTokenKind.BranchInstruction
                        || kind == AsmTokenKind.CallInstruction
                        || kind == AsmTokenKind.JumpInstruction
                        || kind == AsmTokenKind.ReturnInstruction
                        || kind == AsmTokenKind.InstructionSIMD)
                    && _c != '\n' && _c != '\r' // If there is no registers behind instruction don't align.
                    && _padding > 0;
            }

            private AsmToken ParseNewLine(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                NextChar(); // Skip newline
                return new AsmToken(AsmTokenKind.NewLine, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            private AsmToken ParseMisc(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                // Parse anything that is not a directive, instruction, number, string or comment
                while (!((_c == (char)0) || (_c == '\r') || (_c == '\n') || (_c == '.')
                         || (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c == '_' || _c == '@')
                         || (_c >= '0' && _c <= '9' || _c == '-')
                         || (_c == '"')
                         || (_c == _commentStartChar)))
                {
                    endPosition = _position;
                    NextChar();
                }
                return new AsmToken(AsmTokenKind.Misc, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            private static readonly string[] DataDirectiveStrings = new[]
            {
                AssertDataDirectiveLength(".long"),
                AssertDataDirectiveLength(".byte"),
                AssertDataDirectiveLength(".short"),
                AssertDataDirectiveLength(".ascii"),
                AssertDataDirectiveLength(".asciz"),
            };

            // ParseDirective only looks for data directives among tokens of length 5 or 6,
            // so every entry of DataDirectiveStrings must have one of these lengths.
            private static string AssertDataDirectiveLength(string text)
            {
                var length = text.Length;
                Debug.Assert(length == 5 || length == 6, $"Invalid length {length} for string {text}. Expecting 5 or 6");
                return text;
            }

            private AsmToken ParseDirective(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                NextChar(); // skip .
                bool isLabel = _c == 'L'; // A label starts with a capital `L` like .Lthis_is_a_jump_label
                while (_c >= 'a' && _c <= 'z' || _c >= 'A' && _c <= 'Z' || _c >= '0' && _c <= '9' || _c == '.' || _c == '_' || _c == '@')
                {
                    endPosition = _position;
                    NextChar();
                }

                // Refine the kind of directive:
                //
                // .Lfunc_begin => FunctionBegin
                // .Lfunc_end => FunctionEnd
                // .L????????? => Label
                // data directive (.byte, .long, .short...) => DataDirective
                // anything else => Directive
                const string MatchFunc = ".Lfunc_";
                const int MatchFuncLength = 7;
                Debug.Assert(MatchFunc.Length == MatchFuncLength);
                var kind = isLabel ? AsmTokenKind.Label : AsmTokenKind.Directive;

                // Fast early check
                if (isLabel && string.CompareOrdinal(_text, startPosition, MatchFunc, 0, MatchFuncLength) == 0)
                {
                    // Prefix match on purpose: these labels carry a suffix after the prefix.
                    if (string.CompareOrdinal(_text, startPosition, ".Lfunc_begin", 0, ".Lfunc_begin".Length) == 0)
                    {
                        kind = AsmTokenKind.FunctionBegin;
                    }
                    else if (string.CompareOrdinal(_text, startPosition, ".Lfunc_end", 0, ".Lfunc_end".Length) == 0)
                    {
                        kind = AsmTokenKind.FunctionEnd;
                    }
                }

                // Adjust directive to mark data directives, source location directives...etc.
                int length = endPosition - startPosition + 1;

                // Use length to early exit
                if (!isLabel && length >= 4 && length <= 8)
                {
                    if (length == 5 || length == 6)
                    {
                        foreach (var dataDirectiveStr in DataDirectiveStrings)
                        {
                            // Require the same length so that a 6 character token is not
                            // classified from a 5 character prefix (e.g. `.longx`).
                            if (length == dataDirectiveStr.Length
                                && string.CompareOrdinal(_text, startPosition, dataDirectiveStr, 0, length) == 0)
                            {
                                kind = AsmTokenKind.DataDirective;
                                break;
                            }
                        }

                        // .file => SourceFile
                        if (kind == AsmTokenKind.Directive && length == 5
                            && string.CompareOrdinal(_text, startPosition, ".file", 0, 5) == 0)
                        {
                            kind = AsmTokenKind.SourceFile;
                        }
                    }
                    // .loc => SourceLocation
                    // .cv_loc => SourceLocation
                    else if ((length == 4 && string.CompareOrdinal(_text, startPosition, ".loc", 0, 4) == 0)
                             || (length == 7 && string.CompareOrdinal(_text, startPosition, ".cv_loc", 0, 7) == 0))
                    {
                        kind = AsmTokenKind.SourceLocation;
                    }
                    // .cv_file => SourceFile
                    else if (length == 8 && string.CompareOrdinal(_text, startPosition, ".cv_file", 0, 8) == 0)
                    {
                        kind = AsmTokenKind.SourceFile;
                    }
                }

                return new AsmToken(kind, startPosition, startAlignedPosition, length);
            }

            private AsmToken ParseInstructionOrIdentifierOrRegister(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                while (_tokenKindProvider.IsInstructionOrRegisterOrIdentifier(_c))
                {
                    endPosition = _position;
                    NextChar();
                }

                // The provider may accept a single extra trailing character.
                if (_tokenKindProvider.AcceptsCharAsIdentifierOrRegisterEnd(_c))
                {
                    endPosition = _position;
                    NextChar();
                }

                // Resolve token kind for identifier
                int length = endPosition - startPosition + 1;
                var tokenKind = _tokenKindProvider.FindTokenKind(new StringSlice(_text, startPosition, length));

                // If we have `:` right after an identifier, change from identifier to label declaration
                // to help the semantic pass later
                if (tokenKind == AsmTokenKind.Identifier && _c == ':')
                {
                    tokenKind = AsmTokenKind.Label;
                }

                return new AsmToken(tokenKind, startPosition, startAlignedPosition, length);
            }

            private AsmToken ParseNumber(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                if (_c == '-')
                {
                    NextChar(); // a lone `-` still yields a one character Number token
                }

                // Accepts decimal, hexadecimal (0x...) and floating point looking text.
                while (_c >= '0' && _c <= '9' || _c >= 'a' && _c <= 'f' || _c >= 'A' && _c <= 'F' || _c == 'x' || _c == '.')
                {
                    endPosition = _position;
                    NextChar();
                }

                // If we have `:` right after a number, change from number to label declaration to help the semantic pass later
                var numberKind = _c == ':' ? AsmTokenKind.Label : AsmTokenKind.Number;
                return new AsmToken(numberKind, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            private AsmToken ParseString(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                // Skip first "
                NextChar();
                while (_c != (char)0 && _c != '"')
                {
                    // Skip the character following a backslash, whatever it is. This keeps an
                    // escaped quote (\") from ending the string, and keeps the second half of an
                    // escaped backslash (\\) from being mistaken for the start of an escaped quote.
                    if (_c == '\\' && PreviewChar() != (char)0)
                    {
                        NextChar();
                    }
                    endPosition = _position;
                    NextChar();
                }

                // Include the closing quote (or the last character when the string is unterminated).
                endPosition = _position;
                NextChar(); // Skip trailing "

                // If we have `:` right after a string, change from string to label declaration to help the semantic pass later
                var stringKind = _c == ':' ? AsmTokenKind.Label : AsmTokenKind.String;
                return new AsmToken(stringKind, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            private AsmToken ParseComment(int startPosition, int startAlignedPosition)
            {
                var endPosition = _position;
                // A comment runs until the end of the line (the new line is not part of it).
                while (_c != (char)0 && (_c != '\n' && _c != '\r'))
                {
                    endPosition = _position;
                    NextChar();
                }

                return new AsmToken(AsmTokenKind.Comment, startPosition, startAlignedPosition, endPosition - startPosition + 1);
            }

            /// <summary>
            /// Advances to the next character. At the end of the text, the current character
            /// becomes (char)0 and the positions are left on the last character read.
            /// </summary>
            [MethodImpl(MethodImplOptions.AggressiveInlining)]
            private void NextChar()
            {
                if (_nextPosition < _text.Length)
                {
                    _position = _nextPosition;
                    _c = _text[_position];
                    _nextPosition = _position + 1;

                    _alignedPosition = _nextAlignedPosition;
                    _nextAlignedPosition = _alignedPosition + 1;
                }
                else
                {
                    _c = (char)0;
                }
            }

            /// <summary>
            /// Returns the character after the current one without consuming it, or (char)0 at the end.
            /// </summary>
            private char PreviewChar()
            {
                return _nextPosition < _text.Length ? _text[_nextPosition] : (char)0;
            }
        }

        public enum SIMDkind
        {
            Packed,
            Scalar,
            Infrastructure,
        }

        /// <summary>
        /// An ASM token. The token doesn't contain the string of the token, but provides method
        /// <see cref="Slice"/> and <see cref="ToString(string)"/> to extract it.
        /// </summary>
        internal readonly struct AsmToken
        {
            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
            // CAUTION: It is important to not put *any managed objects*
            // into this struct for GC efficiency
            // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

            public AsmToken(AsmTokenKind kind, int position, int alignedPosition, int length)
            {
                Kind = kind;
                Position = position;
                AlignedPosition = alignedPosition;
                Length = length;
            }

            public readonly AsmTokenKind Kind;

            // Index of the first character of the token in the tokenized text.
            public readonly int Position;

            // Position of the token once instruction padding is taken into account.
            public readonly int AlignedPosition;

            public readonly int Length;

            /// <summary>
            /// Returns the text of this token as a slice of <paramref name="text"/> (no allocation).
            /// </summary>
            public StringSlice Slice(string text) => new StringSlice(text, Position, Length);

            /// <summary>
            /// Returns the text of this token as a new string extracted from <paramref name="text"/>.
            /// </summary>
            public string ToString(string text) => text.Substring(Position, Length);

            public string ToFriendlyText(string text)
            {
                return $"{text.Substring(Position, Length)} : {Kind}";
            }
        }

        /// <summary>
        /// Kind of an ASM token.
        /// </summary>
        internal enum AsmTokenKind
        {
            Eof,
            Directive,
            DataDirective,
            SourceFile,
            SourceLocation,
            Label,
            FunctionBegin,
            FunctionEnd,
            Identifier,
            Qualifier,
            Instruction,
            CallInstruction,
            BranchInstruction,
            JumpInstruction,
            ReturnInstruction,
            InstructionSIMD,
            Register,
            Number,
            String,
            Comment,
            NewLine,
            Misc
        }
    }

    /// <summary>
    /// A slice of a string from an original string.
    /// </summary>
    internal readonly struct StringSlice : IEquatable<StringSlice>
    {
        private readonly string _text;

        public readonly int Position;

        public readonly int Length;

        /// <summary>
        /// Creates a slice covering the whole of <paramref name="text"/>.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public StringSlice(string text)
        {
            _text = text ?? throw new ArgumentNullException(nameof(text));
            Position = 0;
            Length = text.Length;
        }

        /// <summary>
        /// Creates a slice of <paramref name="length"/> characters of <paramref name="text"/>
        /// starting at <paramref name="position"/>. The range is not validated here.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public StringSlice(string text, int position, int length)
        {
            _text = text ?? throw new ArgumentNullException(nameof(text));
            Position = position;
            Length = length;
        }

        /// <summary>
        /// Gets the character at <paramref name="index"/>, relative to the start of the slice.
        /// </summary>
        public char this[int index] => _text[Position + index];

        /// <summary>
        /// Two slices are equal when they have the same characters, whatever string they come from.
        /// </summary>
        public bool Equals(StringSlice other)
        {
            if (Length != other.Length)
            {
                return false;
            }

            for (int i = 0; i < Length; i++)
            {
                if (this[i] != other[i])
                {
                    return false;
                }
            }
            return true;
        }

        public override bool Equals(object obj)
        {
            return obj is StringSlice other && Equals(other);
        }

        public override int GetHashCode()
        {
            // Content based hash, consistent with Equals.
            unchecked
            {
                var hashCode = Length;
                for (int i = 0; i < Length; i++)
                {
                    hashCode = (hashCode * 397) ^ this[i];
                }
                return hashCode;
            }
        }

        public static bool operator ==(StringSlice left, StringSlice right)
        {
            return left.Equals(right);
        }

        public static bool operator !=(StringSlice left, StringSlice right)
        {
            return !left.Equals(right);
        }

        public override string ToString()
        {
            return _text.Substring(Position, Length);
        }

        /// <summary>
        /// Whether this slice starts with <paramref name="text"/> (ordinal comparison).
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="text"/> is null.</exception>
        public bool StartsWith(string text)
        {
            if (text == null)
            {
                throw new ArgumentNullException(nameof(text));
            }

            if (Length < text.Length)
            {
                return false;
            }

            for (var i = 0; i < text.Length; i++)
            {
                if (_text[Position + i] != text[i])
                {
                    return false;
                }
            }
            return true;
        }

        /// <summary>
        /// Whether this slice contains the character <paramref name="c"/>.
        /// </summary>
        public bool Contains(char c) => IndexOf(c) >= 0;

        /// <summary>
        /// Returns the index (relative to the start of the slice) of the first occurrence
        /// of <paramref name="c"/>, or -1 when the slice does not contain it.
        /// </summary>
        public int IndexOf(char c)
        {
            for (var i = 0; i < Length; i++)
            {
                if (_text[Position + i] == c)
                {
                    return i;
                }
            }
            return -1;
        }
    }
}
#endif