399 lines
13 KiB
399 lines
13 KiB
![]() |
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading.Tasks;
using JetBrains.Annotations;
using UnityEngine;
namespace UnityEditor.Searcher
/// <summary>
/// Searcher database that keeps, for each item path, a list of weighted index terms
/// and scores items against a tokenized query.
/// </summary>
public class SearcherDatabase : SearcherDatabaseBase
// Maps an item's Path to its (term, weight) pairs. Filled by BuildIndex and read by the index-based Match overload.
Dictionary<string, IReadOnlyList<ValueTuple<string, float>>> m_Index = new Dictionary<string, IReadOnlyList<ValueTuple<string, float>>>();
/// <summary>
/// Pairs a SearcherItem with a score. Used both for individual matches and
/// as the accumulator that tracks the best-scoring item seen so far.
/// </summary>
class Result
// The item this entry refers to; stays null until something assigns it.
public SearcherItem item;
// Score for the item. Despite the name, it also carries ordinary per-item scores, not only a maximum.
public float maxScore;
// Compile-time switch for the task-based code paths. Search additionally requires more than 100 items before using them.
const bool k_IsParallel = true;
/// <summary>
/// Optional predicate called with (query, item). When it is null, the virtual Match overload treats
/// every item as passing the filter, and a blank query returns the full item list.
/// </summary>
public Func<string, SearcherItem, bool> MatchFilter { get; set; }
/// <summary>
/// Builds a new database from <paramref name="items"/> and returns it.
/// </summary>
/// <param name="items">Items handed to the private constructor.</param>
/// <param name="databaseDirectory">Directory associated with the database; may be null (the existence check is skipped in that case).</param>
/// <param name="serializeToFile">Enables the directory check and the serialization branch below. Defaults to true.</param>
/// <returns>The newly constructed database.</returns>
public static SearcherDatabase Create(
List<SearcherItem> items,
string databaseDirectory,
bool serializeToFile = true
// NOTE(review): the body of this condition is not visible in this view; presumably it creates the missing directory - confirm.
if (serializeToFile && databaseDirectory != null && !Directory.Exists(databaseDirectory))
var database = new SearcherDatabase(databaseDirectory, items);
// NOTE(review): the body of this condition is not visible in this view; presumably it writes the database to disk - confirm.
if (serializeToFile)
return database;
/// <summary>
/// Creates a database bound to an existing directory.
/// </summary>
/// <param name="databaseDirectory">Directory that must already exist.</param>
/// <returns>A database constructed with no initial item collection.</returns>
/// <exception cref="InvalidOperationException">Thrown when <paramref name="databaseDirectory"/> does not exist.</exception>
public static SearcherDatabase Load(string databaseDirectory)
if (!Directory.Exists(databaseDirectory))
throw new InvalidOperationException("databaseDirectory not found.");
// Passing null skips the item-adding loop in the constructor.
// NOTE(review): no step that reads content from the directory is visible in this view - confirm against the full file.
var database = new SearcherDatabase(databaseDirectory, null);
return database;
/// <summary>
/// Creates a database from <paramref name="db"/> using an empty string as the database directory.
/// </summary>
/// <param name="db">Items to add; forwarded unchanged to the private constructor.</param>
public SearcherDatabase(IReadOnlyCollection<SearcherItem> db)
: this("", db)
/// <summary>
/// Initializes the item list and, when <paramref name="db"/> is non-null, adds each of its items.
/// </summary>
/// <param name="databaseDirectory">Forwarded to the base class constructor.</param>
/// <param name="db">Items to add; null leaves the item list empty.</param>
SearcherDatabase(string databaseDirectory, IReadOnlyCollection<SearcherItem> db)
: base(databaseDirectory)
m_ItemList = new List<SearcherItem>();
// Running id counter passed by reference, so AddItemToIndex can advance it across calls.
var nextId = 0;
if (db != null)
foreach (var item in db)
// AddItemToIndex is defined outside this view; the third argument is always null here.
AddItemToIndex(item, ref nextId, null);
/// <summary>
/// Searches the database for items matching <paramref name="query"/>.
/// </summary>
/// <param name="query">Raw query text. Spaces and tabs are trimmed from both ends. It is dereferenced immediately, so it must not be null.</param>
/// <param name="localMaxScore">Set to 0 up front, then to the best score found when the scoring path runs.</param>
/// <returns>
/// For a blank query: the internal item list when no MatchFilter is set, otherwise the filtered items.
/// For a non-blank query: a list whose first slot holds the best-scoring item (or null when there is none).
/// </returns>
public override List<SearcherItem> Search(string query, out float localMaxScore)
// Match assumes the query is trimmed
query = query.Trim(' ', '\t');
localMaxScore = 0;
// Blank query: nothing to score. Without a filter, the internal list instance itself is returned, not a copy.
if (string.IsNullOrWhiteSpace(query))
if (MatchFilter == null)
return m_ItemList;
// ReSharper disable once RedundantLogicalConditionalExpressionOperand
if (k_IsParallel && m_ItemList.Count > 100)
return FilterMultiThreaded(query);
return FilterSingleThreaded(query);
// Slot 0 is a placeholder for the best match; it is overwritten at the end once the best item is known.
var finalResults = new List<SearcherItem> { null };
var max = new Result();
var tokenizedQuery = new List<string>();
// NOTE(review): the loop body that fills tokenizedQuery is not visible in this view - confirm how tokens are normalized.
foreach (var token in Tokenize(query))
// NOTE(review): the two search calls below are presumably alternative branches (an else appears to be missing from this view) - confirm.
// ReSharper disable once RedundantLogicalConditionalExpressionOperand
if (k_IsParallel && m_ItemList.Count > 100)
SearchMultithreaded(query, tokenizedQuery, max, finalResults);
SearchSingleThreaded(query, tokenizedQuery, max, finalResults);
localMaxScore = max.maxScore;
// When no best item was recorded, slot 0 keeps its null placeholder.
if (max.item != null)
finalResults[0] = max.item;
return finalResults;
/// <summary>
/// Decides whether <paramref name="item"/> matches the query, combining the index-based match with MatchFilter.
/// </summary>
/// <param name="query">Query text passed to MatchFilter.</param>
/// <param name="tokenizedQuery">Query tokens passed to the index-based match.</param>
/// <param name="item">Item under test; its Path is the index lookup key.</param>
/// <param name="score">Score produced by the index-based match.</param>
/// <returns>True only when both the index-based match and the filter accept the item.</returns>
protected virtual bool Match(string query, IReadOnlyList<string> tokenizedQuery, SearcherItem item, out float score)
// The filter is evaluated first, so it is invoked for every item even when the index match then fails. A null filter counts as true.
var filter = MatchFilter?.Invoke(query, item) ?? true;
return Match(tokenizedQuery, item.Path, out score) && filter;
/// <summary>
/// Runs MatchFilter over every item in m_ItemList on the calling thread and returns a new list.
/// </summary>
/// <param name="query">Query text passed to MatchFilter for each item.</param>
/// <returns>A newly allocated list of items.</returns>
List<SearcherItem> FilterSingleThreaded(string query)
var result = new List<SearcherItem>();
foreach (var searcherItem in m_ItemList)
// MatchFilter is invoked without a null check; the visible caller (Search) tests it for null beforehand.
// NOTE(review): the statements that skip rejected items and add accepted ones are not visible in this view - confirm.
if (!MatchFilter.Invoke(query, searcherItem))
return result;
/// <summary>
/// Runs MatchFilter over m_ItemList using one task per logical processor, each handling a contiguous slice.
/// </summary>
/// <param name="query">Query text passed to MatchFilter for each item.</param>
/// <returns>A newly allocated list of items.</returns>
List<SearcherItem> FilterMultiThreaded(string query)
var result = new List<SearcherItem>();
var count = Environment.ProcessorCount;
var tasks = new Task[count];
// One output list per task, so tasks never write to a shared list.
var lists = new List<SearcherItem>[count];
// Slice size rounded up so that count slices cover every item.
var itemsPerTask = (int)Math.Ceiling(m_ItemList.Count / (float)count);
for (var i = 0; i < count; i++)
// Copy of the loop variable so the lambda captures this iteration's value.
var i1 = i;
tasks[i] = Task.Run(() =>
lists[i1] = new List<SearcherItem>();
for (var j = 0; j < itemsPerTask; j++)
var index = j + itemsPerTask * i1;
// Bounds guard for the last slice(s), which can extend past the end because of the rounding above.
// NOTE(review): the statement executed when the guard trips is not visible in this view - confirm.
if (index >= m_ItemList.Count)
var item = m_ItemList[index];
// NOTE(review): the statements that skip rejected items and add accepted ones are not visible in this view - confirm.
if (!MatchFilter.Invoke(query, item))
// NOTE(review): neither a wait on the tasks nor the body of this merge loop is visible in this view - confirm both exist.
for (var i = 0; i < count; i++)
return result;
// Threshold that PostprocessResults compares against (score / best score); the comparison is strictly greater-than.
// Declared as a readonly instance field rather than a const, despite the k_ prefix.
readonly float k_ScoreCutOff = 0.33f;
/// <summary>
/// Scores every item in m_ItemList on the calling thread, tracks the best one in <paramref name="max"/>,
/// then hands the collected results to PostprocessResults.
/// </summary>
/// <param name="query">Trimmed query text.</param>
/// <param name="tokenizedQuery">Query tokens used for scoring.</param>
/// <param name="max">Accumulator updated in place with the highest score and its item.</param>
/// <param name="finalResults">Output collection passed through to PostprocessResults.</param>
void SearchSingleThreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, ICollection<SearcherItem> finalResults)
List<Result> results = new List<Result>();
foreach (var item in m_ItemList)
float score = 0;
// An empty query short-circuits the Match call, so the item is accepted with score left at 0.
// NOTE(review): braces are not visible in this view; the statements below are presumably inside this condition - confirm.
if (query.Length == 0 || Match(query, tokenizedQuery, item, out score))
// Strict comparison: on a tie the earlier item is kept as the best.
if (score > max.maxScore)
max.item = item;
max.maxScore = score;
results.Add(new Result() { item = item, maxScore = score});
PostprocessResults(results, finalResults, max);
/// <summary>
/// Scores m_ItemList using one task per logical processor, merges the per-task best results into
/// <paramref name="max"/>, then hands the collected results to PostprocessResults.
/// </summary>
/// <param name="query">Trimmed query text.</param>
/// <param name="tokenizedQuery">Query tokens used for scoring.</param>
/// <param name="max">Accumulator updated in place with the highest score and its item.</param>
/// <param name="finalResults">Output list passed through to PostprocessResults.</param>
void SearchMultithreaded(string query, IReadOnlyList<string> tokenizedQuery, Result max, List<SearcherItem> finalResults)
var count = Environment.ProcessorCount;
var tasks = new Task[count];
// Per-task best result, so each task only writes to its own slot.
var localResults = new Result[count];
// Shared collection for all matches; entries from different tasks can interleave in any order.
var queue = new ConcurrentQueue<Result>();
// Slice size rounded up so that count slices cover every item.
var itemsPerTask = (int)Math.Ceiling(m_ItemList.Count / (float)count);
for (var i = 0; i < count; i++)
// Copy of the loop variable so the lambda captures this iteration's value.
var i1 = i;
localResults[i1] = new Result();
tasks[i] = Task.Run(() =>
var result = localResults[i1];
for (var j = 0; j < itemsPerTask; j++)
var index = j + itemsPerTask * i1;
// Bounds guard for the last slice(s), which can extend past the end because of the rounding above.
// NOTE(review): the statement executed when the guard trips is not visible in this view - confirm.
if (index >= m_ItemList.Count)
var item = m_ItemList[index];
float score = 0;
// An empty query short-circuits the Match call, so the item is accepted with score left at 0.
// NOTE(review): braces are not visible in this view; the statements below are presumably inside this condition - confirm.
if (query.Length == 0 || Match(query, tokenizedQuery, item, out score))
if (score > result.maxScore)
result.maxScore = score;
result.item = item;
queue.Enqueue(new Result { item = item, maxScore = score });
// NOTE(review): no wait on the tasks is visible in this view before the reduction below - confirm one exists.
// Reduce the per-task maxima into the shared accumulator.
for (var i = 0; i < count; i++)
if (localResults[i].maxScore > max.maxScore)
max.maxScore = localResults[i].maxScore;
max.item = localResults[i].item;
PostprocessResults(queue, finalResults, max);
/// <summary>
/// Walks the collected results and tests each one against the best result using a normalized score.
/// </summary>
/// <param name="results">All scored results.</param>
/// <param name="items">Destination collection for the items that pass.</param>
/// <param name="max">Best result; its score is the normalization divisor and its item is excluded by the condition.</param>
void PostprocessResults(IEnumerable<Result> results, ICollection<SearcherItem> items, Result max)
foreach (var result in results)
// Float division: when max.maxScore is 0 this yields NaN or Infinity rather than throwing.
var normalizedScore = result.maxScore / max.maxScore;
// Excludes entries without an item, the best item itself (reference comparison), and scores at or below the cut-off.
// NOTE(review): the body of this condition is not visible in this view; presumably it adds result.item to items - confirm.
if (result.item != null && result.item != max.item && normalizedScore > k_ScoreCutOff)
/// <summary>
/// Populates the term index for every item in m_ItemList whose Path is not already indexed.
/// </summary>
/// <remarks>
/// Term weights used in the visible code: 0.8 for a token longer than one character, 1.0 for the running
/// tokenSuite string, and 0.5 for the lower-cased uppercase initials of the item name.
/// NOTE(review): braces and some short lines are not visible in this view, so the exact nesting
/// (notably around tokenSuite) should be confirmed against the full file.
/// </remarks>
public override void BuildIndex()
foreach (var item in m_ItemList)
// Items sharing a Path are indexed once; later items with the same Path reuse the first item's terms.
if (!m_Index.ContainsKey(item.Path))
List<ValueTuple<string, float>> terms = new List<ValueTuple<string, float>>();
// If the item uses synonyms to return results for similar words/phrases, add them to the search terms
IList<string> tokens = null;
// NOTE(review): the two assignments below are presumably the if/else branches (the else is not visible in this view) - confirm.
if (item.Synonyms == null)
tokens = Tokenize(item.Name);
tokens = Tokenize(string.Format("{0} {1}", item.Name, string.Join(" ", item.Synonyms)));
// Fixes bug: https://fogbugz.unity3d.com/f/cases/1359158/
// Without this, node names with spaces or those with Pascal casing were not added to index
// NOTE(review): no use of nodeName is visible in this view - confirm where it is added to terms.
var nodeName = item.Name.ToLower().Replace(" ", String.Empty);
string tokenSuite = "";
foreach (var token in tokens)
// ToLower() without arguments uses the current culture.
var t = token.ToLower();
// Single-character tokens do not get their own term.
if (t.Length > 1)
terms.Add(new ValueTuple<string, float>(t, 0.8f));
// tokenSuite accumulates the tokens seen so far, separated by single spaces.
if (tokenSuite.Length > 0)
tokenSuite += " " + t;
terms.Add(new ValueTuple<string, float>(tokenSuite, 1f));
tokenSuite = t;
// Add a term containing all the uppercase letters (CamelCase World BBox => CCWBB)
// Splitting on runs of non-uppercase characters leaves only the uppercase letters in the pieces.
var initialList = Regex.Split(item.Name, @"\P{Lu}+");
var initials = string.Concat(initialList).Trim();
if (!string.IsNullOrEmpty(initials))
terms.Add(new ValueTuple<string, float>(initials.ToLower(), 0.5f));
// The key is the untrimmed Path.
m_Index.Add(item.Path, terms);
/// <summary>
/// Splits <paramref name="s"/> into tokens: first on non-word characters, then at uppercase-letter boundaries.
/// </summary>
/// <param name="s">Text to tokenize; passed directly to Regex.Split.</param>
/// <returns>The list of tokens.</returns>
static IList<string> Tokenize(string s)
// Tracks tokens already seen, so the duplicate test below is a set lookup.
var knownTokens = new HashSet<string>();
var tokens = new List<string>();
// Split on word boundaries
foreach (var t in Regex.Split(s, @"\W"))
// Split camel case words
// The pattern is a capturing group, so Regex.Split includes the captured pieces in its output.
var tt = Regex.Split(t, @"(\p{Lu}+\P{Lu}*)");
foreach (var ttt in tt)
var tttt = ttt.Trim();
// Case-sensitive test: the set uses the default string comparer.
// NOTE(review): the statements that record the token in knownTokens and tokens are not visible in this view - confirm.
if (!string.IsNullOrEmpty(tttt) && !knownTokens.Contains(tttt))
return tokens;
/// <summary>
/// Scores an item, identified by its path, against the tokenized query using the term index.
/// </summary>
/// <param name="tokenizedQuery">Query tokens; compared against index terms with StartsWith.</param>
/// <param name="itemPath">Item path; trimmed before use as the index key.</param>
/// <param name="score">Highest per-term score found, or 0 when there is no match (1 for the empty-path/empty-query case).</param>
/// <returns>True when the resulting score is greater than 0, or when both the path and the query are empty.</returns>
/// <remarks>
/// NOTE(review): braces and any else lines are not visible in this view, so the nesting of the querySuite
/// statements inside the inner loop should be confirmed against the full file.
/// </remarks>
bool Match(IReadOnlyList<string> tokenizedQuery, string itemPath, out float score)
// NOTE(review): the lookup key is trimmed here, while BuildIndex stores the untrimmed Path, so a Path with
// leading or trailing whitespace would not be found - confirm whether such paths can occur.
itemPath = itemPath.Trim();
// An empty path matches only an empty query.
if (itemPath == "")
if (tokenizedQuery.Count == 0)
score = 1;
return true;
score = 0;
return false;
IReadOnlyList<ValueTuple<string, float>> indexTerms;
// Paths that were never indexed cannot match.
if (!m_Index.TryGetValue(itemPath, out indexTerms))
score = 0;
return false;
float maxScore = 0.0f;
// Item1 is the index term text and Item2 its weight.
foreach (var t in indexTerms)
float scoreForTerm = 0f;
var querySuite = "";
var querySuiteFactor = 1.25f;
foreach (var q in tokenizedQuery)
// StartsWith(string) performs a culture-sensitive comparison using the current culture.
if (t.Item1.StartsWith(q))
// Prefix hit: the term weight scaled by the fraction of the term that the token covers.
scoreForTerm += t.Item2 * q.Length / t.Item1.Length;
// querySuite accumulates query tokens separated by single spaces, to reward multi-token prefixes.
if (querySuite.Length > 0)
querySuite += " " + q;
if (t.Item1.StartsWith(querySuite))
scoreForTerm += t.Item2 * querySuiteFactor * querySuite.Length / t.Item1.Length;
querySuite = q;
// The factor is squared rather than multiplied by a constant (1.25, then 1.5625, and so on).
querySuiteFactor *= querySuiteFactor;
// The item's score is the best score over all of its terms, not the sum.
maxScore = Mathf.Max(maxScore, scoreForTerm);
score = maxScore;
return score > 0;