Merge #5715

5715: Use djb2 for snapshot item hashlist r=def- a=Robyt3

The previous hash function was heavily biased towards the hash buckets 64-79, making those buckets overflow faster, which results in snapshot CRC errors and lags. Using the djb2 hash yields an almost even distribution over the entire range of values.

Source for djb2: http://www.cse.yorku.ca/~oz/hash.html (we incidentally use the same implementation for `str_quickhash`).

Sample which compares the usage of hash buckets in the test map from #5454 with the old and new hash function: [SnapshotHash.csv](https://github.com/ddnet/ddnet/files/9285148/SnapshotHash.csv) (this table also differentiates between the source and target of the snapshot delta).

Closes #4379.

## Checklist

- [X] Tested the change ingame
- [ ] Provided screenshots if it is a visual change
- [ ] Tested in combination with possibly related configuration options
- [ ] Written a unit test (especially base/) or added coverage to integration test
- [ ] Considered possible null pointers and out of bounds array indexing
- [ ] Changed no physics that affect existing maps
- [ ] Tested the change with [ASan+UBSan or valgrind's memcheck](https://github.com/ddnet/ddnet/#using-addresssanitizer--undefinedbehavioursanitizer-or-valgrinds-memcheck) (optional)

Co-authored-by: Robert Müller <robytemueller@gmail.com>
2024-11-10 10:08:18 +00:00 · 2022-08-08 22:38:22 +00:00 · 2022-08-08 22:38:22 +00:00 · dbe6d8cb29
parent 3273781d3f 243ab7c2f0
commit dbe6d8cb29
1 changed files with 20 additions and 10 deletions
--- a/src/engine/shared/snapshot.cpp
+++ b/src/engine/shared/snapshot.cpp
@@ -144,18 +144,28 @@ bool CSnapshot::IsValid(size_t ActualSize) const

 // CSnapshotDelta

-struct CItemList
-{
-	int m_Num;
-	int m_aKeys[64];
-	int m_aIndex[64];
-};
-
 enum
 {
 	HASHLIST_SIZE = 256,
+	HASHLIST_BUCKET_SIZE = 64, // capacity of the m_aKeys/m_aIndex arrays of each CItemList
 };

+struct CItemList // one hash bucket: parallel arrays of item keys and their item indices
+{
+	int m_Num; // number of entries currently stored; lookups scan indices [0, m_Num)
+	int m_aKeys[HASHLIST_BUCKET_SIZE]; // item key of each stored entry
+	int m_aIndex[HASHLIST_BUCKET_SIZE]; // index of the item within the snapshot, parallel to m_aKeys
+};
+
+inline size_t CalcHashID(int Key) // maps an item key to a bucket index in [0, HASHLIST_SIZE)
+{
+	// djb2 (http://www.cse.yorku.ca/~oz/hash.html)
+	unsigned Hash = 5381; // initial hash value
+	for(unsigned Shift = 0; Shift < sizeof(int); Shift++) // one iteration per byte of Key, lowest-order byte first
+		Hash = ((Hash << 5) + Hash) + ((Key >> (Shift * 8)) & 0xFF); // Hash * 33 + current byte (unsigned, so it wraps)
+	return Hash % HASHLIST_SIZE; // reduce to a valid bucket index
+}
+
 static void GenerateHash(CItemList *pHashlist, CSnapshot *pSnapshot)
 {
 	for(int i = 0; i < HASHLIST_SIZE; i++)
@@ -164,8 +174,8 @@ static void GenerateHash(CItemList *pHashlist, CSnapshot *pSnapshot)
 	for(int i = 0; i < pSnapshot->NumItems(); i++)
 	{
 		int Key = pSnapshot->GetItem(i)->Key();
-		int HashID = ((Key >> 12) & 0xf0) | (Key & 0xf);
-		if(pHashlist[HashID].m_Num != 64)
+		size_t HashID = CalcHashID(Key);
+		if(pHashlist[HashID].m_Num < HASHLIST_BUCKET_SIZE)
 		{
 			pHashlist[HashID].m_aIndex[pHashlist[HashID].m_Num] = i;
 			pHashlist[HashID].m_aKeys[pHashlist[HashID].m_Num] = Key;
@@ -176,7 +186,7 @@ static void GenerateHash(CItemList *pHashlist, CSnapshot *pSnapshot)

 static int GetItemIndexHashed(int Key, const CItemList *pHashlist)
 {
-	int HashID = ((Key >> 12) & 0xf0) | (Key & 0xf);
+	size_t HashID = CalcHashID(Key);
 	for(int i = 0; i < pHashlist[HashID].m_Num; i++)
 	{
 		if(pHashlist[HashID].m_aKeys[i] == Key)