refactored and fixed crash bug, some memleaks

2016-12-24 12:12:27 +01:00 · 2016-12-24 12:12:27 +01:00 · 53cc45e7e1
parent 30cdbff8a6
commit 53cc45e7e1
11 changed files with 2018 additions and 514 deletions
--- a/119
+++ b/119
@ -26,8 +26,9 @@ class AsyncSocketConfig(object):
  def __init__(self):
    self.sdks = {}
    self.binaries = []
    self.sm_root = None
    self.extensions = []
    self.generated_headers = None
    self.sm_root = None
  @property
  def tag(self):
@ -39,22 +40,46 @@ class AsyncSocketConfig(object):
    if builder.options.sm_path:
      self.sm_root = builder.options.sm_path
    else:
-      self.sm_root = ResolveEnvPath('SOURCEMOD17', 'sourcemod-1.7')
+      self.sm_root = ResolveEnvPath('SOURCEMOD18', 'sourcemod-1.8')
      if not self.sm_root:
        self.sm_root = ResolveEnvPath('SOURCEMOD', 'sourcemod')
      if not self.sm_root:
-        self.sm_root = ResolveEnvPath('SMCENTRAL', 'sourcemod-central')
+        self.sm_root = ResolveEnvPath('SOURCEMOD_DEV', 'sourcemod-central')
    if not self.sm_root or not os.path.isdir(self.sm_root):
-      raise Exception('Could not find a source copy of Sourcemod')
+      raise Exception('Could not find a source copy of SourceMod')
    self.sm_root = Normalize(self.sm_root)
  def configure(self):
    builder.AddConfigureFile('pushbuild.txt')
    cxx = builder.DetectCompilers()
    if cxx.like('gcc'):
      self.configure_gcc(cxx)
    elif cxx.vendor == 'msvc':
      self.configure_msvc(cxx)
    # Optimizaiton
    if builder.options.opt == '1':
      cxx.defines += ['NDEBUG']
    # Debugging
    if builder.options.debug == '1':
      cxx.defines += ['DEBUG', '_DEBUG']
    # Platform-specifics
    if builder.target_platform == 'linux':
      self.configure_linux(cxx)
    elif builder.target_platform == 'mac':
      self.configure_mac(cxx)
    elif builder.target_platform == 'windows':
      self.configure_windows(cxx)
    # Finish up.
    cxx.includes += [
      os.path.join(self.sm_root, 'public'),
    ]
  def configure_gcc(self, cxx):
    cxx.defines += [
      'stricmp=strcasecmp',
      '_stricmp=strcasecmp',
@ -66,47 +91,51 @@ class AsyncSocketConfig(object):
    cxx.cflags += [
      '-pipe',
      '-fno-strict-aliasing',
-        '-Wall',
+#      '-Wall',
      '-Werror',
      '-Wno-unused',
      '-Wno-switch',
      '-Wno-array-bounds',
      '-msse',
      '-m32',
      '-fvisibility=hidden',
    ]
    cxx.cxxflags += [
      '-std=c++11',
      '-fno-exceptions',
      '-fno-threadsafe-statics',
      '-Wno-non-virtual-dtor',
      '-Wno-overloaded-virtual',
      '-fvisibility-inlines-hidden',
    ]
    cxx.linkflags += ['-m32']
    have_gcc = cxx.vendor == 'gcc'
    have_clang = cxx.vendor == 'clang'
-      if have_clang or (have_gcc and cxx.version >= '4'):
+    if cxx.version >= 'clang-3.6':
-        cxx.cflags += ['-fvisibility=hidden']
+      cxx.cxxflags += ['-Wno-inconsistent-missing-override']
-        cxx.cxxflags += ['-fvisibility-inlines-hidden']
+    if have_clang or (cxx.version >= 'gcc-4.6'):
        if have_clang or (have_gcc and cxx.version >= '4.6'):
      cxx.cflags += ['-Wno-narrowing']
-        if (have_gcc and cxx.version >= '4.7') or (have_clang and cxx.version >= '3'):
+    if have_clang or (cxx.version >= 'gcc-4.7'):
      cxx.cxxflags += ['-Wno-delete-non-virtual-dtor']
-        if have_gcc and cxx.version >= '4.8':
+    if cxx.version >= 'gcc-4.8':
      cxx.cflags += ['-Wno-unused-result']
    if have_clang:
      cxx.cxxflags += ['-Wno-implicit-exception-spec-mismatch']
-        if (builder.target_platform == 'mac' and cxx.version >= '5.1') or cxx.version >= '3.4':
+      if cxx.version >= 'apple-clang-5.1' or cxx.version >= 'clang-3.4':
        cxx.cxxflags += ['-Wno-deprecated-register']
      else:
        cxx.cxxflags += ['-Wno-deprecated']
      cxx.cflags += ['-Wno-sometimes-uninitialized']
      cxx.linkflags += ['-m32']
      cxx.cxxflags += [
        '-fno-exceptions',
        '-fno-threadsafe-statics',
        '-Wno-non-virtual-dtor',
        '-Wno-overloaded-virtual',
      ]
    if have_gcc:
      cxx.cflags += ['-mfpmath=sse']
-    elif cxx.vendor == 'msvc':
+
    if builder.options.opt == '1':
      cxx.cflags += ['-O3']
  def configure_msvc(self, cxx):
    if builder.options.debug == '1':
      cxx.cflags += ['/MTd']
      cxx.linkflags += ['/NODEFAULTLIB:libcmt']
@ -128,7 +157,6 @@ class AsyncSocketConfig(object):
    ]
    cxx.linkflags += [
      '/MACHINE:X86',
        '/SUBSYSTEM:WINDOWS',
      'kernel32.lib',
      'user32.lib',
      'gdi32.lib',
@ -143,36 +171,26 @@ class AsyncSocketConfig(object):
      'odbccp32.lib',
    ]
    # Optimization
    if builder.options.opt == '1':
-      cxx.defines += ['NDEBUG']
+      cxx.cflags += ['/Ox', '/Zo']
      if cxx.like('gcc'):
        cxx.cflags += ['-O3']
      elif cxx.like('msvc'):
        cxx.cflags += ['/Ox']
      cxx.linkflags += ['/OPT:ICF', '/OPT:REF']
    # Debugging
    if builder.options.debug == '1':
      cxx.defines += ['DEBUG', '_DEBUG']
      if cxx.like('msvc'):
      cxx.cflags += ['/Od', '/RTC1']
        if cxx.version >= 1600:
          cxx.cflags += ['/d2Zi+']
    # This needs to be after our optimization flags which could otherwise disable it.
    if cxx.vendor == 'msvc':
    # Don't omit the frame pointer.
    cxx.cflags += ['/Oy-']
-    # Platform-specifics
+  def configure_linux(self, cxx):
    if builder.target_platform == 'linux':
    cxx.defines += ['_LINUX', 'POSIX']
    cxx.linkflags += ['-Wl,--exclude-libs,ALL', '-lm']
    if cxx.vendor == 'gcc':
      cxx.linkflags += ['-static-libgcc']
    elif cxx.vendor == 'clang':
      cxx.linkflags += ['-lgcc_eh']
-    elif builder.target_platform == 'mac':
+
  def configure_mac(self, cxx):
    cxx.defines += ['OSX', '_OSX', 'POSIX']
    cxx.cflags += ['-mmacosx-version-min=10.5']
    cxx.linkflags += [
@ -182,17 +200,19 @@ class AsyncSocketConfig(object):
      '-stdlib=libstdc++',
    ]
    cxx.cxxflags += ['-stdlib=libstdc++']
-    elif builder.target_platform == 'windows':
+
  def configure_windows(self, cxx):
    cxx.defines += ['WIN32', '_WINDOWS']
  def ConfigureForExtension(self, context, compiler):
    compiler.cxxincludes += [
      os.path.join(context.currentSourcePath),
-	  os.path.join(context.currentSourcePath, 'libs', 'libuv-v1.5.0', 'include'),
+      os.path.join(context.currentSourcePath, 'sdk'),
      os.path.join(self.sm_root, 'public'),
      os.path.join(self.sm_root, 'public', 'amtl'),
      os.path.join(self.sm_root, 'public', 'extensions'),
-      os.path.join(self.sm_root, 'public', 'sourcepawn')
+      os.path.join(self.sm_root, 'sourcepawn', 'include'),
      os.path.join(self.sm_root, 'public', 'amtl', 'amtl'),
      os.path.join(self.sm_root, 'public', 'amtl'),
    ]
    return compiler
@ -200,5 +220,14 @@ AsyncSocket = AsyncSocketConfig()
 AsyncSocket.detectSDKs()
 AsyncSocket.configure()
-builder.RunBuildScripts(['extension/AMBuilder'], { 'AsyncSocket': AsyncSocket })
+# Add additional buildscripts here
-builder.RunScript('PackageScript', { 'AsyncSocket': AsyncSocket })
+BuildScripts = [
  'extension/AMBuilder'
 ]
 if builder.backend == 'amb2':
  BuildScripts += [
    'PackageScript',
  ]
 builder.RunBuildScripts(BuildScripts, { 'AsyncSocket': AsyncSocket})
--- a/README.md
+++ b/README.md
@ -0,0 +1,10 @@
 # async_connect
 You need to compile https://github.com/libuv/libuv for 32bit like below if you have a 64bit OS.
 ```
 sh autgen.sh
 ./configure --build=i686-pc-linux-gnu "CFLAGS=-m32" "CXXFLAGS=-m32" "LDFLAGS=-m32" --disable-shared --enable-static
 make
 ```
 Put the `libuv/include` folder and `libuv/.libs/libuv.a` in `extensions/libuv`.
--- a/extension/AMBuilder
+++ b/extension/AMBuilder
@ -5,7 +5,7 @@ binary = builder.compiler.Library('async_socket.ext')
 AsyncSocket.ConfigureForExtension(builder, binary.compiler)
 binary.compiler.includes += [
-  os.path.join(builder.sourcePath, 'extension', 'libs', 'libuv-v1.5.0', 'include')
+  os.path.join(builder.sourcePath, 'extension', 'libuv', 'include')
 ]
 binary.sources += [
@ -14,11 +14,17 @@ binary.sources += [
  os.path.join(AsyncSocket.sm_root, 'public', 'smsdk_ext.cpp')
 ]
-binary.compiler.defines += ['SOURCEMOD_BUILD']
+binary.compiler.defines += [
  'SOURCEMOD_BUILD'
 ]
-binary.compiler.postlink += [os.path.join(builder.sourcePath, 'extension', 'libs', 'libuv-v1.5.0', '.libs', 'libuv.a')]
+binary.compiler.postlink += [
  os.path.join(builder.sourcePath, 'extension', 'libuv', 'libuv.a')
 ]
 if builder.target_platform == 'windows':
  binary.compiler.linkflags += ['ws2_32.lib']
-AsyncSocket.extensions += [builder.Add(binary)]
+AsyncSocket.extensions += [
  builder.Add(binary)
 ]
--- a/extension/atomicops.h
+++ b/extension/atomicops.h
@ -0,0 +1,664 @@
 // ©2013-2016 Cameron Desrochers.
 // Distributed under the simplified BSD license (see the license file that
 // should have come with this header).
 // Uses Jeff Preshing's semaphore implementation (under the terms of its
 // separate zlib license, embedded below).
 #pragma once
 // Provides portable (VC++2010+, Intel ICC 13, GCC 4.7+, and anything C++11 compliant) implementation
 // of low-level memory barriers, plus a few semi-portable utility macros (for inlining and alignment).
 // Also has a basic atomic type (limited to hardware-supported atomics with no memory ordering guarantees).
 // Uses the AE_* prefix for macros (historical reasons), and the "moodycamel" namespace for symbols.
 #include <cassert>
 #include <type_traits>
 #include <cerrno>
 #include <cstdint>
 #include <ctime>
 // Platform detection
 #if defined(__INTEL_COMPILER)
 #define AE_ICC
 #elif defined(_MSC_VER)
 #define AE_VCPP
 #elif defined(__GNUC__)
 #define AE_GCC
 #endif
 #if defined(_M_IA64) || defined(__ia64__)
 #define AE_ARCH_IA64
 #elif defined(_WIN64) || defined(__amd64__) || defined(_M_X64) || defined(__x86_64__)
 #define AE_ARCH_X64
 #elif defined(_M_IX86) || defined(__i386__)
 #define AE_ARCH_X86
 #elif defined(_M_PPC) || defined(__powerpc__)
 #define AE_ARCH_PPC
 #else
 #define AE_ARCH_UNKNOWN
 #endif
 // AE_UNUSED
 #define AE_UNUSED(x) ((void)x)
 // AE_FORCEINLINE
 #if defined(AE_VCPP) || defined(AE_ICC)
 #define AE_FORCEINLINE __forceinline
 #elif defined(AE_GCC)
 //#define AE_FORCEINLINE __attribute__((always_inline)) 
 #define AE_FORCEINLINE inline
 #else
 #define AE_FORCEINLINE inline
 #endif
 // AE_ALIGN
 #if defined(AE_VCPP) || defined(AE_ICC)
 #define AE_ALIGN(x) __declspec(align(x))
 #elif defined(AE_GCC)
 #define AE_ALIGN(x) __attribute__((aligned(x)))
 #else
 // Assume GCC compliant syntax...
 #define AE_ALIGN(x) __attribute__((aligned(x)))
 #endif
 // Portable atomic fences implemented below:
 namespace moodycamel {
 enum memory_order {
 	memory_order_relaxed,
 	memory_order_acquire,
 	memory_order_release,
 	memory_order_acq_rel,
 	memory_order_seq_cst,
 	// memory_order_sync: Forces a full sync:
 	// #LoadLoad, #LoadStore, #StoreStore, and most significantly, #StoreLoad
 	memory_order_sync = memory_order_seq_cst
 };
 }    // end namespace moodycamel
 #if (defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))) || defined(AE_ICC)
 // VS2010 and ICC13 don't support std::atomic_*_fence, implement our own fences
 #include <intrin.h>
 #if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
 #define AeFullSync _mm_mfence
 #define AeLiteSync _mm_mfence
 #elif defined(AE_ARCH_IA64)
 #define AeFullSync __mf
 #define AeLiteSync __mf
 #elif defined(AE_ARCH_PPC)
 #include <ppcintrinsics.h>
 #define AeFullSync __sync
 #define AeLiteSync __lwsync
 #endif
 #ifdef AE_VCPP
 #pragma warning(push)
 #pragma warning(disable: 4365)		// Disable erroneous 'conversion from long to unsigned int, signed/unsigned mismatch' error when using `assert`
 #ifdef __cplusplus_cli
 #pragma managed(push, off)
 #endif
 #endif
 namespace moodycamel {
 AE_FORCEINLINE void compiler_fence(memory_order order)
 {
 	switch (order) {
 		case memory_order_relaxed: break;
 		case memory_order_acquire: _ReadBarrier(); break;
 		case memory_order_release: _WriteBarrier(); break;
 		case memory_order_acq_rel: _ReadWriteBarrier(); break;
 		case memory_order_seq_cst: _ReadWriteBarrier(); break;
 		default: assert(false);
 	}
 }
 // x86/x64 have a strong memory model -- all loads and stores have
 // acquire and release semantics automatically (so only need compiler
 // barriers for those).
 #if defined(AE_ARCH_X86) || defined(AE_ARCH_X64)
 AE_FORCEINLINE void fence(memory_order order)
 {
 	switch (order) {
 		case memory_order_relaxed: break;
 		case memory_order_acquire: _ReadBarrier(); break;
 		case memory_order_release: _WriteBarrier(); break;
 		case memory_order_acq_rel: _ReadWriteBarrier(); break;
 		case memory_order_seq_cst:
 			_ReadWriteBarrier();
 			AeFullSync();
 			_ReadWriteBarrier();
 			break;
 		default: assert(false);
 	}
 }
 #else
 AE_FORCEINLINE void fence(memory_order order)
 {
 	// Non-specialized arch, use heavier memory barriers everywhere just in case :-(
 	switch (order) {
 		case memory_order_relaxed:
 			break;
 		case memory_order_acquire:
 			_ReadBarrier();
 			AeLiteSync();
 			_ReadBarrier();
 			break;
 		case memory_order_release:
 			_WriteBarrier();
 			AeLiteSync();
 			_WriteBarrier();
 			break;
 		case memory_order_acq_rel:
 			_ReadWriteBarrier();
 			AeLiteSync();
 			_ReadWriteBarrier();
 			break;
 		case memory_order_seq_cst:
 			_ReadWriteBarrier();
 			AeFullSync();
 			_ReadWriteBarrier();
 			break;
 		default: assert(false);
 	}
 }
 #endif
 }    // end namespace moodycamel
 #else
 // Use standard library of atomics
 #include <atomic>
 namespace moodycamel {
 AE_FORCEINLINE void compiler_fence(memory_order order)
 {
 	switch (order) {
 		case memory_order_relaxed: break;
 		case memory_order_acquire: std::atomic_signal_fence(std::memory_order_acquire); break;
 		case memory_order_release: std::atomic_signal_fence(std::memory_order_release); break;
 		case memory_order_acq_rel: std::atomic_signal_fence(std::memory_order_acq_rel); break;
 		case memory_order_seq_cst: std::atomic_signal_fence(std::memory_order_seq_cst); break;
 		default: assert(false);
 	}
 }
 AE_FORCEINLINE void fence(memory_order order)
 {
 	switch (order) {
 		case memory_order_relaxed: break;
 		case memory_order_acquire: std::atomic_thread_fence(std::memory_order_acquire); break;
 		case memory_order_release: std::atomic_thread_fence(std::memory_order_release); break;
 		case memory_order_acq_rel: std::atomic_thread_fence(std::memory_order_acq_rel); break;
 		case memory_order_seq_cst: std::atomic_thread_fence(std::memory_order_seq_cst); break;
 		default: assert(false);
 	}
 }
 }    // end namespace moodycamel
 #endif
 #if !defined(AE_VCPP) || (_MSC_VER >= 1700 && !defined(__cplusplus_cli))
 #define AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 #endif
 #ifdef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 #include <atomic>
 #endif
 #include <utility>
 // WARNING: *NOT* A REPLACEMENT FOR std::atomic. READ CAREFULLY:
 // Provides basic support for atomic variables -- no memory ordering guarantees are provided.
 // The guarantee of atomicity is only made for types that already have atomic load and store guarantees
 // at the hardware level -- on most platforms this generally means aligned pointers and integers (only).
 namespace moodycamel {
 template<typename T>
 class weak_atomic
 {
 public:
 	weak_atomic() { }
 #ifdef AE_VCPP
 #pragma warning(disable: 4100)		// Get rid of (erroneous) 'unreferenced formal parameter' warning
 #endif
 	template<typename U> weak_atomic(U&& x) : value(std::forward<U>(x)) {  }
 #ifdef __cplusplus_cli
 	// Work around bug with universal reference/nullptr combination that only appears when /clr is on
 	weak_atomic(nullptr_t) : value(nullptr) {  }
 #endif
 	weak_atomic(weak_atomic const& other) : value(other.value) {  }
 	weak_atomic(weak_atomic&& other) : value(std::move(other.value)) {  }
 #ifdef AE_VCPP
 #pragma warning(default: 4100)
 #endif
 	AE_FORCEINLINE operator T() const { return load(); }
 #ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 	template<typename U> AE_FORCEINLINE weak_atomic const& operator=(U&& x) { value = std::forward<U>(x); return *this; }
 	AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other) { value = other.value; return *this; }
 	AE_FORCEINLINE T load() const { return value; }
 	AE_FORCEINLINE T fetch_add_acquire(T increment)
 	{
 #if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
 		if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
 #if defined(_M_AMD64)
 		else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
 #endif
 #else
 #error Unsupported platform
 #endif
 		assert(false && "T must be either a 32 or 64 bit type");
 		return value;
 	}
 	AE_FORCEINLINE T fetch_add_release(T increment)
 	{
 #if defined(AE_ARCH_X64) || defined(AE_ARCH_X86)
 		if (sizeof(T) == 4) return _InterlockedExchangeAdd((long volatile*)&value, (long)increment);
 #if defined(_M_AMD64)
 		else if (sizeof(T) == 8) return _InterlockedExchangeAdd64((long long volatile*)&value, (long long)increment);
 #endif
 #else
 #error Unsupported platform
 #endif
 		assert(false && "T must be either a 32 or 64 bit type");
 		return value;
 	}
 #else
 	template<typename U>
 	AE_FORCEINLINE weak_atomic const& operator=(U&& x)
 	{
 		value.store(std::forward<U>(x), std::memory_order_relaxed);
 		return *this;
 	}
 	AE_FORCEINLINE weak_atomic const& operator=(weak_atomic const& other)
 	{
 		value.store(other.value.load(std::memory_order_relaxed), std::memory_order_relaxed);
 		return *this;
 	}
 	AE_FORCEINLINE T load() const { return value.load(std::memory_order_relaxed); }
 	AE_FORCEINLINE T fetch_add_acquire(T increment)
 	{
 		return value.fetch_add(increment, std::memory_order_acquire);
 	}
 	AE_FORCEINLINE T fetch_add_release(T increment)
 	{
 		return value.fetch_add(increment, std::memory_order_release);
 	}
 #endif
 private:
 #ifndef AE_USE_STD_ATOMIC_FOR_WEAK_ATOMIC
 	// No std::atomic support, but still need to circumvent compiler optimizations.
 	// `volatile` will make memory access slow, but is guaranteed to be reliable.
 	volatile T value;
 #else
 	std::atomic<T> value;
 #endif
 };
 }	// end namespace moodycamel
 // Portable single-producer, single-consumer semaphore below:
 #if defined(_WIN32)
 // Avoid including windows.h in a header; we only need a handful of
 // items, so we'll redeclare them here (this is relatively safe since
 // the API generally has to remain stable between Windows versions).
 // I know this is an ugly hack but it still beats polluting the global
 // namespace with thousands of generic names or adding a .cpp for nothing.
 extern "C" {
 	struct _SECURITY_ATTRIBUTES;
 	__declspec(dllimport) void* __stdcall CreateSemaphoreW(_SECURITY_ATTRIBUTES* lpSemaphoreAttributes, long lInitialCount, long lMaximumCount, const wchar_t* lpName);
 	__declspec(dllimport) int __stdcall CloseHandle(void* hObject);
 	__declspec(dllimport) unsigned long __stdcall WaitForSingleObject(void* hHandle, unsigned long dwMilliseconds);
 	__declspec(dllimport) int __stdcall ReleaseSemaphore(void* hSemaphore, long lReleaseCount, long* lpPreviousCount);
 }
 #elif defined(__MACH__)
 #include <mach/mach.h>
 #elif defined(__unix__)
 #include <semaphore.h>
 #endif
 namespace moodycamel
 {
 	// Code in the spsc_sema namespace below is an adaptation of Jeff Preshing's
 	// portable + lightweight semaphore implementations, originally from
 	// https://github.com/preshing/cpp11-on-multicore/blob/master/common/sema.h
 	// LICENSE:
 	// Copyright (c) 2015 Jeff Preshing
 	//
 	// This software is provided 'as-is', without any express or implied
 	// warranty. In no event will the authors be held liable for any damages
 	// arising from the use of this software.
 	//
 	// Permission is granted to anyone to use this software for any purpose,
 	// including commercial applications, and to alter it and redistribute it
 	// freely, subject to the following restrictions:
 	//
 	// 1. The origin of this software must not be misrepresented; you must not
 	//    claim that you wrote the original software. If you use this software
 	//    in a product, an acknowledgement in the product documentation would be
 	//    appreciated but is not required.
 	// 2. Altered source versions must be plainly marked as such, and must not be
 	//    misrepresented as being the original software.
 	// 3. This notice may not be removed or altered from any source distribution.
 	namespace spsc_sema
 	{
 #if defined(_WIN32)
 		class Semaphore
 		{
 		private:
 		    void* m_hSema;
 		    Semaphore(const Semaphore& other);
 		    Semaphore& operator=(const Semaphore& other);
 		public:
 		    Semaphore(int initialCount = 0)
 		    {
 		        assert(initialCount >= 0);
 		        const long maxLong = 0x7fffffff;
 		        m_hSema = CreateSemaphoreW(nullptr, initialCount, maxLong, nullptr);
 		    }
 		    ~Semaphore()
 		    {
 		        CloseHandle(m_hSema);
 		    }
 		    void wait()
 		    {
 		    	const unsigned long infinite = 0xffffffff;
 		        WaitForSingleObject(m_hSema, infinite);
 		    }
 			bool try_wait()
 			{
 				const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
 				return WaitForSingleObject(m_hSema, 0) != RC_WAIT_TIMEOUT;
 			}
 			bool timed_wait(std::uint64_t usecs)
 			{
 				const unsigned long RC_WAIT_TIMEOUT = 0x00000102;
 				return WaitForSingleObject(m_hSema, (unsigned long)(usecs / 1000)) != RC_WAIT_TIMEOUT;
 			}
 		    void signal(int count = 1)
 		    {
 		        ReleaseSemaphore(m_hSema, count, nullptr);
 		    }
 		};
 #elif defined(__MACH__)
 		//---------------------------------------------------------
 		// Semaphore (Apple iOS and OSX)
 		// Can't use POSIX semaphores due to http://lists.apple.com/archives/darwin-kernel/2009/Apr/msg00010.html
 		//---------------------------------------------------------
 		class Semaphore
 		{
 		private:
 		    semaphore_t m_sema;
 		    Semaphore(const Semaphore& other);
 		    Semaphore& operator=(const Semaphore& other);
 		public:
 		    Semaphore(int initialCount = 0)
 		    {
 		        assert(initialCount >= 0);
 		        semaphore_create(mach_task_self(), &m_sema, SYNC_POLICY_FIFO, initialCount);
 		    }
 		    ~Semaphore()
 		    {
 		        semaphore_destroy(mach_task_self(), m_sema);
 		    }
 		    void wait()
 		    {
 		        semaphore_wait(m_sema);
 		    }
 			bool try_wait()
 			{
 				return timed_wait(0);
 			}
 			bool timed_wait(std::int64_t timeout_usecs)
 			{
 				mach_timespec_t ts;
 				ts.tv_sec = timeout_usecs / 1000000;
 				ts.tv_nsec = (timeout_usecs % 1000000) * 1000;
 				// added in OSX 10.10: https://developer.apple.com/library/prerelease/mac/documentation/General/Reference/APIDiffsMacOSX10_10SeedDiff/modules/Darwin.html
 				kern_return_t rc = semaphore_timedwait(m_sema, ts);
 				return rc != KERN_OPERATION_TIMED_OUT;
 			}
 		    void signal()
 		    {
 		        semaphore_signal(m_sema);
 		    }
 		    void signal(int count)
 		    {
 		        while (count-- > 0)
 		        {
 		            semaphore_signal(m_sema);
 		        }
 		    }
 		};
 #elif defined(__unix__)
 		//---------------------------------------------------------
 		// Semaphore (POSIX, Linux)
 		//---------------------------------------------------------
 		class Semaphore
 		{
 		private:
 		    sem_t m_sema;
 		    Semaphore(const Semaphore& other);
 		    Semaphore& operator=(const Semaphore& other);
 		public:
 		    Semaphore(int initialCount = 0)
 		    {
 		        assert(initialCount >= 0);
 		        sem_init(&m_sema, 0, initialCount);
 		    }
 		    ~Semaphore()
 		    {
 		        sem_destroy(&m_sema);
 		    }
 		    void wait()
 		    {
 		        // http://stackoverflow.com/questions/2013181/gdb-causes-sem-wait-to-fail-with-eintr-error
 		        int rc;
 		        do
 		        {
 		            rc = sem_wait(&m_sema);
 		        }
 		        while (rc == -1 && errno == EINTR);
 		    }
 			bool try_wait()
 			{
 				int rc;
 				do {
 					rc = sem_trywait(&m_sema);
 				} while (rc == -1 && errno == EINTR);
 				return !(rc == -1 && errno == EAGAIN);
 			}
 			bool timed_wait(std::uint64_t usecs)
 			{
 				struct timespec ts;
 				const int usecs_in_1_sec = 1000000;
 				const int nsecs_in_1_sec = 1000000000;
 				clock_gettime(CLOCK_REALTIME, &ts);
 				ts.tv_sec += usecs / usecs_in_1_sec;
 				ts.tv_nsec += (usecs % usecs_in_1_sec) * 1000;
 				// sem_timedwait bombs if you have more than 1e9 in tv_nsec
 				// so we have to clean things up before passing it in
 				if (ts.tv_nsec > nsecs_in_1_sec) {
 					ts.tv_nsec -= nsecs_in_1_sec;
 					++ts.tv_sec;
 				}
 				int rc;
 				do {
 					rc = sem_timedwait(&m_sema, &ts);
 				} while (rc == -1 && errno == EINTR);
 				return !(rc == -1 && errno == ETIMEDOUT);
 			}
 		    void signal()
 		    {
 		        sem_post(&m_sema);
 		    }
 		    void signal(int count)
 		    {
 		        while (count-- > 0)
 		        {
 		            sem_post(&m_sema);
 		        }
 		    }
 		};
 #else
 #error Unsupported platform! (No semaphore wrapper available)
 #endif
 		//---------------------------------------------------------
 		// LightweightSemaphore
 		//---------------------------------------------------------
 		class LightweightSemaphore
 		{
 		public:
 			typedef std::make_signed<std::size_t>::type ssize_t;
 		private:
 		    weak_atomic<ssize_t> m_count;
 		    Semaphore m_sema;
 		    bool waitWithPartialSpinning(std::int64_t timeout_usecs = -1)
 		    {
 		        ssize_t oldCount;
 		        // Is there a better way to set the initial spin count?
 		        // If we lower it to 1000, testBenaphore becomes 15x slower on my Core i7-5930K Windows PC,
 		        // as threads start hitting the kernel semaphore.
 		        int spin = 10000;
 		        while (--spin >= 0)
 		        {
 		            if (m_count.load() > 0)
 		            {
 		                m_count.fetch_add_acquire(-1);
 		                return true;
 		            }
 		            compiler_fence(memory_order_acquire);     // Prevent the compiler from collapsing the loop.
 		        }
 		        oldCount = m_count.fetch_add_acquire(-1);
 				if (oldCount > 0)
 					return true;
 		        if (timeout_usecs < 0)
 				{
 					m_sema.wait();
 					return true;
 				}
 				if (m_sema.timed_wait(timeout_usecs))
 					return true;
 				// At this point, we've timed out waiting for the semaphore, but the
 				// count is still decremented indicating we may still be waiting on
 				// it. So we have to re-adjust the count, but only if the semaphore
 				// wasn't signaled enough times for us too since then. If it was, we
 				// need to release the semaphore too.
 				while (true)
 				{
 					oldCount = m_count.fetch_add_release(1);
 					if (oldCount < 0)
 						return false;    // successfully restored things to the way they were
 					// Oh, the producer thread just signaled the semaphore after all. Try again:
 					oldCount = m_count.fetch_add_acquire(-1);
 					if (oldCount > 0 && m_sema.try_wait())
 						return true;
 				}
 		    }
 		public:
 		    LightweightSemaphore(ssize_t initialCount = 0) : m_count(initialCount)
 		    {
 		        assert(initialCount >= 0);
 		    }
 		    bool tryWait()
 		    {
 		        if (m_count.load() > 0)
 		        {
 		        	m_count.fetch_add_acquire(-1);
 		        	return true;
 		        }
 		        return false;
 		    }
 		    void wait()
 		    {
 		        if (!tryWait())
 		            waitWithPartialSpinning();
 		    }
 			bool wait(std::int64_t timeout_usecs)
 			{
 				return tryWait() || waitWithPartialSpinning(timeout_usecs);
 			}
 		    void signal(ssize_t count = 1)
 		    {
 		    	assert(count >= 0);
 		        ssize_t oldCount = m_count.fetch_add_release(count);
 		        assert(oldCount >= -1);
 		        if (oldCount < 0)
 		        {
 		            m_sema.signal(1);
 		        }
 		    }
 		    ssize_t availableApprox() const
 		    {
 		    	ssize_t count = m_count.load();
 		    	return count > 0 ? count : 0;
 		    }
 		};
 	}	// end namespace spsc_sema
 }	// end namespace moodycamel
 #if defined(AE_VCPP) && (_MSC_VER < 1700 || defined(__cplusplus_cli))
 #pragma warning(pop)
 #ifdef __cplusplus_cli
 #pragma managed(pop)
 #endif
 #endif
--- a/extension/context.cpp
+++ b/extension/context.cpp
@ -1,91 +1,86 @@
 #include "context.h"
-AsyncSocketContext::AsyncSocketContext(IPluginContext* pContext) {
+CAsyncSocketContext::CAsyncSocketContext(IPluginContext *pContext)
-	this->pContext = pContext;
+{
 	this->m_pContext = pContext;
 	socket = NULL;
 	stream = NULL;
-	connectCallback = NULL;
+	m_pConnectCallback = NULL;
-	errorCallback = NULL;
+	m_pErrorCallback = NULL;
-	dataCallback = NULL;
+	m_pDataCallback = NULL;
 }
-AsyncSocketContext::~AsyncSocketContext() {
+CAsyncSocketContext::~CAsyncSocketContext()
-	if (connect_req != NULL) {
+{
-		free(connect_req);
+	if(socket != NULL)
-	}
+		uv_close((uv_handle_t *)socket, NULL);
-	if (socket != NULL) {
+	if(m_pConnectCallback)
-		uv_close((uv_handle_t *) socket, NULL);
+		forwards->ReleaseForward(m_pConnectCallback);
 	}
-	if (connectCallback) {
+	if(m_pErrorCallback)
-		forwards->ReleaseForward(connectCallback);
+		forwards->ReleaseForward(m_pErrorCallback);
 	}
-	if (errorCallback) {
+	if(m_pDataCallback)
-		forwards->ReleaseForward(errorCallback);
+		forwards->ReleaseForward(m_pDataCallback);
 	}
 	if (dataCallback) {
 		forwards->ReleaseForward(dataCallback);
 	}
 }
-void AsyncSocketContext::Connected() {
+void CAsyncSocketContext::Connected()
-	if (!connectCallback) {
+{
 	if(!m_pConnectCallback)
 		return;
 	}
-	connectCallback->PushCell(hndl);
+	m_pConnectCallback->PushCell(m_Handle);
-    connectCallback->Execute(NULL);
+    m_pConnectCallback->Execute(NULL);
 }
-void AsyncSocketContext::OnError(int error) {
+void CAsyncSocketContext::OnError(int error)
-	if (!errorCallback) {
+{
 	if(!m_pErrorCallback)
 		return;
 	}
-	errorCallback->PushCell(hndl);
+	m_pErrorCallback->PushCell(m_Handle);
-	errorCallback->PushCell(error);
+	m_pErrorCallback->PushCell(error);
-	errorCallback->PushString(uv_err_name(error));
+	m_pErrorCallback->PushString(uv_err_name(error));
-	errorCallback->Execute(NULL);
+	m_pErrorCallback->Execute(NULL);
 }
-void AsyncSocketContext::OnData(char* data, ssize_t size) {
+void CAsyncSocketContext::OnData(char* data, ssize_t size)
-	if (!dataCallback) {
+{
 	if(!m_pDataCallback)
 		return;
 	}
-	dataCallback->PushCell(hndl);
+	m_pDataCallback->PushCell(m_Handle);
-	dataCallback->PushString(data);
+	m_pDataCallback->PushString(data);
-	dataCallback->PushCell(size);
+	m_pDataCallback->PushCell(size);
-    dataCallback->Execute(NULL);
+    m_pDataCallback->Execute(NULL);
 }
-bool AsyncSocketContext::SetConnectCallback(funcid_t function) {
+bool CAsyncSocketContext::SetConnectCallback(funcid_t function)
-	if (connectCallback) {
+{
-		forwards->ReleaseForward(connectCallback);
+	if(m_pConnectCallback)
-	}
+		forwards->ReleaseForward(m_pConnectCallback);
-	connectCallback = forwards->CreateForwardEx(NULL, ET_Single, 1, NULL, Param_Cell);
+	m_pConnectCallback = forwards->CreateForwardEx(NULL, ET_Single, 1, NULL, Param_Cell);
-	return connectCallback->AddFunction(pContext, function);
+	return m_pConnectCallback->AddFunction(m_pContext, function);
 }
-bool AsyncSocketContext::SetErrorCallback(funcid_t function) {
+bool CAsyncSocketContext::SetErrorCallback(funcid_t function)
-	if (connectCallback) {
+{
-		forwards->ReleaseForward(errorCallback);
+	if(m_pConnectCallback)
-	}
+		forwards->ReleaseForward(m_pErrorCallback);
-	errorCallback = forwards->CreateForwardEx(NULL, ET_Single, 3, NULL, Param_Cell, Param_Cell, Param_String);
+	m_pErrorCallback = forwards->CreateForwardEx(NULL, ET_Single, 3, NULL, Param_Cell, Param_Cell, Param_String);
-	return errorCallback->AddFunction(pContext, function);
+	return m_pErrorCallback->AddFunction(m_pContext, function);
 }
-bool AsyncSocketContext::SetDataCallback(funcid_t function) {
+bool CAsyncSocketContext::SetDataCallback(funcid_t function)
-	if (dataCallback) {
+{
-		forwards->ReleaseForward(dataCallback);
+	if(m_pDataCallback)
-	}
+		forwards->ReleaseForward(m_pDataCallback);
-	dataCallback = forwards->CreateForwardEx(NULL, ET_Single, 3, NULL, Param_Cell, Param_String, Param_Cell);
+	m_pDataCallback = forwards->CreateForwardEx(NULL, ET_Single, 3, NULL, Param_Cell, Param_String, Param_Cell);
-	return dataCallback->AddFunction(pContext, function);
+	return m_pDataCallback->AddFunction(m_pContext, function);
 }
--- a/extension/context.h
+++ b/extension/context.h
@ -6,32 +6,31 @@
 #include "smsdk_ext.h"
-class AsyncSocketContext {
+class CAsyncSocketContext
 {
 public:
-    IPluginContext* pContext;
+    IPluginContext *m_pContext;
 	Handle_t m_Handle;
-	Handle_t hndl;
+	char *m_pHost;
 	int m_Port;
-	char* host;
+    IChangeableForward *m_pConnectCallback;
-	int port;
+	IChangeableForward *m_pErrorCallback;
-
+	IChangeableForward *m_pDataCallback;
    IChangeableForward *connectCallback;
 	IChangeableForward *errorCallback;
 	IChangeableForward *dataCallback;
 	uv_getaddrinfo_t resolver;
-	uv_connect_t* connect_req;
+	uv_tcp_t *socket;
-	uv_tcp_t* socket;
+	uv_stream_t *stream;
 	uv_stream_t* stream;
-    AsyncSocketContext(IPluginContext* plugin);
+    CAsyncSocketContext(IPluginContext *plugin);
-    ~AsyncSocketContext();
+    ~CAsyncSocketContext();
 	void Connected();
 	void OnError(int error);
-	void OnData(char* data, ssize_t size);
+	void OnData(char *data, ssize_t size);
 	bool SetConnectCallback(funcid_t function);
 	bool SetErrorCallback(funcid_t function);
--- a/extension/extension.cpp
+++ b/extension/extension.cpp
@ -30,8 +30,8 @@
 */
 #include "extension.h"
 #include "queue.h"
 #include "context.h"
 #include "readerwriterqueue.h"
 #include <uv.h>
 /**
@ -39,277 +39,190 @@
 * @brief Implement extension code here.
 */
-LockedQueue<AsyncSocketContext*> g_connect_queue;
+moodycamel::ReaderWriterQueue<CAsyncSocketContext *> g_ConnectQueue;
-LockedQueue<socket_data_t*> g_data_queue;
+moodycamel::ReaderWriterQueue<CSocketError *> g_ErrorQueue;
-LockedQueue<error_data_t*> g_error_queue;
+moodycamel::ReaderWriterQueue<CSocketData *> g_DataQueue;
-uv_loop_t *loop;
+uv_loop_t *g_UV_Loop;
 uv_thread_t g_UV_LoopThread;
-uv_thread_t loop_thread;
+uv_async_t g_UV_AsyncAdded;
-
+moodycamel::ReaderWriterQueue<CAsyncAddJob> g_AsyncAddQueue;
 uv_async_t g_async_resolve;
 uv_async_t g_async_write;
 AsyncSocket g_AsyncSocket;		/**< Global singleton for extension's main interface */
 SMEXT_LINK(&g_AsyncSocket);
-void push_error(AsyncSocketContext *ctx, int error);
+CAsyncSocketContext *AsyncSocket::GetSocketInstanceByHandle(Handle_t handle)
-
+{
 AsyncSocketContext* AsyncSocket::GetSocketInstanceByHandle(Handle_t handle) {
 	HandleSecurity sec;
 	sec.pOwner = NULL;
 	sec.pIdentity = myself->GetIdentity();
-	AsyncSocketContext *client;
+	CAsyncSocketContext *pContext;
-	if (handlesys->ReadHandle(handle, socketHandleType, &sec, (void**)&client) != HandleError_None)
+	if(handlesys->ReadHandle(handle, socketHandleType, &sec, (void **)&pContext) != HandleError_None)
 		return NULL;
-	return client;
+	return pContext;
 }
-void AsyncSocket::OnHandleDestroy(HandleType_t type, void *object) {
+void AsyncSocket::OnHandleDestroy(HandleType_t type, void *object)
-	if(object != NULL) {
+{
-		AsyncSocketContext *ctx = (AsyncSocketContext *) object;
+	if(object != NULL)
-
+	{
-		delete ctx;
+		CAsyncSocketContext *pContext = (CAsyncSocketContext *)object;
 		delete pContext;
 	}
 }
-void OnGameFrame(bool simulating) {
+void OnGameFrame(bool simulating)
-	if (!g_connect_queue.Empty()) {
+{
-		g_connect_queue.Lock();
+	CAsyncSocketContext *pContext;
-		while(!g_connect_queue.Empty()) {
+	while(g_ConnectQueue.try_dequeue(pContext))
-			g_connect_queue.Pop()->Connected();
+	{
-		}
+		pContext->Connected();
 		g_connect_queue.Unlock();
 	}
-	if (!g_error_queue.Empty()) {
+	CSocketError *pError;
-		g_error_queue.Lock();
+	while(g_ErrorQueue.try_dequeue(pError))
-		while(!g_error_queue.Empty()) {
+	{
-			error_data_t *err = g_error_queue.Pop();
+		pError->pAsyncContext->OnError(pError->Error);
-			err->ctx->OnError(err->err);
+		free(pError);
 			free(err);
 		}
 		g_error_queue.Unlock();
 	}
-	if (!g_data_queue.Empty()) {
+	CSocketData *pData;
-		g_data_queue.Lock();
+	while(g_DataQueue.try_dequeue(pData))
-		while(!g_data_queue.Empty()) {
+	{
-			socket_data_t *data = g_data_queue.Pop();
+		pData->pAsyncContext->OnData(pData->pBuffer, pData->BufferSize);
-			data->ctx->OnData(data->buf, data->size);
+		free(pData->pBuffer);
-
+		free(pData);
 			free(data->buf);
 			free(data);
 	}
-		g_data_queue.Unlock();
+}
 void UV_OnAsyncAdded(uv_async_t *pHandle)
 {
 	CAsyncAddJob Job;
 	while(g_AsyncAddQueue.try_dequeue(Job))
 	{
 		uv_async_t *pAsync = (uv_async_t *)malloc(sizeof(uv_async_t));
 		uv_async_init(g_UV_Loop, pAsync, Job.CallbackFn);
 		pAsync->data = Job.pData;
 		uv_async_send(pAsync);
 	}
 }
 // main event loop thread
-void EventLoop(void* data) {
+void UV_EventLoop(void *data)
-	uv_run(loop, UV_RUN_DEFAULT);
+{
 	uv_run(g_UV_Loop, UV_RUN_DEFAULT);
 }
-void alloc_buffer(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf) {
+void UV_AllocBuffer(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf)
-	buf->base = (char*) malloc(suggested_size);
+{
 	buf->base = (char *)malloc(suggested_size);
 	buf->len = suggested_size;
 }
-void on_read(uv_stream_t *client, ssize_t nread, const uv_buf_t *buf) {
+void UV_HandleCleanup(uv_handle_t *handle)
-	if (nread < 0) {
+{
-		push_error((AsyncSocketContext*) client->data, nread);
+	free(handle);
-		// Should we decide to close the socket? For now let's let the plugin handle errors, including EOF.
+}
 void UV_PushError(CAsyncSocketContext *pContext, int error)
 {
 	CSocketError *pError = (CSocketError *)malloc(sizeof(CSocketError));
 	pError->pAsyncContext = pContext;
 	pError->Error = error;
 	g_ErrorQueue.enqueue(pError);
 }
 void UV_OnRead(uv_stream_t *client, ssize_t nread, const uv_buf_t *buf)
 {
 	CAsyncSocketContext *pContext = (CAsyncSocketContext *)client->data;
 	if(nread < 0)
 	{
 		// Connection closed
 		uv_close((uv_handle_t *)client, NULL);
 		pContext->socket = NULL;
 		UV_PushError((CAsyncSocketContext *)client->data, nread);
 		return;
 	}
-	char *data = (char*) malloc(sizeof(char) * (nread+1));
+	char *data = (char *)malloc(sizeof(char) * (nread + 1));
-	data[nread] = '\0';
+	data[nread] = 0;
 	strncpy(data, buf->base, nread);
-	socket_data_t *s = (socket_data_t *) malloc(sizeof(socket_data_t));
+	CSocketData *pData = (CSocketData *)malloc(sizeof(CSocketData));
 	pData->pAsyncContext = pContext;
 	pData->pBuffer = data;
 	pData->BufferSize = nread;
-	s->ctx = static_cast<AsyncSocketContext*>(client->data);
+	g_DataQueue.enqueue(pData);
 	s->buf = data;
 	s->size = nread;
 	g_data_queue.Lock();
 	g_data_queue.Push(s);
 	g_data_queue.Unlock();
 	free(buf->base);
 }
-void on_connect(uv_connect_t *req, int status) {
+void UV_OnConnect(uv_connect_t *req, int status)
-	AsyncSocketContext *ctx = (AsyncSocketContext*) req->data;
+{
 	CAsyncSocketContext *pContext = (CAsyncSocketContext *)req->data;
-	if (status < 0) {
+	if(status < 0)
-		push_error(ctx, status);
+	{
 		UV_PushError(pContext, status);
 		return;
 	}
-	ctx->connect_req = NULL;
+	pContext->stream = req->handle;
 	ctx->stream = req->handle;
-	g_connect_queue.Lock();
+	g_ConnectQueue.enqueue(pContext);
 	g_connect_queue.Push(ctx);
 	g_connect_queue.Unlock();
 	req->handle->data = req->data;
 	free(req);
-	uv_read_start(ctx->stream, alloc_buffer, on_read);
+	uv_read_start(pContext->stream, UV_AllocBuffer, UV_OnRead);
 }
-void push_error(AsyncSocketContext *ctx, int error) {
+void UV_OnAsyncResolved(uv_getaddrinfo_t *resolver, int status, struct addrinfo *res)
-	error_data_t *err = (error_data_t*) malloc(sizeof(error_data_t));
+{
 	free(resolver->service);
 	CAsyncSocketContext *pContext = (CAsyncSocketContext *) resolver->data;
-	err->ctx = ctx;
+	if(status < 0)
-	err->err = error;
+	{
-
+		UV_PushError(pContext, status);
 	g_error_queue.Lock();
 	g_error_queue.Push(err);
 	g_error_queue.Unlock();
 }
 void on_resolved(uv_getaddrinfo_t *resolver, int status, struct addrinfo *res) {
 	AsyncSocketContext *ctx = (AsyncSocketContext *) resolver->data;
 	if (status < 0) {
 		push_error(ctx, status);
 		return;
 	}
-	uv_connect_t *connect_req = (uv_connect_t*) malloc(sizeof(uv_connect_t));
+	uv_connect_t *connect_req = (uv_connect_t *)malloc(sizeof(uv_connect_t));
-	uv_tcp_t *socket = (uv_tcp_t*) malloc(sizeof(uv_tcp_t));
+	uv_tcp_t *socket = (uv_tcp_t *)malloc(sizeof(uv_tcp_t));
-	ctx->connect_req = connect_req;
+	pContext->socket = socket;
-	ctx->socket = socket;
+	connect_req->data = pContext;
-	connect_req->data = ctx;
+	char addr[32] = {0};
 	uv_ip4_name((struct sockaddr_in *)res->ai_addr, addr, sizeof(addr));
-	char addr[17] = {'\0'};
+	uv_tcp_init(g_UV_Loop, socket);
-	uv_ip4_name((struct sockaddr_in*) res->ai_addr, addr, 16);
+	uv_tcp_connect(connect_req, socket, (const struct sockaddr*) res->ai_addr, UV_OnConnect);
 	uv_tcp_init(loop, socket);
 	uv_tcp_connect(connect_req, socket, (const struct sockaddr*) res->ai_addr, on_connect);
 	uv_freeaddrinfo(res);
 }
-cell_t Socket_Create(IPluginContext *pContext, const cell_t *params) {
+void UV_OnAsyncResolve(uv_async_t *handle)
-	AsyncSocketContext *ctx = new AsyncSocketContext(pContext);
+{
 	CAsyncSocketContext *pAsyncContext = (CAsyncSocketContext *)handle->data;
 	uv_close((uv_handle_t *)handle, UV_HandleCleanup);
-	ctx->hndl = handlesys->CreateHandle(g_AsyncSocket.socketHandleType, ctx, pContext->GetIdentity(), myself->GetIdentity(), NULL);
+	pAsyncContext->resolver.data = pAsyncContext;
-	return ctx->hndl;
+	char *service = (char *)malloc(8);
-}
+	sprintf(service, "%d", pAsyncContext->m_Port);
 cell_t Socket_Connect(IPluginContext *pContext, const cell_t *params) {
 	AsyncSocketContext *ctx = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if (ctx == NULL) {
 		return pContext->ThrowNativeError("Invalid socket handle");
 	}
 	if (params[3] < 0 || params[3] > 65535) {
 		return pContext->ThrowNativeError("Invalid port specified");
 	}
 	char *address = NULL;
 	pContext->LocalToString(params[2], &address);
 	ctx->host = address;
 	ctx->port = params[3];
 	g_async_resolve.data = ctx;
 	uv_async_send(&g_async_resolve);
 	return 1;
 }
 cell_t Socket_Write(IPluginContext *pContext, const cell_t *params) {
 	AsyncSocketContext *ctx = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if (ctx == NULL) {
 		return pContext->ThrowNativeError("Invalid socket handle");
 	}
 	char *data = NULL;
 	pContext->LocalToString(params[2], &data);
 	uv_buf_t* buffer = (uv_buf_t *) malloc(sizeof(uv_buf_t));
 	buffer->base = strdup(data);
 	buffer->len = strlen(data);
 	socket_write_t *write = (socket_write_t *) malloc(sizeof(socket_write_t));
 	write->ctx = ctx;
 	write->buf = buffer;
 	g_async_write.data = write;
 	uv_async_send(&g_async_write);
 	return 1;
 }
 cell_t Socket_SetConnectCallback(IPluginContext *pContext, const cell_t *params) {
 	AsyncSocketContext *ctx = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if (ctx == NULL) {
 		return pContext->ThrowNativeError("Invalid socket handle");
 	}
 	if (!ctx->SetConnectCallback(params[2])) {
 		return pContext->ThrowNativeError("Invalid callback");
 	}
 	return true;
 }
 cell_t Socket_SetErrorCallback(IPluginContext *pContext, const cell_t *params) {
 	AsyncSocketContext *ctx = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if (ctx == NULL) {
 		return pContext->ThrowNativeError("Invalid socket handle");
 	}
 	if (!ctx->SetErrorCallback(params[2])) {
 		return pContext->ThrowNativeError("Invalid callback");
 	}
 	return true;
 }
 cell_t Socket_SetDataCallback(IPluginContext *pContext, const cell_t *params) {
 	AsyncSocketContext *ctx = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if (ctx == NULL) {
 		return pContext->ThrowNativeError("Invalid socket handle");
 	}
 	if (!ctx->SetDataCallback(params[2])) {
 		return pContext->ThrowNativeError("Invalid callback");
 	}
 	return true;
 }
 void async_resolve(uv_async_t *handle) {
 	AsyncSocketContext *ctx = static_cast<AsyncSocketContext *>(handle->data);
 	ctx->resolver.data = ctx;
 	char *service = (char *) malloc(sizeof(char) * 6);
 	sprintf(service, "%d", ctx->port);
 	struct addrinfo hints;
 	hints.ai_family = PF_INET;
@ -317,47 +230,151 @@ void async_resolve(uv_async_t *handle) {
 	hints.ai_protocol = IPPROTO_TCP;
 	hints.ai_flags = 0;
-	int r = uv_getaddrinfo(loop, &ctx->resolver, on_resolved, ctx->host, service, &hints);
+	int err = uv_getaddrinfo(g_UV_Loop, &pAsyncContext->resolver, UV_OnAsyncResolved, pAsyncContext->m_pHost, service, &hints);
-
+	if(err)
-	if (r) {
+		UV_PushError(pAsyncContext, err);
 		push_error(ctx, r);
 	}
 }
-void async_write_cb(uv_write_t* req, int status) {
+void UV_OnAsyncWriteCleanup(uv_write_t *req, int status)
-	socket_write_t *data = (socket_write_t *) req->data;
+{
-
+	CAsyncWrite *pWrite = (CAsyncWrite *)req->data;
 	if (data->buf->base) {
 		free(data->buf->base);
 	}
 	free(data->buf);
 	free(data);
 	free(pWrite->pBuffer->base);
 	free(pWrite->pBuffer);
 	free(pWrite);
 	free(req);
 }
-void async_write(uv_async_t *handle) {
+void UV_OnAsyncWrite(uv_async_t *handle)
-	socket_write_t *data = (socket_write_t *) handle->data;
+{
 	CAsyncWrite *pWrite = (CAsyncWrite *)handle->data;
 	uv_close((uv_handle_t *)handle, UV_HandleCleanup);
-	if (data == NULL || data->buf == NULL) {
+	if(pWrite == NULL || pWrite->pBuffer == NULL)
 		return;
 	if(pWrite->pAsyncContext == NULL || pWrite->pAsyncContext->stream == NULL)
 	{
 		free(pWrite->pBuffer->base);
 		free(pWrite->pBuffer);
 		free(pWrite);
 		return;
 	}
-	if (data->ctx == NULL || data->ctx->stream == NULL) {
+	uv_write_t *req = (uv_write_t *)malloc(sizeof(uv_write_t));
-		return;
+	req->data = pWrite;
 	}
-	uv_write_t* req = (uv_write_t *) malloc(sizeof(uv_write_t));
+	uv_write(req, pWrite->pAsyncContext->stream, pWrite->pBuffer, 1, UV_OnAsyncWriteCleanup);
 }
-	req->data = data;
+cell_t Native_AsyncSocket_Create(IPluginContext *pContext, const cell_t *params)
 {
 	CAsyncSocketContext *pAsyncContext = new CAsyncSocketContext(pContext);
-	uv_write(req, data->ctx->stream, data->buf, 1, async_write_cb);
+	pAsyncContext->m_Handle = handlesys->CreateHandle(g_AsyncSocket.socketHandleType, pAsyncContext,
 		pContext->GetIdentity(), myself->GetIdentity(), NULL);
 	return pAsyncContext->m_Handle;
 }
 cell_t Native_AsyncSocket_Connect(IPluginContext *pContext, const cell_t *params)
 {
 	CAsyncSocketContext *pAsyncContext = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if(pAsyncContext == NULL)
 		return pContext->ThrowNativeError("Invalid socket handle");
 	if(params[3] < 0 || params[3] > 65535)
 		return pContext->ThrowNativeError("Invalid port specified");
 	char *address = NULL;
 	pContext->LocalToString(params[2], &address);
 	pAsyncContext->m_pHost = address;
 	pAsyncContext->m_Port = params[3];
 	CAsyncAddJob Job;
 	Job.CallbackFn = UV_OnAsyncResolve;
 	Job.pData = pAsyncContext;
 	g_AsyncAddQueue.enqueue(Job);
 	uv_async_send(&g_UV_AsyncAdded);
 	return 1;
 }
 cell_t Native_AsyncSocket_Write(IPluginContext *pContext, const cell_t *params)
 {
 	CAsyncSocketContext *pAsyncContext = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if(pAsyncContext == NULL)
 		return pContext->ThrowNativeError("Invalid socket handle");
 	char *data = NULL;
 	pContext->LocalToString(params[2], &data);
 	uv_buf_t* buffer = (uv_buf_t *)malloc(sizeof(uv_buf_t));
 	buffer->base = strdup(data);
 	buffer->len = strlen(data);
 	CAsyncWrite *pWrite = (CAsyncWrite *)malloc(sizeof(CAsyncWrite));
 	pWrite->pAsyncContext = pAsyncContext;
 	pWrite->pBuffer = buffer;
 	CAsyncAddJob Job;
 	Job.CallbackFn = UV_OnAsyncWrite;
 	Job.pData = pWrite;
 	g_AsyncAddQueue.enqueue(Job);
 	uv_async_send(&g_UV_AsyncAdded);
 	return 1;
 }
 cell_t Native_AsyncSocket_SetConnectCallback(IPluginContext *pContext, const cell_t *params)
 {
 	CAsyncSocketContext *pAsyncContext = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if(pAsyncContext == NULL)
 		return pContext->ThrowNativeError("Invalid socket handle");
 	if(!pAsyncContext->SetConnectCallback(params[2]))
 		return pContext->ThrowNativeError("Invalid callback");
 	return true;
 }
 cell_t Native_AsyncSocket_SetErrorCallback(IPluginContext *pContext, const cell_t *params)
 {
 	CAsyncSocketContext *pAsyncContext = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if(pAsyncContext == NULL)
 		return pContext->ThrowNativeError("Invalid socket handle");
 	if(!pAsyncContext->SetErrorCallback(params[2]))
 		return pContext->ThrowNativeError("Invalid callback");
 	return true;
 }
 cell_t Native_AsyncSocket_SetDataCallback(IPluginContext *pContext, const cell_t *params)
 {
 	CAsyncSocketContext *pAsyncContext = g_AsyncSocket.GetSocketInstanceByHandle(params[1]);
 	if(pAsyncContext == NULL)
 		return pContext->ThrowNativeError("Invalid socket handle");
 	if(!pAsyncContext->SetDataCallback(params[2]))
 		return pContext->ThrowNativeError("Invalid callback");
 	return true;
 }
 // Sourcemod Plugin Events
-bool AsyncSocket::SDK_OnLoad(char *error, size_t maxlength, bool late) {
+bool AsyncSocket::SDK_OnLoad(char *error, size_t maxlength, bool late)
 {
 	sharesys->AddNatives(myself, AsyncSocketNatives);
 	sharesys->RegisterLibrary(myself, "async_socket");
@ -365,32 +382,34 @@ bool AsyncSocket::SDK_OnLoad(char *error, size_t maxlength, bool late) {
 	smutils->AddGameFrameHook(OnGameFrame);
-	loop = uv_default_loop();
+	g_UV_Loop = uv_default_loop();
-	uv_async_init(loop, &g_async_resolve, async_resolve);
+	uv_async_init(g_UV_Loop, &g_UV_AsyncAdded, UV_OnAsyncAdded);
 	uv_async_init(loop, &g_async_write, async_write);
-	uv_thread_create(&loop_thread, EventLoop, NULL);
+	uv_thread_create(&g_UV_LoopThread, UV_EventLoop, NULL);
 	return true;
 }
-void AsyncSocket::SDK_OnUnload() {
+void AsyncSocket::SDK_OnUnload()
 {
 	handlesys->RemoveType(socketHandleType, NULL);
-	uv_thread_join(&loop_thread);
+	uv_close((uv_handle_t *)&g_UV_AsyncAdded, NULL);
-	uv_loop_close(loop);
+	uv_thread_join(&g_UV_LoopThread);
 	uv_loop_close(g_UV_Loop);
 	smutils->RemoveGameFrameHook(OnGameFrame);
 }
 const sp_nativeinfo_t AsyncSocketNatives[] = {
-	{"AsyncSocket.AsyncSocket", Socket_Create},
+	{"AsyncSocket.AsyncSocket", Native_AsyncSocket_Create},
-	{"AsyncSocket.Connect", Socket_Connect},
+	{"AsyncSocket.Connect", Native_AsyncSocket_Connect},
-	{"AsyncSocket.Write", Socket_Write},
+	{"AsyncSocket.Write", Native_AsyncSocket_Write},
-	{"AsyncSocket.SetConnectCallback", Socket_SetConnectCallback},
+	{"AsyncSocket.SetConnectCallback", Native_AsyncSocket_SetConnectCallback},
-	{"AsyncSocket.SetErrorCallback", Socket_SetErrorCallback},
+	{"AsyncSocket.SetErrorCallback", Native_AsyncSocket_SetErrorCallback},
-	{"AsyncSocket.SetDataCallback", Socket_SetDataCallback},
+	{"AsyncSocket.SetDataCallback", Native_AsyncSocket_SetDataCallback},
 	{NULL, NULL}
 };
--- a/extension/extension.h
+++ b/extension/extension.h
@ -42,20 +42,29 @@
 #include "smsdk_ext.h"
 #include "context.h"
-struct socket_write_t {
+struct CAsyncAddJob
-	AsyncSocketContext *ctx;
+{
-	uv_buf_t* buf;
+	uv_async_cb CallbackFn;
 	void *pData;
 };
-struct socket_data_t {
+struct CAsyncWrite
-	AsyncSocketContext *ctx;
+{
-	char* buf;
+	CAsyncSocketContext *pAsyncContext;
-	ssize_t size;
+	uv_buf_t *pBuffer;
 };
-struct error_data_t {
+struct CSocketData
-	AsyncSocketContext *ctx;
+{
-	int err;
+	CAsyncSocketContext *pAsyncContext;
 	char *pBuffer;
 	ssize_t BufferSize;
 };
 struct CSocketError
 {
 	CAsyncSocketContext *pAsyncContext;
 	int Error;
 };
 /**
@ -135,7 +144,7 @@ public:
 public:
 	HandleType_t socketHandleType;
-	AsyncSocketContext* GetSocketInstanceByHandle(Handle_t handle);
+	CAsyncSocketContext* GetSocketInstanceByHandle(Handle_t handle);
 public:
 	void OnHandleDestroy(HandleType_t type, void *object);
 };
--- a/extension/libuv/.gitignore
+++ b/extension/libuv/.gitignore
@ -0,0 +1,2 @@
 *
 !.gitignore
--- a/extension/queue.h
+++ b/extension/queue.h
@ -1,44 +0,0 @@
 #ifndef ASYNC_QUEUE_H
 #define ASYNC_QUEUE_H
 #include <deque>
 #include <uv.h>
 template <class T> 
 class LockedQueue {
    uv_mutex_t lock;
    std::deque<T> queue;
 public:    
    LockedQueue() {
        uv_mutex_init(&lock);
    }
    ~LockedQueue() {
        uv_mutex_destroy(&lock);
    }
    void Lock() {
        uv_mutex_lock(&lock);
    }
    void Unlock() {
        uv_mutex_unlock(&lock);
    }
    T Pop() {
        T output = queue.front();
        queue.pop_front();
        return output;
    }
    void Push(T item) {
        queue.push_back(item);
    }
    bool Empty() {
        return queue.empty();
    }
 };
 #endif
--- a/extension/readerwriterqueue.h
+++ b/extension/readerwriterqueue.h
@ -0,0 +1,815 @@
 // ©2013-2016 Cameron Desrochers.
 // Distributed under the simplified BSD license (see the license file that
 // should have come with this header).
 #pragma once
 #include "atomicops.h"
 #include <type_traits>
 #include <utility>
 #include <cassert>
 #include <stdexcept>
 #include <new>
 #include <cstdint>
 #include <cstdlib>		// For malloc/free/abort & size_t
 #if __cplusplus > 199711L || _MSC_VER >= 1700 // C++11 or VS2012
 #include <chrono>
 #endif
 // A lock-free queue for a single-consumer, single-producer architecture.
 // The queue is also wait-free in the common path (except if more memory
 // needs to be allocated, in which case malloc is called).
 // Allocates memory sparingly (O(lg(n) times, amortized), and only once if
 // the original maximum size estimate is never exceeded.
 // Tested on x86/x64 processors, but semantics should be correct for all
 // architectures (given the right implementations in atomicops.h), provided
 // that aligned integer and pointer accesses are naturally atomic.
 // Note that there should only be one consumer thread and producer thread;
 // Switching roles of the threads, or using multiple consecutive threads for
 // one role, is not safe unless properly synchronized.
 // Using the queue exclusively from one thread is fine, though a bit silly.
 #ifndef MOODYCAMEL_CACHE_LINE_SIZE
 #define MOODYCAMEL_CACHE_LINE_SIZE 64
 #endif
 #ifndef MOODYCAMEL_EXCEPTIONS_ENABLED
 #if (defined(_MSC_VER) && defined(_CPPUNWIND)) || (defined(__GNUC__) && defined(__EXCEPTIONS)) || (!defined(_MSC_VER) && !defined(__GNUC__))
 #define MOODYCAMEL_EXCEPTIONS_ENABLED
 #endif
 #endif
 #ifdef AE_VCPP
 #pragma warning(push)
 #pragma warning(disable: 4324)	// structure was padded due to __declspec(align())
 #pragma warning(disable: 4820)	// padding was added
 #pragma warning(disable: 4127)	// conditional expression is constant
 #endif
 namespace moodycamel {
 template<typename T, size_t MAX_BLOCK_SIZE = 512>
 class ReaderWriterQueue
 {
 	// Design: Based on a queue-of-queues. The low-level queues are just
 	// circular buffers with front and tail indices indicating where the
 	// next element to dequeue is and where the next element can be enqueued,
 	// respectively. Each low-level queue is called a "block". Each block
 	// wastes exactly one element's worth of space to keep the design simple
 	// (if front == tail then the queue is empty, and can't be full).
 	// The high-level queue is a circular linked list of blocks; again there
 	// is a front and tail, but this time they are pointers to the blocks.
 	// The front block is where the next element to be dequeued is, provided
 	// the block is not empty. The back block is where elements are to be
 	// enqueued, provided the block is not full.
 	// The producer thread owns all the tail indices/pointers. The consumer
 	// thread owns all the front indices/pointers. Both threads read each
 	// other's variables, but only the owning thread updates them. E.g. After
 	// the consumer reads the producer's tail, the tail may change before the
 	// consumer is done dequeuing an object, but the consumer knows the tail
 	// will never go backwards, only forwards.
 	// If there is no room to enqueue an object, an additional block (of
 	// equal size to the last block) is added. Blocks are never removed.
 public:
 	// Constructs a queue that can hold maxSize elements without further
 	// allocations. If more than MAX_BLOCK_SIZE elements are requested,
 	// then several blocks of MAX_BLOCK_SIZE each are reserved (including
 	// at least one extra buffer block).
 	explicit ReaderWriterQueue(size_t maxSize = 15)
 #ifndef NDEBUG
 		: enqueuing(false)
 		,dequeuing(false)
 #endif
 	{
 		assert(maxSize > 0);
 		assert(MAX_BLOCK_SIZE == ceilToPow2(MAX_BLOCK_SIZE) && "MAX_BLOCK_SIZE must be a power of 2");
 		assert(MAX_BLOCK_SIZE >= 2 && "MAX_BLOCK_SIZE must be at least 2");
 		Block* firstBlock = nullptr;
 		largestBlockSize = ceilToPow2(maxSize + 1);		// We need a spare slot to fit maxSize elements in the block
 		if (largestBlockSize > MAX_BLOCK_SIZE * 2) {
 			// We need a spare block in case the producer is writing to a different block the consumer is reading from, and
 			// wants to enqueue the maximum number of elements. We also need a spare element in each block to avoid the ambiguity
 			// between front == tail meaning "empty" and "full".
 			// So the effective number of slots that are guaranteed to be usable at any time is the block size - 1 times the
 			// number of blocks - 1. Solving for maxSize and applying a ceiling to the division gives us (after simplifying):
 			size_t initialBlockCount = (maxSize + MAX_BLOCK_SIZE * 2 - 3) / (MAX_BLOCK_SIZE - 1);
 			largestBlockSize = MAX_BLOCK_SIZE;
 			Block* lastBlock = nullptr;
 			for (size_t i = 0; i != initialBlockCount; ++i) {
 				auto block = make_block(largestBlockSize);
 				if (block == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
 					throw std::bad_alloc();
 #else
 					abort();
 #endif
 				}
 				if (firstBlock == nullptr) {
 					firstBlock = block;
 				}
 				else {
 					lastBlock->next = block;
 				}
 				lastBlock = block;
 				block->next = firstBlock;
 			}
 		}
 		else {
 			firstBlock = make_block(largestBlockSize);
 			if (firstBlock == nullptr) {
 #ifdef MOODYCAMEL_EXCEPTIONS_ENABLED
 				throw std::bad_alloc();
 #else
 				abort();
 #endif
 			}
 			firstBlock->next = firstBlock;
 		}
 		frontBlock = firstBlock;
 		tailBlock = firstBlock;
 		// Make sure the reader/writer threads will have the initialized memory setup above:
 		fence(memory_order_sync);
 	}
 	// Note: The queue should not be accessed concurrently while it's
 	// being deleted. It's up to the user to synchronize this.
 	~ReaderWriterQueue()
 	{
 		// Make sure we get the latest version of all variables from other CPUs:
 		fence(memory_order_sync);
 		// Destroy any remaining objects in queue and free memory
 		Block* frontBlock_ = frontBlock;
 		Block* block = frontBlock_;
 		do {
 			Block* nextBlock = block->next;
 			size_t blockFront = block->front;
 			size_t blockTail = block->tail;
 			for (size_t i = blockFront; i != blockTail; i = (i + 1) & block->sizeMask) {
 				auto element = reinterpret_cast<T*>(block->data + i * sizeof(T));
 				element->~T();
 				(void)element;
 			}
 			auto rawBlock = block->rawThis;
 			block->~Block();
 			std::free(rawBlock);
 			block = nextBlock;
 		} while (block != frontBlock_);
 	}
 	// Enqueues a copy of element if there is room in the queue.
 	// Returns true if the element was enqueued, false otherwise.
 	// Does not allocate memory.
 	AE_FORCEINLINE bool try_enqueue(T const& element)
 	{
 		return inner_enqueue<CannotAlloc>(element);
 	}
 	// Enqueues a moved copy of element if there is room in the queue.
 	// Returns true if the element was enqueued, false otherwise.
 	// Does not allocate memory.
 	AE_FORCEINLINE bool try_enqueue(T&& element)
 	{
 		return inner_enqueue<CannotAlloc>(std::forward<T>(element));
 	}
 	// Enqueues a copy of element on the queue.
 	// Allocates an additional block of memory if needed.
 	// Only fails (returns false) if memory allocation fails.
 	AE_FORCEINLINE bool enqueue(T const& element)
 	{
 		return inner_enqueue<CanAlloc>(element);
 	}
 	// Enqueues a moved copy of element on the queue.
 	// Allocates an additional block of memory if needed.
 	// Only fails (returns false) if memory allocation fails.
 	AE_FORCEINLINE bool enqueue(T&& element)
 	{
 		return inner_enqueue<CanAlloc>(std::forward<T>(element));
 	}
 	// Attempts to dequeue an element; if the queue is empty,
 	// returns false instead. If the queue has at least one element,
 	// moves front to result using operator=, then returns true.
 	template<typename U>
 	bool try_dequeue(U& result)
 	{
 #ifndef NDEBUG
 		ReentrantGuard guard(this->dequeuing);
 #endif
 		// High-level pseudocode:
 		// Remember where the tail block is
 		// If the front block has an element in it, dequeue it
 		// Else
 		//     If front block was the tail block when we entered the function, return false
 		//     Else advance to next block and dequeue the item there
 		// Note that we have to use the value of the tail block from before we check if the front
 		// block is full or not, in case the front block is empty and then, before we check if the
 		// tail block is at the front block or not, the producer fills up the front block *and
 		// moves on*, which would make us skip a filled block. Seems unlikely, but was consistently
 		// reproducible in practice.
 		// In order to avoid overhead in the common case, though, we do a double-checked pattern
 		// where we have the fast path if the front block is not empty, then read the tail block,
 		// then re-read the front block and check if it's not empty again, then check if the tail
 		// block has advanced.
 		Block* frontBlock_ = frontBlock.load();
 		size_t blockTail = frontBlock_->localTail;
 		size_t blockFront = frontBlock_->front.load();
 		if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
 			fence(memory_order_acquire);
 		non_empty_front_block:
 			// Front block not empty, dequeue from here
 			auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
 			result = std::move(*element);
 			element->~T();
 			blockFront = (blockFront + 1) & frontBlock_->sizeMask;
 			fence(memory_order_release);
 			frontBlock_->front = blockFront;
 		}
 		else if (frontBlock_ != tailBlock.load()) {
 			fence(memory_order_acquire);
 			frontBlock_ = frontBlock.load();
 			blockTail = frontBlock_->localTail = frontBlock_->tail.load();
 			blockFront = frontBlock_->front.load();
 			fence(memory_order_acquire);
 			if (blockFront != blockTail) {
 				// Oh look, the front block isn't empty after all
 				goto non_empty_front_block;
 			}
 			// Front block is empty but there's another block ahead, advance to it
 			Block* nextBlock = frontBlock_->next;
 			// Don't need an acquire fence here since next can only ever be set on the tailBlock,
 			// and we're not the tailBlock, and we did an acquire earlier after reading tailBlock which
 			// ensures next is up-to-date on this CPU in case we recently were at tailBlock.
 			size_t nextBlockFront = nextBlock->front.load();
 			size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
 			fence(memory_order_acquire);
 			// Since the tailBlock is only ever advanced after being written to,
 			// we know there's for sure an element to dequeue on it
 			assert(nextBlockFront != nextBlockTail);
 			AE_UNUSED(nextBlockTail);
 			// We're done with this block, let the producer use it if it needs
 			fence(memory_order_release);		// Expose possibly pending changes to frontBlock->front from last dequeue
 			frontBlock = frontBlock_ = nextBlock;
 			compiler_fence(memory_order_release);	// Not strictly needed
 			auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
 			result = std::move(*element);
 			element->~T();
 			nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
 			fence(memory_order_release);
 			frontBlock_->front = nextBlockFront;
 		}
 		else {
 			// No elements in current block and no other block to advance to
 			return false;
 		}
 		return true;
 	}
 	// Returns a pointer to the front element in the queue (the one that
 	// would be removed next by a call to `try_dequeue` or `pop`). If the
 	// queue appears empty at the time the method is called, nullptr is
 	// returned instead.
 	// Must be called only from the consumer thread.
 	T* peek()
 	{
 #ifndef NDEBUG
 		ReentrantGuard guard(this->dequeuing);
 #endif
 		// See try_dequeue() for reasoning
 		Block* frontBlock_ = frontBlock.load();
 		size_t blockTail = frontBlock_->localTail;
 		size_t blockFront = frontBlock_->front.load();
 		if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
 			fence(memory_order_acquire);
 		non_empty_front_block:
 			return reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
 		}
 		else if (frontBlock_ != tailBlock.load()) {
 			fence(memory_order_acquire);
 			frontBlock_ = frontBlock.load();
 			blockTail = frontBlock_->localTail = frontBlock_->tail.load();
 			blockFront = frontBlock_->front.load();
 			fence(memory_order_acquire);
 			if (blockFront != blockTail) {
 				goto non_empty_front_block;
 			}
 			Block* nextBlock = frontBlock_->next;
 			size_t nextBlockFront = nextBlock->front.load();
 			fence(memory_order_acquire);
 			assert(nextBlockFront != nextBlock->tail.load());
 			return reinterpret_cast<T*>(nextBlock->data + nextBlockFront * sizeof(T));
 		}
 		return nullptr;
 	}
 	// Removes the front element from the queue, if any, without returning it.
 	// Returns true on success, or false if the queue appeared empty at the time
 	// `pop` was called.
 	bool pop()
 	{
 #ifndef NDEBUG
 		ReentrantGuard guard(this->dequeuing);
 #endif
 		// See try_dequeue() for reasoning
 		Block* frontBlock_ = frontBlock.load();
 		size_t blockTail = frontBlock_->localTail;
 		size_t blockFront = frontBlock_->front.load();
 		if (blockFront != blockTail || blockFront != (frontBlock_->localTail = frontBlock_->tail.load())) {
 			fence(memory_order_acquire);
 		non_empty_front_block:
 			auto element = reinterpret_cast<T*>(frontBlock_->data + blockFront * sizeof(T));
 			element->~T();
 			blockFront = (blockFront + 1) & frontBlock_->sizeMask;
 			fence(memory_order_release);
 			frontBlock_->front = blockFront;
 		}
 		else if (frontBlock_ != tailBlock.load()) {
 			fence(memory_order_acquire);
 			frontBlock_ = frontBlock.load();
 			blockTail = frontBlock_->localTail = frontBlock_->tail.load();
 			blockFront = frontBlock_->front.load();
 			fence(memory_order_acquire);
 			if (blockFront != blockTail) {
 				goto non_empty_front_block;
 			}
 			// Front block is empty but there's another block ahead, advance to it
 			Block* nextBlock = frontBlock_->next;
 			size_t nextBlockFront = nextBlock->front.load();
 			size_t nextBlockTail = nextBlock->localTail = nextBlock->tail.load();
 			fence(memory_order_acquire);
 			assert(nextBlockFront != nextBlockTail);
 			AE_UNUSED(nextBlockTail);
 			fence(memory_order_release);
 			frontBlock = frontBlock_ = nextBlock;
 			compiler_fence(memory_order_release);
 			auto element = reinterpret_cast<T*>(frontBlock_->data + nextBlockFront * sizeof(T));
 			element->~T();
 			nextBlockFront = (nextBlockFront + 1) & frontBlock_->sizeMask;
 			fence(memory_order_release);
 			frontBlock_->front = nextBlockFront;
 		}
 		else {
 			// No elements in current block and no other block to advance to
 			return false;
 		}
 		return true;
 	}
 	// Returns the approximate number of items currently in the queue.
 	// Safe to call from both the producer and consumer threads.
 	inline size_t size_approx() const
 	{
 		size_t result = 0;
 		Block* frontBlock_ = frontBlock.load();
 		Block* block = frontBlock_;
 		do {
 			fence(memory_order_acquire);
 			size_t blockFront = block->front.load();
 			size_t blockTail = block->tail.load();
 			result += (blockTail - blockFront) & block->sizeMask;
 			block = block->next.load();
 		} while (block != frontBlock_);
 		return result;
 	}
 private:
 	enum AllocationMode { CanAlloc, CannotAlloc };
 	template<AllocationMode canAlloc, typename U>
 	bool inner_enqueue(U&& element)
 	{
 #ifndef NDEBUG
 		ReentrantGuard guard(this->enqueuing);
 #endif
 		// High-level pseudocode (assuming we're allowed to alloc a new block):
 		// If room in tail block, add to tail
 		// Else check next block
 		//     If next block is not the head block, enqueue on next block
 		//     Else create a new block and enqueue there
 		//     Advance tail to the block we just enqueued to
 		Block* tailBlock_ = tailBlock.load();
 		size_t blockFront = tailBlock_->localFront;
 		size_t blockTail = tailBlock_->tail.load();
 		size_t nextBlockTail = (blockTail + 1) & tailBlock_->sizeMask;
 		if (nextBlockTail != blockFront || nextBlockTail != (tailBlock_->localFront = tailBlock_->front.load())) {
 			fence(memory_order_acquire);
 			// This block has room for at least one more element
 			char* location = tailBlock_->data + blockTail * sizeof(T);
 			new (location) T(std::forward<U>(element));
 			fence(memory_order_release);
 			tailBlock_->tail = nextBlockTail;
 		}
 		else {
 			fence(memory_order_acquire);
 			if (tailBlock_->next.load() != frontBlock) {
 				// Note that the reason we can't advance to the frontBlock and start adding new entries there
 				// is because if we did, then dequeue would stay in that block, eventually reading the new values,
 				// instead of advancing to the next full block (whose values were enqueued first and so should be
 				// consumed first).
 				fence(memory_order_acquire);		// Ensure we get latest writes if we got the latest frontBlock
 				// tailBlock is full, but there's a free block ahead, use it
 				Block* tailBlockNext = tailBlock_->next.load();
 				size_t nextBlockFront = tailBlockNext->localFront = tailBlockNext->front.load();
 				nextBlockTail = tailBlockNext->tail.load();
 				fence(memory_order_acquire);
 				// This block must be empty since it's not the head block and we
 				// go through the blocks in a circle
 				assert(nextBlockFront == nextBlockTail);
 				tailBlockNext->localFront = nextBlockFront;
 				char* location = tailBlockNext->data + nextBlockTail * sizeof(T);
 				new (location) T(std::forward<U>(element));
 				tailBlockNext->tail = (nextBlockTail + 1) & tailBlockNext->sizeMask;
 				fence(memory_order_release);
 				tailBlock = tailBlockNext;
 			}
 			else if (canAlloc == CanAlloc) {
 				// tailBlock is full and there's no free block ahead; create a new block
 				auto newBlockSize = largestBlockSize >= MAX_BLOCK_SIZE ? largestBlockSize : largestBlockSize * 2;
 				auto newBlock = make_block(newBlockSize);
 				if (newBlock == nullptr) {
 					// Could not allocate a block!
 					return false;
 				}
 				largestBlockSize = newBlockSize;
 				new (newBlock->data) T(std::forward<U>(element));
 				assert(newBlock->front == 0);
 				newBlock->tail = newBlock->localTail = 1;
 				newBlock->next = tailBlock_->next.load();
 				tailBlock_->next = newBlock;
 				// Might be possible for the dequeue thread to see the new tailBlock->next
 				// *without* seeing the new tailBlock value, but this is OK since it can't
 				// advance to the next block until tailBlock is set anyway (because the only
 				// case where it could try to read the next is if it's already at the tailBlock,
 				// and it won't advance past tailBlock in any circumstance).
 				fence(memory_order_release);
 				tailBlock = newBlock;
 			}
 			else if (canAlloc == CannotAlloc) {
 				// Would have had to allocate a new block to enqueue, but not allowed
 				return false;
 			}
 			else {
 				assert(false && "Should be unreachable code");
 				return false;
 			}
 		}
 		return true;
 	}
 	// Disable copying
 	ReaderWriterQueue(ReaderWriterQueue const&) {  }
 	// Disable assignment
 	ReaderWriterQueue& operator=(ReaderWriterQueue const&) {  }
 	AE_FORCEINLINE static size_t ceilToPow2(size_t x)
 	{
 		// From http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
 		--x;
 		x |= x >> 1;
 		x |= x >> 2;
 		x |= x >> 4;
 		for (size_t i = 1; i < sizeof(size_t); i <<= 1) {
 			x |= x >> (i << 3);
 		}
 		++x;
 		return x;
 	}
 	template<typename U>
 	static AE_FORCEINLINE char* align_for(char* ptr)
 	{
 		const std::size_t alignment = std::alignment_of<U>::value;
 		return ptr + (alignment - (reinterpret_cast<std::uintptr_t>(ptr) % alignment)) % alignment;
 	}
 private:
 #ifndef NDEBUG
 	struct ReentrantGuard
 	{
 		ReentrantGuard(bool& _inSection)
 			: inSection(_inSection)
 		{
 			assert(!inSection && "ReaderWriterQueue does not support enqueuing or dequeuing elements from other elements' ctors and dtors");
 			inSection = true;
 		}
 		~ReentrantGuard() { inSection = false; }
 	private:
 		ReentrantGuard& operator=(ReentrantGuard const&);
 	private:
 		bool& inSection;
 	};
 #endif
 	struct Block
 	{
 		// Avoid false-sharing by putting highly contended variables on their own cache lines
 		weak_atomic<size_t> front;	// (Atomic) Elements are read from here
 		size_t localTail;			// An uncontended shadow copy of tail, owned by the consumer
 		char cachelineFiller0[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];
 		weak_atomic<size_t> tail;	// (Atomic) Elements are enqueued here
 		size_t localFront;
 		char cachelineFiller1[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<size_t>) - sizeof(size_t)];	// next isn't very contended, but we don't want it on the same cache line as tail (which is)
 		weak_atomic<Block*> next;	// (Atomic)
 		char* data;		// Contents (on heap) are aligned to T's alignment
 		const size_t sizeMask;
 		// size must be a power of two (and greater than 0)
 		Block(size_t const& _size, char* _rawThis, char* _data)
 			: front(0), localTail(0), tail(0), localFront(0), next(nullptr), data(_data), sizeMask(_size - 1), rawThis(_rawThis)
 		{
 		}
 	private:
 		// C4512 - Assignment operator could not be generated
 		Block& operator=(Block const&);
 	public:
 		char* rawThis;
 	};
 	static Block* make_block(size_t capacity)
 	{
 		// Allocate enough memory for the block itself, as well as all the elements it will contain
 		auto size = sizeof(Block) + std::alignment_of<Block>::value - 1;
 		size += sizeof(T) * capacity + std::alignment_of<T>::value - 1;
 		auto newBlockRaw = static_cast<char*>(std::malloc(size));
 		if (newBlockRaw == nullptr) {
 			return nullptr;
 		}
 		auto newBlockAligned = align_for<Block>(newBlockRaw);
 		auto newBlockData = align_for<T>(newBlockAligned + sizeof(Block));
 		return new (newBlockAligned) Block(capacity, newBlockRaw, newBlockData);
 	}
 private:
 	weak_atomic<Block*> frontBlock;		// (Atomic) Elements are enqueued to this block
 	char cachelineFiller[MOODYCAMEL_CACHE_LINE_SIZE - sizeof(weak_atomic<Block*>)];
 	weak_atomic<Block*> tailBlock;		// (Atomic) Elements are dequeued from this block
 	size_t largestBlockSize;
 #ifndef NDEBUG
 	bool enqueuing;
 	bool dequeuing;
 #endif
 };
 // Like ReaderWriterQueue, but also providees blocking operations
 template<typename T, size_t MAX_BLOCK_SIZE = 512>
 class BlockingReaderWriterQueue
 {
 private:
 	typedef ::moodycamel::ReaderWriterQueue<T, MAX_BLOCK_SIZE> ReaderWriterQueue;
 public:
 	explicit BlockingReaderWriterQueue(size_t maxSize = 15)
 		: inner(maxSize)
 	{ }
 	// Enqueues a copy of element if there is room in the queue.
 	// Returns true if the element was enqueued, false otherwise.
 	// Does not allocate memory.
 	AE_FORCEINLINE bool try_enqueue(T const& element)
 	{
 		if (inner.try_enqueue(element)) {
 			sema.signal();
 			return true;
 		}
 		return false;
 	}
 	// Enqueues a moved copy of element if there is room in the queue.
 	// Returns true if the element was enqueued, false otherwise.
 	// Does not allocate memory.
 	AE_FORCEINLINE bool try_enqueue(T&& element)
 	{
 		if (inner.try_enqueue(std::forward<T>(element))) {
 			sema.signal();
 			return true;
 		}
 		return false;
 	}
 	// Enqueues a copy of element on the queue.
 	// Allocates an additional block of memory if needed.
 	// Only fails (returns false) if memory allocation fails.
 	AE_FORCEINLINE bool enqueue(T const& element)
 	{
 		if (inner.enqueue(element)) {
 			sema.signal();
 			return true;
 		}
 		return false;
 	}
 	// Enqueues a moved copy of element on the queue.
 	// Allocates an additional block of memory if needed.
 	// Only fails (returns false) if memory allocation fails.
 	AE_FORCEINLINE bool enqueue(T&& element)
 	{
 		if (inner.enqueue(std::forward<T>(element))) {
 			sema.signal();
 			return true;
 		}
 		return false;
 	}
 	// Attempts to dequeue an element; if the queue is empty,
 	// returns false instead. If the queue has at least one element,
 	// moves front to result using operator=, then returns true.
 	template<typename U>
 	bool try_dequeue(U& result)
 	{
 		if (sema.tryWait()) {
 			bool success = inner.try_dequeue(result);
 			assert(success);
 			AE_UNUSED(success);
 			return true;
 		}
 		return false;
 	}
 	// Attempts to dequeue an element; if the queue is empty,
 	// waits until an element is available, then dequeues it.
 	template<typename U>
 	void wait_dequeue(U& result)
 	{
 		sema.wait();
 		bool success = inner.try_dequeue(result);
 		AE_UNUSED(result);
 		assert(success);
 		AE_UNUSED(success);
 	}
 	// Attempts to dequeue an element; if the queue is empty,
 	// waits until an element is available up to the specified timeout,
 	// then dequeues it and returns true, or returns false if the timeout
 	// expires before an element can be dequeued.
 	// Using a negative timeout indicates an indefinite timeout,
 	// and is thus functionally equivalent to calling wait_dequeue.
 	template<typename U>
 	bool wait_dequeue_timed(U& result, std::int64_t timeout_usecs)
 	{
 		if (!sema.wait(timeout_usecs)) {
 			return false;
 		}
 		bool success = inner.try_dequeue(result);
 		AE_UNUSED(result);
 		assert(success);
 		AE_UNUSED(success);
 		return true;
 	}
 #if __cplusplus > 199711L || _MSC_VER >= 1700
 	// Attempts to dequeue an element; if the queue is empty,
 	// waits until an element is available up to the specified timeout,
 	// then dequeues it and returns true, or returns false if the timeout
 	// expires before an element can be dequeued.
 	// Using a negative timeout indicates an indefinite timeout,
 	// and is thus functionally equivalent to calling wait_dequeue.
 	template<typename U, typename Rep, typename Period>
 	inline bool wait_dequeue_timed(U& result, std::chrono::duration<Rep, Period> const& timeout)
 	{
        return wait_dequeue_timed(result, std::chrono::duration_cast<std::chrono::microseconds>(timeout).count());
 	}
 #endif
 	// Returns a pointer to the front element in the queue (the one that
 	// would be removed next by a call to `try_dequeue` or `pop`). If the
 	// queue appears empty at the time the method is called, nullptr is
 	// returned instead.
 	// Must be called only from the consumer thread.
 	AE_FORCEINLINE T* peek()
 	{
 		return inner.peek();
 	}
 	// Removes the front element from the queue, if any, without returning it.
 	// Returns true on success, or false if the queue appeared empty at the time
 	// `pop` was called.
 	AE_FORCEINLINE bool pop()
 	{
 		if (sema.tryWait()) {
 			bool result = inner.pop();
 			assert(result);
 			AE_UNUSED(result);
 			return true;
 		}
 		return false;
 	}
 	// Returns the approximate number of items currently in the queue.
 	// Safe to call from both the producer and consumer threads.
 	AE_FORCEINLINE size_t size_approx() const
 	{
 		return sema.availableApprox();
 	}
 private:
 	// Disable copying & assignment
 	BlockingReaderWriterQueue(ReaderWriterQueue const&) {  }
 	BlockingReaderWriterQueue& operator=(ReaderWriterQueue const&) {  }
 private:
 	ReaderWriterQueue inner;
 	spsc_sema::LightweightSemaphore sema;
 };
 }    // end namespace moodycamel
 #ifdef AE_VCPP
 #pragma warning(pop)
 #endif