diff --git a/LLama.Unittest/BasicTest.cs b/LLama.Unittest/BasicTest.cs
index 2cd1806f9..2c06dd47b 100644
--- a/LLama.Unittest/BasicTest.cs
+++ b/LLama.Unittest/BasicTest.cs
@@ -1,16 +1,20 @@
 using System.Text;
 using LLama.Common;
+using LLama.Native;
+using Xunit.Abstractions;
 
 namespace LLama.Unittest
 {
     public sealed class BasicTest
         : IDisposable
     {
+        private readonly ITestOutputHelper _testOutputHelper;
         private readonly ModelParams _params;
         private readonly LLamaWeights _model;
 
-        public BasicTest()
+        public BasicTest(ITestOutputHelper testOutputHelper)
         {
+            _testOutputHelper = testOutputHelper;
             _params = new ModelParams(Constants.ModelPath)
             {
                 ContextSize = 2048
@@ -30,5 +34,57 @@ public void BasicModelProperties()
             Assert.Equal(4096, _model.ContextSize);
             Assert.Equal(4096, _model.EmbeddingSize);
         }
+
+        // Checks every metadata key/value pair reported by the native API against the expected values.
+        [Fact]
+        public void AdvancedModelProperties()
+        {
+            var expected = new Dictionary<string, string>
+            {
+                { "general.name", "LLaMA v2" },
+                { "general.architecture", "llama" },
+                { "general.quantization_version", "2" },
+                { "general.file_type", "2" },
+
+                { "llama.context_length", "4096" },
+                { "llama.rope.dimension_count", "128" },
+                { "llama.embedding_length", "4096" },
+                { "llama.block_count", "32" },
+                { "llama.feed_forward_length", "11008" },
+                { "llama.attention.head_count", "32" },
+                { "llama.attention.head_count_kv", "32" },
+                { "llama.attention.layer_norm_rms_epsilon", "0.000001" },
+
+                { "tokenizer.ggml.eos_token_id", "2" },
+                { "tokenizer.ggml.model", "llama" },
+                { "tokenizer.ggml.bos_token_id", "1" },
+                { "tokenizer.ggml.unknown_token_id", "0" },
+            };
+
+            var metaCount = NativeApi.llama_model_meta_count(_model.NativeHandle);
+            Assert.Equal(expected.Count, metaCount);
+            // Each key may be matched only once, so equal counts imply every expected key was seen.
+            var seen = new HashSet<string>();
+            Span<byte> buffer = stackalloc byte[128];
+            unsafe
+            {
+                fixed (byte* ptr = buffer)
+                {
+                    for (var i = 0; i < metaCount; i++)
+                    {
+                        var length = NativeApi.llama_model_meta_key_by_index(_model.NativeHandle, i, ptr, buffer.Length);
+                        Assert.InRange(length, 1, buffer.Length); // fail as an assertion, not inside the slice below
+                        var key = Encoding.UTF8.GetString(buffer[..length]);
+                        length = NativeApi.llama_model_meta_val_str_by_index(_model.NativeHandle, i, ptr, buffer.Length);
+                        Assert.InRange(length, 1, buffer.Length); // same guard for the value string
+                        var val = Encoding.UTF8.GetString(buffer[..length]);
+                        _testOutputHelper.WriteLine($"{key} == {val}");
+                        Assert.True(expected.TryGetValue(key, out var expectedVal), $"Unexpected metadata key '{key}'");
+                        Assert.Equal(expectedVal, val);
+                        Assert.True(seen.Add(key), $"Duplicate metadata key '{key}'");
+                    }
+                }
+            }
+        }
     }
 }
\ No newline at end of file
diff --git a/LLama.Unittest/LLama.Unittest.csproj b/LLama.Unittest/LLama.Unittest.csproj
index 0532244df..8effd951a 100644
--- a/LLama.Unittest/LLama.Unittest.csproj
+++ b/LLama.Unittest/LLama.Unittest.csproj
@@ -8,6 +8,8 @@
     <Nullable>enable</Nullable>
 
     <IsPackable>false</IsPackable>
+
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
   </PropertyGroup>
 
   <ItemGroup>
diff --git a/LLama/LLamaSharp.Runtime.targets b/LLama/LLamaSharp.Runtime.targets
index 9085e5f7f..93600b299 100644
--- a/LLama/LLamaSharp.Runtime.targets
+++ b/LLama/LLamaSharp.Runtime.targets
@@ -3,41 +3,69 @@
         <IncludeBuiltInRuntimes Condition="'$(IncludeBuiltInRuntimes)' == ''">true</IncludeBuiltInRuntimes>
     </PropertyGroup>
     <ItemGroup Condition="'$(IncludeBuiltInRuntimes)' == 'true'">
-        <None Include="$(MSBuildThisFileDirectory)runtimes/libllama.dll">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/win-x64/native/libllama.dll</Link>
-        </None>
-        <None Include="$(MSBuildThisFileDirectory)runtimes/libllama-cuda11.dll">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/win-x64/native/cuda11/libllama.dll</Link>
-        </None>
-        <None Include="$(MSBuildThisFileDirectory)runtimes/libllama-cuda12.dll">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/win-x64/native/cuda12/libllama.dll</Link>
-        </None>
-        <None Include="$(MSBuildThisFileDirectory)runtimes/libllama.so">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/linux-x64/native/libllama.so</Link>
-        </None>
-        <None Include="$(MSBuildThisFileDirectory)runtimes/libllama-cuda11.so">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/linux-x64/native/cuda11/libllama.so</Link>
-        </None>
-        <None Include="$(MSBuildThisFileDirectory)runtimes/libllama-cuda12.so">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/linux-x64/native/cuda12/libllama.so</Link>
-        </None>
-        <None Include="$(MSBuildThisFileDirectory)runtimes/osx-arm64/libllama.dylib">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/osx-arm64/native/libllama.dylib</Link>
-        </None>
-        <None Include="$(MSBuildThisFileDirectory)runtimes/osx-arm64/ggml-metal.metal">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/osx-arm64/native/ggml-metal.metal</Link>
-        </None>
-        <None Include="$(MSBuildThisFileDirectory)runtimes/osx-x64/libllama.dylib">
-            <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
-            <Link>runtimes/osx-x64/native/libllama.dylib</Link>
-        </None>
+
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/libllama.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/noavx/libllama.dll</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx/libllama.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/avx/libllama.dll</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx2/libllama.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/avx2/libllama.dll</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx512/libllama.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/avx512/libllama.dll</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu11.7.1/libllama.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/cuda11/libllama.dll</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu12.1.0/libllama.dll">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/win-x64/native/cuda12/libllama.dll</Link>
+      </None>
+
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/libllama.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/noavx/libllama.so</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx/libllama.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/avx/libllama.so</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx2/libllama.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/avx2/libllama.so</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/avx512/libllama.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/avx512/libllama.so</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu11.7.1/libllama.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/cuda11/libllama.so</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/cu12.1.0/libllama.so">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/linux-x64/native/cuda12/libllama.so</Link>
+      </None>
+
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-arm64/libllama.dylib">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/osx-arm64/native/libllama.dylib</Link>
+      </None>
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-arm64/ggml-metal.metal">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/osx-arm64/native/ggml-metal.metal</Link>
+      </None>
+
+      <None Include="$(MSBuildThisFileDirectory)runtimes/deps/osx-x64/libllama.dylib">
+        <CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
+        <Link>runtimes/osx-x64/native/libllama.dylib</Link>
+      </None>
     </ItemGroup>
 </Project>
\ No newline at end of file
diff --git a/LLama/Native/NativeApi.cs b/LLama/Native/NativeApi.cs
index 074a8e9fd..a4f97a004 100644
--- a/LLama/Native/NativeApi.cs
+++ b/LLama/Native/NativeApi.cs
@@ -302,6 +302,20 @@ public static int llama_tokenize(SafeLLamaContextHandle ctx, string text, Encodi
         [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
         public static extern llama_token llama_token_nl(SafeLlamaModelHandle model);
 
+        /// <summary>
+        /// Query the model's "add BOS token" setting (meaning inferred from the native name; confirm against llama.h).
+        /// </summary>
+        /// <returns>-1 if unknown, 1 for true or 0 for false</returns>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern int llama_add_bos_token(SafeLlamaModelHandle model);
+
+        /// <summary>
+        /// Query the model's "add EOS token" setting (meaning inferred from the native name; confirm against llama.h).
+        /// </summary>
+        /// <returns>-1 if unknown, 1 for true or 0 for false</returns>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern int llama_add_eos_token(SafeLlamaModelHandle model);
+
         /// <summary>
         /// Print out timing information for this context
         /// </summary>
@@ -348,18 +362,77 @@ public static int llama_tokenize(SafeLLamaContextHandle ctx, string text, Encodi
         public static extern int llama_n_embd(SafeLlamaModelHandle model);
 
         /// <summary>
-        /// Get the size of the model in bytes
+        /// Get the model's RoPE frequency scaling factor
+        /// </summary>
+        /// <param name="model">Handle of the model to query</param>
+        /// <returns>The RoPE frequency scaling factor reported for the model</returns>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern float llama_rope_freq_scale_train(SafeLlamaModelHandle model);
+
+        /// <summary>
+        /// Get metadata value as a string by key name
+        /// </summary>
+        /// <param name="model">Handle of the model to query</param>
+        /// <param name="key">Name of the metadata key to look up (presumably a null-terminated UTF-8 string; confirm against llama.h)</param>
+        /// <param name="buf">Buffer that receives the value string</param>
+        /// <param name="buf_size">Size of <paramref name="buf"/> (presumably in bytes; confirm against llama.h)</param>
+        /// <returns>The length of the string on success, or -1 on failure</returns>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern int llama_model_meta_val_str(SafeLlamaModelHandle model, byte* key, byte* buf, long buf_size);
+
+        /// <summary>
+        /// Get the number of metadata key/value pairs
         /// </summary>
         /// <param name="model"></param>
         /// <returns></returns>
         [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern int llama_model_meta_count(SafeLlamaModelHandle model);
+
+        /// <summary>
+        /// Get metadata key name by index
+        /// </summary>
+        /// <param name="model">Handle of the model to query</param>
+        /// <param name="index">Index of the metadata pair, from 0 up to (but excluding) <see cref="llama_model_meta_count"/></param>
+        /// <param name="buf">Buffer that receives the key name</param>
+        /// <param name="buf_size">Size of <paramref name="buf"/> in bytes</param>
+        /// <returns>The length of the string on success, or -1 on failure</returns>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern int llama_model_meta_key_by_index(SafeLlamaModelHandle model, int index, byte* buf, long buf_size);
+
+        /// <summary>
+        /// Get metadata value as a string by index
+        /// </summary>
+        /// <param name="model">Handle of the model to query</param>
+        /// <param name="index">Index of the metadata pair, from 0 up to (but excluding) <see cref="llama_model_meta_count"/></param>
+        /// <param name="buf">Buffer that receives the value string</param>
+        /// <param name="buf_size">Size of <paramref name="buf"/> in bytes</param>
+        /// <returns>The length of the string on success, or -1 on failure</returns>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern int llama_model_meta_val_str_by_index(SafeLlamaModelHandle model, int index, byte* buf, long buf_size);
+
+        /// <summary>
+        /// Get a string describing the model type
+        /// </summary>
+        /// <param name="model">Handle of the model to describe</param>
+        /// <param name="buf">Buffer that receives the description string</param>
+        /// <param name="buf_size">Size of <paramref name="buf"/> (presumably in bytes; confirm against llama.h)</param>
+        /// <returns>The length of the string on success, or -1 on failure</returns>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
+        public static extern int llama_model_desc(SafeLlamaModelHandle model, byte* buf, long buf_size);
+
+        /// <summary>
+        /// Get the size of the model in bytes
+        /// </summary>
+        /// <param name="model">Handle of the model to query</param>
+        /// <returns>The size of the model, in bytes</returns>
+        [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
         public static extern ulong llama_model_size(SafeLlamaModelHandle model);
 
         /// <summary>
         /// Get the number of parameters in this model
         /// </summary>
         /// <param name="model"></param>
-        /// <returns></returns>
+        /// <returns>The number of parameters in the model</returns>
         [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
         public static extern ulong llama_model_n_params(SafeLlamaModelHandle model);
 
@@ -370,7 +443,7 @@ public static int llama_tokenize(SafeLLamaContextHandle ctx, string text, Encodi
         /// <param name="llamaToken"></param>
         /// <param name="buffer">buffer to write string into</param>
         /// <param name="length">size of the buffer</param>
-        /// <returns>The length writte, or if the buffer is too small a negative that indicates the length required</returns>
+        /// <returns>The length written, or if the buffer is too small a negative that indicates the length required</returns>
         [DllImport(libraryName, CallingConvention = CallingConvention.Cdecl)]
         public static extern int llama_token_to_piece(SafeLlamaModelHandle model, int llamaToken, byte* buffer, int length);
 
diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
index 29466a1fe..e7ae5e58a 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cpu.nuspec
@@ -17,11 +17,21 @@
 
   <files>
     <file src="LLamaSharpBackend.props" target="build/netstandard2.0/LLamaSharp.Backend.Cpu.props" />
-    <file src="runtimes/libllama.dll" target="runtimes\win-x64\native\libllama.dll" />
-    <file src="runtimes/libllama.so" target="runtimes\linux-x64\native\libllama.so" />
-    <file src="runtimes/osx-x64/libllama.dylib" target="runtimes\osx-x64\native\libllama.dylib" />
-    <file src="runtimes/osx-arm64/libllama.dylib" target="runtimes\osx-arm64\native\libllama.dylib" />
-    <file src="runtimes/osx-arm64/ggml-metal.metal" target="runtimes\osx-arm64\native\ggml-metal.metal" />
+
+    <file src="runtimes/deps/libllama.dll" target="runtimes\win-x64\native\libllama.dll" />
+    <file src="runtimes/deps/avx/libllama.dll" target="runtimes\win-x64\native\avx\libllama.dll" />
+    <file src="runtimes/deps/avx2/libllama.dll" target="runtimes\win-x64\native\avx2\libllama.dll" />
+    <file src="runtimes/deps/avx512/libllama.dll" target="runtimes\win-x64\native\avx512\libllama.dll" />
+
+    <file src="runtimes/deps/libllama.so" target="runtimes\linux-x64\native\libllama.so" />
+    <file src="runtimes/deps/avx/libllama.so" target="runtimes\linux-x64\native\avx\libllama.so" />
+    <file src="runtimes/deps/avx2/libllama.so" target="runtimes\linux-x64\native\avx2\libllama.so" />
+    <file src="runtimes/deps/avx512/libllama.so" target="runtimes\linux-x64\native\avx512\libllama.so" />
+    
+    <file src="runtimes/deps/osx-x64/libllama.dylib" target="runtimes\osx-x64\native\libllama.dylib" />
+    <file src="runtimes/deps/osx-arm64/libllama.dylib" target="runtimes\osx-arm64\native\libllama.dylib" />
+    <file src="runtimes/deps/osx-arm64/ggml-metal.metal" target="runtimes\osx-arm64\native\ggml-metal.metal" />
+    
     <file src="icon512.png" target="icon512.png" />
   </files>
 </package>
diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec
index d8876f4f3..4b7b6f046 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda11.nuspec
@@ -17,8 +17,10 @@
 
   <files>
     <file src="LLamaSharpBackend.props" target="build/netstandard2.0/LLamaSharp.Backend.Cuda11.props" />
-    <file src="runtimes/libllama-cuda11.dll" target="runtimes\win-x64\native\cuda11\libllama.dll" />
-    <file src="runtimes/libllama-cuda11.so" target="runtimes\linux-x64\native\cuda11\libllama.so" />
+    
+    <file src="runtimes/deps/cu11.7.1/libllama.dll" target="runtimes\win-x64\native\cuda11\libllama.dll" />
+    <file src="runtimes/deps/cu11.7.1/libllama.so" target="runtimes\linux-x64\native\cuda11\libllama.so" />
+    
     <file src="icon512.png" target="icon512.png" />
   </files>
 </package>
diff --git a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.nuspec b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.nuspec
index 5ffd8ef12..d915ba4c1 100644
--- a/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.nuspec
+++ b/LLama/runtimes/build/LLamaSharp.Backend.Cuda12.nuspec
@@ -17,8 +17,10 @@
 
   <files>
     <file src="LLamaSharpBackend.props" target="build/netstandard2.0/LLamaSharp.Backend.Cuda12.props" />
-    <file src="runtimes/libllama-Cuda12.dll" target="runtimes\win-x64\native\cuda12\libllama.dll" />
-    <file src="runtimes/libllama-Cuda12.so" target="runtimes\linux-x64\native\cuda12\libllama.so" />
+    
+    <file src="runtimes/deps/cu12.1.0/libllama.dll" target="runtimes\win-x64\native\cuda12\libllama.dll" />
+    <file src="runtimes/deps/cu12.1.0/libllama.so" target="runtimes\linux-x64\native\cuda12\libllama.so" />
+    
     <file src="icon512.png" target="icon512.png" />
   </files>
 </package>
diff --git a/LLama/runtimes/deps/avx/libllama.dll b/LLama/runtimes/deps/avx/libllama.dll
new file mode 100644
index 000000000..55d574843
Binary files /dev/null and b/LLama/runtimes/deps/avx/libllama.dll differ
diff --git a/LLama/runtimes/deps/avx/libllama.so b/LLama/runtimes/deps/avx/libllama.so
new file mode 100644
index 000000000..e9360b95b
Binary files /dev/null and b/LLama/runtimes/deps/avx/libllama.so differ
diff --git a/LLama/runtimes/deps/avx2/libllama.dll b/LLama/runtimes/deps/avx2/libllama.dll
new file mode 100644
index 000000000..52330a971
Binary files /dev/null and b/LLama/runtimes/deps/avx2/libllama.dll differ
diff --git a/LLama/runtimes/deps/avx2/libllama.so b/LLama/runtimes/deps/avx2/libllama.so
new file mode 100644
index 000000000..9f84c424c
Binary files /dev/null and b/LLama/runtimes/deps/avx2/libllama.so differ
diff --git a/LLama/runtimes/deps/avx512/libllama.dll b/LLama/runtimes/deps/avx512/libllama.dll
new file mode 100644
index 000000000..5f68f81b4
Binary files /dev/null and b/LLama/runtimes/deps/avx512/libllama.dll differ
diff --git a/LLama/runtimes/deps/avx512/libllama.so b/LLama/runtimes/deps/avx512/libllama.so
new file mode 100644
index 000000000..2791a7491
Binary files /dev/null and b/LLama/runtimes/deps/avx512/libllama.so differ
diff --git a/LLama/runtimes/libllama-cuda11.dll b/LLama/runtimes/deps/cu11.7.1/libllama.dll
similarity index 64%
rename from LLama/runtimes/libllama-cuda11.dll
rename to LLama/runtimes/deps/cu11.7.1/libllama.dll
index ab4f4be28..8aa06f952 100644
Binary files a/LLama/runtimes/libllama-cuda11.dll and b/LLama/runtimes/deps/cu11.7.1/libllama.dll differ
diff --git a/LLama/runtimes/libllama-cuda11.so b/LLama/runtimes/deps/cu11.7.1/libllama.so
similarity index 61%
rename from LLama/runtimes/libllama-cuda11.so
rename to LLama/runtimes/deps/cu11.7.1/libllama.so
index 146b30abd..4f98e823b 100644
Binary files a/LLama/runtimes/libllama-cuda11.so and b/LLama/runtimes/deps/cu11.7.1/libllama.so differ
diff --git a/LLama/runtimes/libllama-cuda12.dll b/LLama/runtimes/deps/cu12.1.0/libllama.dll
similarity index 63%
rename from LLama/runtimes/libllama-cuda12.dll
rename to LLama/runtimes/deps/cu12.1.0/libllama.dll
index a51954b89..802e357e8 100644
Binary files a/LLama/runtimes/libllama-cuda12.dll and b/LLama/runtimes/deps/cu12.1.0/libllama.dll differ
diff --git a/LLama/runtimes/libllama-cuda12.so b/LLama/runtimes/deps/cu12.1.0/libllama.so
similarity index 60%
rename from LLama/runtimes/libllama-cuda12.so
rename to LLama/runtimes/deps/cu12.1.0/libllama.so
index 615d9c704..5a794f8e7 100644
Binary files a/LLama/runtimes/libllama-cuda12.so and b/LLama/runtimes/deps/cu12.1.0/libllama.so differ
diff --git a/LLama/runtimes/deps/libllama.dll b/LLama/runtimes/deps/libllama.dll
new file mode 100644
index 000000000..a68c94185
Binary files /dev/null and b/LLama/runtimes/deps/libllama.dll differ
diff --git a/LLama/runtimes/deps/libllama.so b/LLama/runtimes/deps/libllama.so
new file mode 100644
index 000000000..d0ef8a591
Binary files /dev/null and b/LLama/runtimes/deps/libllama.so differ
diff --git a/LLama/runtimes/osx-arm64/ggml-metal.metal b/LLama/runtimes/deps/osx-arm64/ggml-metal.metal
similarity index 96%
rename from LLama/runtimes/osx-arm64/ggml-metal.metal
rename to LLama/runtimes/deps/osx-arm64/ggml-metal.metal
index 7c35f23a7..5d1357cd7 100644
--- a/LLama/runtimes/osx-arm64/ggml-metal.metal
+++ b/LLama/runtimes/deps/osx-arm64/ggml-metal.metal
@@ -792,7 +792,7 @@ kernel void kernel_mul_mv_f32_f32(
         constant   int64_t & ne0,
         constant   int64_t & ne1,
         uint3 tgpig[[threadgroup_position_in_grid]],
-        uint tiisg[[thread_index_in_simdgroup]]) {
+        uint  tiisg[[thread_index_in_simdgroup]]) {
 
     const int64_t r0 = tgpig.x;
     const int64_t rb = tgpig.y*N_F32_F32;
@@ -844,6 +844,79 @@ kernel void kernel_mul_mv_f32_f32(
     }
 }
 
+#define N_F16_F16 4
+// Dots row r0 of src0 with up to N_F16_F16 rows of src1 (both read as half) and writes one float per row pair to dst.
+kernel void kernel_mul_mv_f16_f16(
+        device const  char * src0,
+        device const  char * src1,
+        device       float * dst,
+        constant   int64_t & ne00,
+        constant   int64_t & ne01,
+        constant   int64_t & ne02,
+        constant  uint64_t & nb00,
+        constant  uint64_t & nb01,
+        constant  uint64_t & nb02,
+        constant   int64_t & ne10,
+        constant   int64_t & ne11,
+        constant   int64_t & ne12,
+        constant  uint64_t & nb10,
+        constant  uint64_t & nb11,
+        constant  uint64_t & nb12,
+        constant   int64_t & ne0,
+        constant   int64_t & ne1,
+        uint3 tgpig[[threadgroup_position_in_grid]],
+        uint  tiisg[[thread_index_in_simdgroup]]) {
+    // r0: src0 row; rb: first src1 row handled by this threadgroup; im: index along the grid's z axis.
+    const int64_t r0 = tgpig.x;
+    const int64_t rb = tgpig.y*N_F16_F16;
+    const int64_t im = tgpig.z;
+
+    device const half * x = (device const half *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02);
+    // Rows shorter than 128 elements use the scalar loop; longer rows are read four halves at a time.
+    if (ne00 < 128) {
+        for (int row = 0; row < N_F16_F16; ++row) {
+            int r1 = rb + row;
+            if (r1 >= ne11) {
+                break;
+            }
+
+            device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
+            // Each thread accumulates elements tiisg, tiisg+32, ...; simd_sum then combines the partial sums.
+            float sumf = 0;
+            for (int i = tiisg; i < ne00; i += 32) {
+                sumf += (half) x[i] * (half) y[i];
+            }
+
+            float all_sum = simd_sum(sumf);
+            if (tiisg == 0) {
+                dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
+            }
+        }
+    } else {
+        device const half4 * x4 = (device const half4 *)x;
+        for (int row = 0; row < N_F16_F16; ++row) {
+            int r1 = rb + row;
+            if (r1 >= ne11) {
+                break;
+            }
+
+            device const half  * y  = (device const half  *) (src1 + r1*nb11 + im*nb12);
+            device const half4 * y4 = (device const half4 *) y;
+
+            float sumf = 0;
+            for (int i = tiisg; i < ne00/4; i += 32) {
+                for (int k = 0; k < 4; ++k) sumf += (half) x4[i][k] * y4[i][k];
+            }
+            // Thread 0 also adds the ne00 % 4 tail elements that the half4 loop above does not cover.
+            float all_sum = simd_sum(sumf);
+            if (tiisg == 0) {
+                for (int i = 4*(ne00/4); i < ne00; ++i) all_sum += (half) x[i] * y[i];
+                dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
+            }
+        }
+    }
+}
+
 kernel void kernel_mul_mv_f16_f32_1row(
         device const  char * src0,
         device const  char * src1,
@@ -1229,6 +1302,39 @@ kernel void kernel_rope(
 template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
 template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
 
+kernel void kernel_im2col_f16( // each thread writes one half to dst: an input sample from x, or 0 outside the IH x IW input
+        device const float * x,
+        device       half * dst,
+        constant   int32_t & ofs0,
+        constant   int32_t & ofs1,
+        constant   int32_t & IW,
+        constant   int32_t & IH,
+        constant   int32_t & CHW,
+        constant   int32_t & s0,
+        constant   int32_t & s1,
+        constant   int32_t & p0,
+        constant   int32_t & p1,
+        constant   int32_t & d0,
+        constant   int32_t & d1,
+        uint3 tgpig[[threadgroup_position_in_grid]],
+        uint3  tgpg[[threadgroups_per_grid]],
+        uint3 tpitg[[thread_position_in_threadgroup]],
+        uint3   ntg[[threads_per_threadgroup]]) {
+    const int32_t iiw = tgpig[2] * s0 + tpitg[2] * d0 - p0;
+    const int32_t iih = tgpig[1] * s1 + tpitg[1] * d1 - p1;
+    // iiw/iih: input column/row read by this thread (s*, d*, p* presumably stride, dilation, padding; confirm with host code).
+    const int32_t offset_dst =
+        (tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
+        (tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
+
+    if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
+        dst[offset_dst] = 0.0f;
+    } else {
+        const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
+        dst[offset_dst] = x[offset_src + iih * IW + iiw];
+    }
+}
+
 kernel void kernel_cpy_f16_f16(
         device const half * src0,
         device       half * dst,
diff --git a/LLama/runtimes/deps/osx-arm64/libllama.dylib b/LLama/runtimes/deps/osx-arm64/libllama.dylib
new file mode 100644
index 000000000..df57f7dfa
Binary files /dev/null and b/LLama/runtimes/deps/osx-arm64/libllama.dylib differ
diff --git a/LLama/runtimes/deps/osx-x64/libllama.dylib b/LLama/runtimes/deps/osx-x64/libllama.dylib
new file mode 100644
index 000000000..ee6f29b47
Binary files /dev/null and b/LLama/runtimes/deps/osx-x64/libllama.dylib differ
diff --git a/LLama/runtimes/libllama.dll b/LLama/runtimes/libllama.dll
deleted file mode 100644
index d2cc2a7be..000000000
Binary files a/LLama/runtimes/libllama.dll and /dev/null differ
diff --git a/LLama/runtimes/libllama.so b/LLama/runtimes/libllama.so
deleted file mode 100644
index e5a01286a..000000000
Binary files a/LLama/runtimes/libllama.so and /dev/null differ
diff --git a/LLama/runtimes/osx-arm64/libllama.dylib b/LLama/runtimes/osx-arm64/libllama.dylib
deleted file mode 100644
index 54d7a9324..000000000
Binary files a/LLama/runtimes/osx-arm64/libllama.dylib and /dev/null differ
diff --git a/LLama/runtimes/osx-x64/libllama.dylib b/LLama/runtimes/osx-x64/libllama.dylib
deleted file mode 100644
index 37eb3cd43..000000000
Binary files a/LLama/runtimes/osx-x64/libllama.dylib and /dev/null differ