lsf-bench update
This commit is contained in:
@@ -1,3 +1,6 @@
|
|||||||
[Install Lune](https://lune-org.github.io/docs/getting-started/1-installation/), then in this directory run `lune run src/build`. An `index.html` file will be created in `dist/`. You can either open that directly in your browser, or run `lune run src/dev` which will host its contents on `http://localhost:3000`.
|
[Install Lune](https://lune-org.github.io/docs/getting-started/1-installation/), then in this directory run `lune run src/build`. An `index.html` file will be created in `dist/`. You can either open that directly in your browser, or run `lune run src/dev` which will host its contents on `http://localhost:3000`.
|
||||||
|
|
||||||
I essentially just forked [this website](https://luau.page/), because I too like making websites in Luau.
|
I essentially just forked [this website](https://luau.page/), because I too like making websites in Luau.
|
||||||
|
|
||||||
|
## lsf-bench specific:
|
||||||
|
this is just a fork of the homepage with some javascript added for rendering the markdown of the benchmark results
|
126
results.md
Normal file
126
results.md
Normal file
@@ -0,0 +1,126 @@
|
|||||||
|
| Rank | Model | Weighted % | Total Count | Sum Total |
|
||||||
|
|---:|---|---:|---:|---:|
|
||||||
|
| 1 | gpt-oss:20b | 89.1% | 49 | 733 |
|
||||||
|
| 2 | hf.co/BasedBase/Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2:Q4_K_M | 88.2% | 49 | 726 |
|
||||||
|
| 3 | hf.co/bartowski/NousResearch_Hermes-4-14B-GGUF:Q4_K_M | 73.6% | 49 | 606 |
|
||||||
|
| 4 | hf.co/unsloth/gemma-3n-E4B-it-GGUF:Q8_0 | 66.0% | 49 | 543 |
|
||||||
|
| 5 | hf.co/unsloth/Phi-4-mini-reasoning-GGUF:Q8_0 | 65.5% | 49 | 539 |
|
||||||
|
| 6 | hf.co/unsloth/gemma-3n-E2B-it-GGUF:Q8_0 | 49.9% | 49 | 411 |
|
||||||
|
| 7 | hf.co/unsloth/Qwen3-0.6B-GGUF:BF16 | 45.8% | 49 | 377 |
|
||||||
|
| 8 | hf.co/unsloth/gemma-3-4b-it-GGUF:Q8_0 | 44.2% | 49 | 364 |
|
||||||
|
| 9 | hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0 | 42.2% | 49 | 347 |
|
||||||
|
| 10 | hf.co/unsloth/gemma-3-1b-it-GGUF:BF16 | 35.0% | 49 | 288 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Per-category stats
|
||||||
|
|
||||||
|
### 1) gpt-oss:20b
|
||||||
|
- **Aggregated:** 89.1% - **Count:** 49 - **Sum Total:** 733
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| syntax | 100.0% | 8 | 95 | 95 |
|
||||||
|
| types | 95.5% | 11 | 189 | 198 |
|
||||||
|
| advanced | 93.0% | 8 | 107 | 115 |
|
||||||
|
| internals | 88.4% | 8 | 190 | 215 |
|
||||||
|
| runtimes | 78.9% | 6 | 75 | 95 |
|
||||||
|
| compatibility | 66.7% | 8 | 70 | 105 |
|
||||||
|
|
||||||
|
### 2) hf.co/BasedBase/Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2:Q4_K_M
|
||||||
|
- **Aggregated:** 88.2% - **Count:** 49 - **Sum Total:** 726
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| syntax | 100.0% | 8 | 95 | 95 |
|
||||||
|
| types | 95.5% | 11 | 189 | 198 |
|
||||||
|
| advanced | 93.0% | 8 | 107 | 115 |
|
||||||
|
| internals | 88.4% | 8 | 190 | 215 |
|
||||||
|
| runtimes | 78.9% | 6 | 75 | 95 |
|
||||||
|
| compatibility | 66.7% | 8 | 70 | 105 |
|
||||||
|
|
||||||
|
### 3) hf.co/bartowski/NousResearch_Hermes-4-14B-GGUF:Q4_K_M
|
||||||
|
- **Aggregated:** 73.6% - **Count:** 49 - **Sum Total:** 606
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| syntax | 82.1% | 8 | 78 | 95 |
|
||||||
|
| types | 80.8% | 11 | 160 | 198 |
|
||||||
|
| advanced | 78.3% | 8 | 90 | 115 |
|
||||||
|
| internals | 75.8% | 8 | 163 | 215 |
|
||||||
|
| runtimes | 57.9% | 6 | 55 | 95 |
|
||||||
|
| compatibility | 57.1% | 8 | 60 | 105 |
|
||||||
|
|
||||||
|
### 4) hf.co/unsloth/gemma-3n-E4B-it-GGUF:Q8_0
|
||||||
|
- **Aggregated:** 66.0% - **Count:** 49 - **Sum Total:** 543
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| syntax | 76.8% | 8 | 73 | 95 |
|
||||||
|
| advanced | 73.0% | 8 | 84 | 115 |
|
||||||
|
| internals | 73.0% | 8 | 157 | 215 |
|
||||||
|
| types | 59.6% | 11 | 118 | 198 |
|
||||||
|
| runtimes | 55.8% | 6 | 53 | 95 |
|
||||||
|
| compatibility | 55.2% | 8 | 58 | 105 |
|
||||||
|
|
||||||
|
### 5) hf.co/unsloth/Phi-4-mini-reasoning-GGUF:Q8_0
|
||||||
|
- **Aggregated:** 65.5% - **Count:** 49 - **Sum Total:** 539
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| syntax | 76.8% | 8 | 73 | 95 |
|
||||||
|
| internals | 76.7% | 8 | 165 | 215 |
|
||||||
|
| types | 70.7% | 11 | 140 | 198 |
|
||||||
|
| runtimes | 55.8% | 6 | 53 | 95 |
|
||||||
|
| advanced | 50.4% | 8 | 58 | 115 |
|
||||||
|
| compatibility | 47.6% | 8 | 50 | 105 |
|
||||||
|
|
||||||
|
### 6) hf.co/unsloth/gemma-3n-E2B-it-GGUF:Q8_0
|
||||||
|
- **Aggregated:** 49.9% - **Count:** 49 - **Sum Total:** 411
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| syntax | 62.1% | 8 | 59 | 95 |
|
||||||
|
| internals | 54.4% | 8 | 117 | 215 |
|
||||||
|
| advanced | 48.7% | 8 | 56 | 115 |
|
||||||
|
| types | 47.5% | 11 | 94 | 198 |
|
||||||
|
| compatibility | 44.8% | 8 | 47 | 105 |
|
||||||
|
| runtimes | 40.0% | 6 | 38 | 95 |
|
||||||
|
|
||||||
|
### 7) hf.co/unsloth/Qwen3-0.6B-GGUF:BF16
|
||||||
|
- **Aggregated:** 45.8% - **Count:** 49 - **Sum Total:** 377
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| advanced | 60.9% | 8 | 70 | 115 |
|
||||||
|
| internals | 49.8% | 8 | 107 | 215 |
|
||||||
|
| compatibility | 45.7% | 8 | 48 | 105 |
|
||||||
|
| types | 42.4% | 11 | 84 | 198 |
|
||||||
|
| runtimes | 36.8% | 6 | 35 | 95 |
|
||||||
|
| syntax | 34.7% | 8 | 33 | 95 |
|
||||||
|
|
||||||
|
### 8) hf.co/unsloth/gemma-3-4b-it-GGUF:Q8_0
|
||||||
|
- **Aggregated:** 44.2% - **Count:** 49 - **Sum Total:** 364
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| syntax | 58.9% | 8 | 56 | 95 |
|
||||||
|
| advanced | 53.0% | 8 | 61 | 115 |
|
||||||
|
| types | 47.5% | 11 | 94 | 198 |
|
||||||
|
| internals | 44.2% | 8 | 95 | 215 |
|
||||||
|
| compatibility | 31.4% | 8 | 33 | 105 |
|
||||||
|
| runtimes | 26.3% | 6 | 25 | 95 |
|
||||||
|
|
||||||
|
### 9) hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0
|
||||||
|
- **Aggregated:** 42.2% - **Count:** 49 - **Sum Total:** 347
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| advanced | 65.2% | 8 | 75 | 115 |
|
||||||
|
| syntax | 46.3% | 8 | 44 | 95 |
|
||||||
|
| internals | 40.5% | 8 | 87 | 215 |
|
||||||
|
| runtimes | 37.9% | 6 | 36 | 95 |
|
||||||
|
| types | 35.4% | 11 | 70 | 198 |
|
||||||
|
| compatibility | 33.3% | 8 | 35 | 105 |
|
||||||
|
|
||||||
|
### 10) hf.co/unsloth/gemma-3-1b-it-GGUF:BF16
|
||||||
|
- **Aggregated:** 35.0% - **Count:** 49 - **Sum Total:** 288
|
||||||
|
| Category | % | Count | Total | Max |
|
||||||
|
|---|---:|---:|---:|---:|
|
||||||
|
| advanced | 43.5% | 8 | 50 | 115 |
|
||||||
|
| runtimes | 38.9% | 6 | 37 | 95 |
|
||||||
|
| internals | 37.2% | 8 | 80 | 215 |
|
||||||
|
| types | 33.3% | 11 | 66 | 198 |
|
||||||
|
| syntax | 31.6% | 8 | 30 | 95 |
|
||||||
|
| compatibility | 23.8% | 8 | 25 | 105 |
|
212
src/init.luau
212
src/init.luau
@@ -1,78 +1,134 @@
|
|||||||
local h = require("@lib/html")
|
local h = require("@lib/html")
|
||||||
|
local fs = require("@lune/fs")
|
||||||
return h.html({ lang = "en" })({
|
|
||||||
h.head()({
|
local function b64encode(data: string): string
|
||||||
h.meta({ charset = "utf-8" }),
|
local b = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
|
||||||
h.meta({ name = "viewport", content = "width=device-width, initial-scale=1" }),
|
local bytes = { string.byte(data, 1, #data) }
|
||||||
h.meta({ name = "color-scheme", content = "dark light" }),
|
local out = {}
|
||||||
h.title()({
|
local i = 1
|
||||||
"luau software",
|
while i <= #bytes do
|
||||||
}),
|
local b1 = bytes[i]
|
||||||
h.link({
|
local b2 = bytes[i + 1]
|
||||||
rel = "icon",
|
local b3 = bytes[i + 2]
|
||||||
href = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAAGFBMVEUAov4Aov4Aof4Aov7l8/6l0v5ctP4Fof7s4QukAAAABHRSTlMBRY/MU7dEywAAALZJREFUeNqNk0EOwyAMBGvs3f3/j1sXWkKDK+biQ0YwisXjFPdm/76bJL0cq4VBdVBTp5RcSS2ZbqxJTQVe3QBwEfRLJJRakcCPYEUCIqkTugCpVQkEqoQJJdn5XwB1Y7khtoJ9BYR2zEUAkkBxmzBuYOQkuEnIbwJyAgFxSWgpAATSec8QMRM8ZYIIBIGUIewW0c9g9JkJA7N2lSKTx6on00nrsupC4txkLdWPzNxrYR7kp8/+CUlvDvtDdoJIAAAAAElFTkSuQmCC",
|
local n = b1 * 65536 + (b2 or 0) * 256 + (b3 or 0)
|
||||||
}),
|
local c1 = math.floor(n / 262144) % 64 + 1
|
||||||
h.link({
|
local c2 = math.floor(n / 4096) % 64 + 1
|
||||||
rel = "stylesheet",
|
local c3 = math.floor(n / 64) % 64 + 1
|
||||||
href = "https://cdn.jsdelivr.net/npm/@picocss/pico@2.1.1/css/pico.classless.min.css",
|
local c4 = (n % 64) + 1
|
||||||
}),
|
if not b2 then
|
||||||
}),
|
table.insert(out, string.sub(b, c1, c1))
|
||||||
h.body()({
|
table.insert(out, string.sub(b, c2, c2))
|
||||||
h.header()({
|
table.insert(out, "==")
|
||||||
h.h1()({
|
elseif not b3 then
|
||||||
"luau software",
|
table.insert(out, string.sub(b, c1, c1))
|
||||||
}),
|
table.insert(out, string.sub(b, c2, c2))
|
||||||
h.p()({
|
table.insert(out, string.sub(b, c3, c3))
|
||||||
"my personal website. i make software using luau. this website is written in luau.",
|
table.insert(out, "=")
|
||||||
}),
|
else
|
||||||
}),
|
table.insert(out, string.sub(b, c1, c1))
|
||||||
h.main()({
|
table.insert(out, string.sub(b, c2, c2))
|
||||||
h.p()({
|
table.insert(out, string.sub(b, c3, c3))
|
||||||
"you can find most of what you're looking for here:",
|
table.insert(out, string.sub(b, c4, c4))
|
||||||
}),
|
end
|
||||||
h.div({ style = "display: flex; flex-direction: row; flex-wrap: wrap; column-gap: 10px;" })({
|
i += 3
|
||||||
h.article()({
|
end
|
||||||
h.header()({
|
return table.concat(out)
|
||||||
"my email",
|
end
|
||||||
}),
|
|
||||||
h.p()({
|
local resultsMarkdown = ""
|
||||||
"cyclic@luau.software",
|
do
|
||||||
}),
|
local ok, content = pcall(fs.readFile, "results.md")
|
||||||
}),
|
if ok and typeof(content) == "string" then
|
||||||
h.article()({
|
resultsMarkdown = content
|
||||||
h.header()({
|
else
|
||||||
"my git (including projects)",
|
resultsMarkdown = "(results.md not found or could not be read)"
|
||||||
}),
|
end
|
||||||
h.a({ href = "https://git.luau.software/cyclic" })({
|
end
|
||||||
"profile",
|
|
||||||
}),
|
local resultsMarkdownB64 = b64encode(resultsMarkdown)
|
||||||
}),
|
local renderScript = [[
|
||||||
h.article()({
|
(function(){
|
||||||
h.header()({
|
var el = document.getElementById('results-rendered');
|
||||||
"my matrix",
|
if (!el || !window.marked) { return; }
|
||||||
}),
|
var b64 = el.getAttribute('data-md-b64') || '';
|
||||||
h.a({ href = "https://matrix.to/#/@cyclic:luau.software" })({
|
try {
|
||||||
"@cyclic:luau.software",
|
var md = atob(b64);
|
||||||
}),
|
el.innerHTML = window.marked.parse(md, { gfm: true, breaks: false });
|
||||||
}),
|
} catch (e) {
|
||||||
h.article()({
|
el.textContent = 'Failed to render markdown: ' + e;
|
||||||
h.header()({
|
}
|
||||||
"my public pgp key",
|
})();
|
||||||
}),
|
]]
|
||||||
h.a({ href = "https://git.luau.software/cyclic/public-keys/raw/branch/main/publickey.asc" })({
|
local renderScriptSrc = "data:text/javascript;base64," .. b64encode(renderScript)
|
||||||
"pgp key",
|
|
||||||
}),
|
return h.html({ lang = "en" })({
|
||||||
}),
|
h.head()({
|
||||||
h.article()({
|
h.meta({ charset = "utf-8" }),
|
||||||
h.header()({
|
h.meta({ name = "viewport", content = "width=device-width, initial-scale=1" }),
|
||||||
"my openalias (xmr)",
|
h.meta({ name = "color-scheme", content = "dark light" }),
|
||||||
}),
|
h.title()({
|
||||||
h.p()({
|
"lsf bench",
|
||||||
"cyclic@luau.software",
|
}),
|
||||||
}),
|
h.link({
|
||||||
}),
|
rel = "icon",
|
||||||
h.img({src="https://git.luau.software/luau.software/luau.software/raw/branch/main/public/luauchan.png", style = "position: absolute; bottom: 0px; right: 0px; width: 300px; height: auto;"})({}), -- this is a stupid way of doing things
|
href = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACAAAAAgCAMAAABEpIrGAAAAGFBMVEUAov4Aov4Aof4Aov7l8/6l0v5ctP4Fof7s4QukAAAABHRSTlMBRY/MU7dEywAAALZJREFUeNqNk0EOwyAMBGvs3f3/j1sXWkKDK+biQ0YwisXjFPdm/76bJL0cq4VBdVBTp5RcSS2ZbqxJTQVe3QBwEfRLJJRakcCPYEUCIqkTugCpVQkEqoQJJdn5XwB1Y7khtoJ9BYR2zEUAkkBxmzBuYOQkuEnIbwJyAgFxSWgpAATSec8QMRM8ZYIIBIGUIewW0c9g9JkJA7N2lSKTx6on00nrsupC4txkLdWPzNxrYR7kp8/+CUlvDvtDdoJIAAAAAElFTkSuQmCC",
|
||||||
}),
|
}),
|
||||||
}),
|
h.link({
|
||||||
}),
|
rel = "stylesheet",
|
||||||
})
|
href = "https://cdn.jsdelivr.net/npm/@picocss/pico@2.1.1/css/pico.classless.min.css",
|
||||||
|
}),
|
||||||
|
h.script({ src = "https://cdn.jsdelivr.net/npm/marked/marked.min.js", defer = "defer" })({}),
|
||||||
|
h.script({ src = renderScriptSrc, defer = "defer" })({}),
|
||||||
|
}),
|
||||||
|
h.body()({
|
||||||
|
h.header()({
|
||||||
|
h.h1()({
|
||||||
|
"lsf bench (luau.software bench)",
|
||||||
|
}),
|
||||||
|
h.p()({
|
||||||
|
"a benchmark for testing local LLMs capability with Luau programming - written in Luau",
|
||||||
|
}),
|
||||||
|
h.p()({
|
||||||
|
"ollama was used for inference, all model names are the exact model names you'd ollama run",
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
h.main()({
|
||||||
|
h.section()({
|
||||||
|
h.h2()({ "Benchmark Results" }),
|
||||||
|
h.div({ id = "results-rendered", ["data-md-b64"] = resultsMarkdownB64 })({}),
|
||||||
|
h.noscript()({
|
||||||
|
"JavaScript is required to render Markdown. Showing raw contents:",
|
||||||
|
h.pre({ style = "white-space: pre; overflow-x: auto;" })({
|
||||||
|
h.code({ class = "language-markdown" })({ resultsMarkdown }),
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
h.hr()({}),
|
||||||
|
}),
|
||||||
|
h.header()({
|
||||||
|
h.h1()({ "info for this benchmark:" }),
|
||||||
|
}),
|
||||||
|
h.div({ style = "display: flex; flex-direction: row; flex-wrap: wrap; column-gap: 10px;" })({
|
||||||
|
h.article()({
|
||||||
|
h.header()({
|
||||||
|
"my email (contact for questions about the benchmark)",
|
||||||
|
}),
|
||||||
|
h.p()({
|
||||||
|
"cyclic@luau.software",
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
h.article()({
|
||||||
|
h.header()({
|
||||||
|
"the git repo for the source and partial technical details",
|
||||||
|
}),
|
||||||
|
h.a({ href = "https://git.luau.software/cyclic/lsfbench" })({
|
||||||
|
"lsfbench",
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
h.img({
|
||||||
|
src = "https://git.luau.software/luau.software/luau.software/raw/branch/main/public/luauchan.png",
|
||||||
|
style = "position: fixed; bottom: 0px; right: 0px; width: 300px; height: auto; z-index: 9999;",
|
||||||
|
})({})
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
}),
|
||||||
|
})
|
||||||
|
Reference in New Issue
Block a user