diff --git a/README.md b/README.md index 78a99e6..4d3d639 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,6 @@ [Install Lune](https://lune-org.github.io/docs/getting-started/1-installation/), then in this directory run `lune run src/build`. A `index.html` file will be created in `dist/`. You can either open that directly in your browser, or run `lune run src/dev` which will host it's contents on `http://localhost:3000`. -I essentially just forked [this website](https://luau.page/), because I too like making websites in Luau. \ No newline at end of file +I essentially just forked [this website](https://luau.page/), because I too like making websites in Luau. + +## lsf-bench specific: +this is just a fork of the homepage with some javascript added for rendering the markdown of the benchmark results \ No newline at end of file diff --git a/results.md b/results.md new file mode 100644 index 0000000..bbdac32 --- /dev/null +++ b/results.md @@ -0,0 +1,126 @@ +| Rank | Model | Weighted % | Total Count | Sum Total | +|---:|---|---:|---:|---:| +| 1 | gpt-oss:20b | 89.1% | 49 | 733 | +| 2 | hf.co/BasedBase/Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2:Q4_K_M | 88.2% | 49 | 726 | +| 3 | hf.co/bartowski/NousResearch_Hermes-4-14B-GGUF:Q4_K_M | 73.6% | 49 | 606 | +| 4 | hf.co/unsloth/gemma-3n-E4B-it-GGUF:Q8_0 | 66.0% | 49 | 543 | +| 5 | hf.co/unsloth/Phi-4-mini-reasoning-GGUF:Q8_0 | 65.5% | 49 | 539 | +| 6 | hf.co/unsloth/gemma-3n-E2B-it-GGUF:Q8_0 | 49.9% | 49 | 411 | +| 7 | hf.co/unsloth/Qwen3-0.6B-GGUF:BF16 | 45.8% | 49 | 377 | +| 8 | hf.co/unsloth/gemma-3-4b-it-GGUF:Q8_0 | 44.2% | 49 | 364 | +| 9 | hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0 | 42.2% | 49 | 347 | +| 10 | hf.co/unsloth/gemma-3-1b-it-GGUF:BF16 | 35.0% | 49 | 288 | + +--- + +## Per-category stats + +### 1) gpt-oss:20b +- **Aggregated:** 89.1% - **Count:** 49 - **Sum Total:** 733 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| syntax | 100.0% | 8 | 95 | 95 | +| types | 95.5% | 11 | 189 | 
198 | +| advanced | 93.0% | 8 | 107 | 115 | +| internals | 88.4% | 8 | 190 | 215 | +| runtimes | 78.9% | 6 | 75 | 95 | +| compatibility | 66.7% | 8 | 70 | 105 | + +### 2) hf.co/BasedBase/Qwen3-Coder-30B-A3B-Instruct-480B-Distill-V2:Q4_K_M +- **Aggregated:** 88.2% - **Count:** 49 - **Sum Total:** 726 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| syntax | 100.0% | 8 | 95 | 95 | +| types | 95.5% | 11 | 189 | 198 | +| advanced | 93.0% | 8 | 107 | 115 | +| internals | 88.4% | 8 | 190 | 215 | +| runtimes | 78.9% | 6 | 75 | 95 | +| compatibility | 66.7% | 8 | 70 | 105 | + +### 3) hf.co/bartowski/NousResearch_Hermes-4-14B-GGUF:Q4_K_M +- **Aggregated:** 73.6% - **Count:** 49 - **Sum Total:** 606 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| syntax | 82.1% | 8 | 78 | 95 | +| types | 80.8% | 11 | 160 | 198 | +| advanced | 78.3% | 8 | 90 | 115 | +| internals | 75.8% | 8 | 163 | 215 | +| runtimes | 57.9% | 6 | 55 | 95 | +| compatibility | 57.1% | 8 | 60 | 105 | + +### 4) hf.co/unsloth/gemma-3n-E4B-it-GGUF:Q8_0 +- **Aggregated:** 66.0% - **Count:** 49 - **Sum Total:** 543 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| syntax | 76.8% | 8 | 73 | 95 | +| advanced | 73.0% | 8 | 84 | 115 | +| internals | 73.0% | 8 | 157 | 215 | +| types | 59.6% | 11 | 118 | 198 | +| runtimes | 55.8% | 6 | 53 | 95 | +| compatibility | 55.2% | 8 | 58 | 105 | + +### 5) hf.co/unsloth/Phi-4-mini-reasoning-GGUF:Q8_0 +- **Aggregated:** 65.5% - **Count:** 49 - **Sum Total:** 539 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| syntax | 76.8% | 8 | 73 | 95 | +| internals | 76.7% | 8 | 165 | 215 | +| types | 70.7% | 11 | 140 | 198 | +| runtimes | 55.8% | 6 | 53 | 95 | +| advanced | 50.4% | 8 | 58 | 115 | +| compatibility | 47.6% | 8 | 50 | 105 | + +### 6) hf.co/unsloth/gemma-3n-E2B-it-GGUF:Q8_0 +- **Aggregated:** 49.9% - **Count:** 49 - **Sum Total:** 411 +| Category | % | Count | Total | Max | 
+|---|---:|---:|---:|---:| +| syntax | 62.1% | 8 | 59 | 95 | +| internals | 54.4% | 8 | 117 | 215 | +| advanced | 48.7% | 8 | 56 | 115 | +| types | 47.5% | 11 | 94 | 198 | +| compatibility | 44.8% | 8 | 47 | 105 | +| runtimes | 40.0% | 6 | 38 | 95 | + +### 7) hf.co/unsloth/Qwen3-0.6B-GGUF:BF16 +- **Aggregated:** 45.8% - **Count:** 49 - **Sum Total:** 377 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| advanced | 60.9% | 8 | 70 | 115 | +| internals | 49.8% | 8 | 107 | 215 | +| compatibility | 45.7% | 8 | 48 | 105 | +| types | 42.4% | 11 | 84 | 198 | +| runtimes | 36.8% | 6 | 35 | 95 | +| syntax | 34.7% | 8 | 33 | 95 | + +### 8) hf.co/unsloth/gemma-3-4b-it-GGUF:Q8_0 +- **Aggregated:** 44.2% - **Count:** 49 - **Sum Total:** 364 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| syntax | 58.9% | 8 | 56 | 95 | +| advanced | 53.0% | 8 | 61 | 115 | +| types | 47.5% | 11 | 94 | 198 | +| internals | 44.2% | 8 | 95 | 215 | +| compatibility | 31.4% | 8 | 33 | 105 | +| runtimes | 26.3% | 6 | 25 | 95 | + +### 9) hf.co/bartowski/Llama-3.2-3B-Instruct-GGUF:Q8_0 +- **Aggregated:** 42.2% - **Count:** 49 - **Sum Total:** 347 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| advanced | 65.2% | 8 | 75 | 115 | +| syntax | 46.3% | 8 | 44 | 95 | +| internals | 40.5% | 8 | 87 | 215 | +| runtimes | 37.9% | 6 | 36 | 95 | +| types | 35.4% | 11 | 70 | 198 | +| compatibility | 33.3% | 8 | 35 | 105 | + +### 10) hf.co/unsloth/gemma-3-1b-it-GGUF:BF16 +- **Aggregated:** 35.0% - **Count:** 49 - **Sum Total:** 288 +| Category | % | Count | Total | Max | +|---|---:|---:|---:|---:| +| advanced | 43.5% | 8 | 50 | 115 | +| runtimes | 38.9% | 6 | 37 | 95 | +| internals | 37.2% | 8 | 80 | 215 | +| types | 33.3% | 11 | 66 | 198 | +| syntax | 31.6% | 8 | 30 | 95 | +| compatibility | 23.8% | 8 | 25 | 105 | \ No newline at end of file diff --git a/src/init.luau b/src/init.luau index a911566..27b5fb3 100644 --- a/src/init.luau +++ b/src/init.luau @@ 
-- Page definition for the lsf-bench site.
-- Reads results.md at build time, embeds it (base64) in the page, and ships a
-- small inline script that renders it in the browser with marked.js.
-- Returns the element tree built with the project's `@lib/html` helpers.
local h = require("@lib/html")
local fs = require("@lune/fs")

-- Standard base64 alphabet (RFC 4648, with "+" and "/"); 1-based lookup via string.sub.
local B64_ALPHABET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"

-- Encodes `data` (treated as raw bytes) as padded base64.
-- @param data string to encode; the empty string yields "".
-- @return the base64 text, "="-padded to a multiple of four characters.
local function b64encode(data: string): string
	local out = {}
	-- Read three bytes per step instead of unpacking the whole string with
	-- string.byte(data, 1, #data): that call returns one value per byte and
	-- fails ("string slice too long") once the input exceeds the VM stack limit.
	for i = 1, #data, 3 do
		-- Near the end of the input string.byte returns fewer values, so b2/b3 may be nil.
		local b1, b2, b3 = string.byte(data, i, i + 2)
		local n = b1 * 65536 + (b2 or 0) * 256 + (b3 or 0)

		-- Split the 24-bit group into four 6-bit indices (+1 for 1-based string.sub).
		local c1 = math.floor(n / 262144) % 64 + 1
		local c2 = math.floor(n / 4096) % 64 + 1
		local c3 = math.floor(n / 64) % 64 + 1
		local c4 = (n % 64) + 1

		out[#out + 1] = string.sub(B64_ALPHABET, c1, c1)
		out[#out + 1] = string.sub(B64_ALPHABET, c2, c2)
		if not b2 then
			-- One input byte in this group: two characters plus two pad characters.
			out[#out + 1] = "=="
		elseif not b3 then
			-- Two input bytes in this group: three characters plus one pad character.
			out[#out + 1] = string.sub(B64_ALPHABET, c3, c3)
			out[#out + 1] = "="
		else
			out[#out + 1] = string.sub(B64_ALPHABET, c3, c3)
			out[#out + 1] = string.sub(B64_ALPHABET, c4, c4)
		end
	end
	return table.concat(out)
end

-- Load the benchmark results. The path is relative to the working directory.
-- A read failure is deliberately non-fatal: the page is still produced, with a
-- placeholder message in place of the results.
local resultsMarkdown = ""
do
	local ok, content = pcall(fs.readFile, "results.md")
	if ok and typeof(content) == "string" then
		resultsMarkdown = content
	else
		resultsMarkdown = "(results.md not found or could not be read)"
	end
end

-- The markdown travels in a data attribute as base64 so it needs no HTML escaping there.
local resultsMarkdownB64 = b64encode(resultsMarkdown)

-- Browser-side renderer. atob() yields one character per *byte*, so the bytes
-- are decoded as UTF-8 with TextDecoder before being handed to marked;
-- otherwise any non-ASCII character in results.md would render as mojibake.
local renderScript = [[
	(function(){
		var el = document.getElementById('results-rendered');
		if (!el || !window.marked) { return; }
		var b64 = el.getAttribute('data-md-b64') || '';
		try {
			var raw = atob(b64);
			var bytes = new Uint8Array(raw.length);
			for (var i = 0; i < raw.length; i++) { bytes[i] = raw.charCodeAt(i); }
			var md = new TextDecoder('utf-8').decode(bytes);
			el.innerHTML = window.marked.parse(md, { gfm: true, breaks: false });
		} catch (e) {
			el.textContent = 'Failed to render markdown: ' + e;
		}
	})();
]]
-- Shipped as a data: URI so it can be a deferred `src` script; deferred scripts
-- run in document order, i.e. after marked.min.js declared just before it.
local renderScriptSrc = "data:text/javascript;base64," .. b64encode(renderScript)

return h.html({ lang = "en" })({
	h.head()({
		h.meta({ charset = "utf-8" }),
		h.meta({ name = "viewport", content = "width=device-width, initial-scale=1" }),
		h.meta({ name = "color-scheme", content = "dark light" }),
		h.title()({
			"lsf bench",
		}),
		h.link({
			rel = "icon",
			href = "",
		}),
		h.link({
			rel = "stylesheet",
			href = "https://cdn.jsdelivr.net/npm/@picocss/pico@2.1.1/css/pico.classless.min.css",
		}),
		h.script({ src = "https://cdn.jsdelivr.net/npm/marked/marked.min.js", defer = "defer" })({}),
		h.script({ src = renderScriptSrc, defer = "defer" })({}),
	}),
	h.body()({
		h.header()({
			h.h1()({
				"lsf bench (luau.software bench)",
			}),
			h.p()({
				"a benchmark for testing local LLMs capability with Luau programming - written in Luau",
			}),
			h.p()({
				"ollama was used for inference, all model names are the exact model names you'd ollama run",
			}),
		}),
		h.main()({
			h.section()({
				h.h2()({ "Benchmark Results" }),
				-- Filled in by renderScript once marked.js has loaded.
				h.div({ id = "results-rendered", ["data-md-b64"] = resultsMarkdownB64 })({}),
				-- Fallback for browsers without JavaScript: show the raw markdown.
				-- NOTE(review): relies on `h` escaping text children — confirm in @lib/html.
				h.noscript()({
					"JavaScript is required to render Markdown. Showing raw contents:",
					h.pre({ style = "white-space: pre; overflow-x: auto;" })({
						h.code({ class = "language-markdown" })({ resultsMarkdown }),
					}),
				}),
				h.hr()({}),
			}),
			h.header()({
				h.h1()({ "info for this benchmark:" }),
			}),
			h.div({ style = "display: flex; flex-direction: row; flex-wrap: wrap; column-gap: 10px;" })({
				h.article()({
					h.header()({
						"my email (contact for questions about the benchmark)",
					}),
					h.p()({
						"cyclic@luau.software",
					}),
				}),
				h.article()({
					h.header()({
						"the git repo for the source and partial technical details",
					}),
					h.a({ href = "https://git.luau.software/cyclic/lsfbench" })({
						"lsfbench",
					}),
				}),
				h.img({
					src = "https://git.luau.software/luau.software/luau.software/raw/branch/main/public/luauchan.png",
					style = "position: fixed; bottom: 0px; right: 0px; width: 300px; height: auto; z-index: 9999;",
				})({}),
			}),
		}),
	}),
})