MediaWiki:Gadget-LabelScanIndexer.js: Unterschied zwischen den Versionen
Erscheinungsbild
Admin (Diskussion | Beiträge) Die Seite wurde neu angelegt: „→Gadget: LabelScanIndexer * Lädt auf der Seite Hilfe:LabelScan-Indexer * Erzeugt Embeddings lokal (CLIP) und speichert automatisch in MediaWiki:Gadget-LabelScan-index.json: if (mw.config.get('wgPageName') !== 'Hilfe:LabelScan-Indexer') { // Läuft nur auf der Indexer-Seite return; } (function(){ const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json'; // Modell / Pfade (müssen zu deinem Setup passen) const transformersURL = 'http…“ |
Admin (Diskussion | Beiträge) Keine Bearbeitungszusammenfassung |
||
| Zeile 1: | Zeile 1: | ||
/* Gadget: LabelScanIndexer | /* Gadget: LabelScanIndexer (Auto-Save) | ||
* | * Läuft nur auf der Seite "Hilfe:LabelScan-Indexer" (Namespace Help = 12) | ||
* Erzeugt Embeddings lokal (CLIP) und speichert | * Erzeugt Embeddings lokal (CLIP) und speichert in MediaWiki:Gadget-LabelScan-index.json | ||
*/ | */ | ||
/* global mw */ | |||
(function () { | |||
// ---------- Seitenerkennung (robust) ---------- | |||
} | const NS = mw.config.get('wgNamespaceNumber'); // 12 = Help/Hilfe | ||
const TITLE = mw.config.get('wgTitle'); // nur der Titel ohne Namespace | |||
if (!(NS === 12 && TITLE === 'LabelScan-Indexer')) { | |||
// Debug-Hinweis, falls du auf der falschen Seite testest | |||
// console.debug('[LabelScanIndexer] nicht aktiv auf', NS, TITLE); | |||
return; | |||
} | |||
const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json'; | const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json'; | ||
// Modell / Pfade | // ---------- Modell / Pfade ---------- | ||
const | const TRANSFORMERS_URL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0'; | ||
const MODEL_ID = 'Xenova/clip-vit-base-patch32'; | const MODEL_ID = 'Xenova/clip-vit-base-patch32'; | ||
const LOCAL_MODEL_PATH = '/models'; | const LOCAL_MODEL_PATH = '/models'; | ||
const $ = id => document.getElementById(id); | // ---------- UI helpers ---------- | ||
const status = (t) => { const el=$('idx-status'); if(el) el.textContent=t||''; }; | const $ = (id) => document.getElementById(id); | ||
const status = (t) => { const el = $('idx-status'); if (el) el.textContent = t || ''; }; | |||
function | function hasInterfaceRight() { | ||
const | const groups = mw.config.get('wgUserGroups') || []; | ||
return | return groups.includes('interface-admin') || groups.includes('sysop'); | ||
} | } | ||
function float32ToBase64(vec){ | function float32ToBase64(vec) { | ||
const bytes = new Uint8Array(vec.buffer); | const bytes = new Uint8Array(vec.buffer); | ||
let bin = '', chunk = 0x8000; | let bin = '', chunk = 0x8000; | ||
for (let i=0; i<bytes.length; i+=chunk) { | for (let i = 0; i < bytes.length; i += chunk) { | ||
bin += String.fromCharCode.apply(null, bytes.subarray(i, i+chunk)); | bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk)); | ||
} | } | ||
return btoa(bin); | return btoa(bin); | ||
} | } | ||
async function fileToCanvasExif(file){ | // EXIF-korrekte Canvas-Erzeugung | ||
async function fileToCanvasExif(file) { | |||
if ('createImageBitmap' in window) { | if ('createImageBitmap' in window) { | ||
const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' }); | const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' }); | ||
| Zeile 47: | Zeile 55: | ||
return c; | return c; | ||
} | } | ||
} | } | ||
// Fallback klassisch | |||
const url = URL.createObjectURL(file); | |||
try { | |||
const img = await new Promise((res, rej) => { | |||
const im = new Image(); | |||
im.onload = () => res(im); | |||
im.onerror = rej; | |||
im.src = url; | |||
}); | |||
const c = document.createElement('canvas'); | |||
c.width = img.width; c.height = img.height; | |||
c.getContext('2d').drawImage(img, 0, 0); | |||
return c; | |||
} finally { | |||
URL.revokeObjectURL(url); | |||
} | } | ||
} | } | ||
// ---------- Transformers laden (einmalig) ---------- | |||
let _modelPromise; | let _modelPromise; | ||
async function ensureModel(){ | async function ensureModel() { | ||
if (_modelPromise) return _modelPromise; | if (_modelPromise) return _modelPromise; | ||
_modelPromise = (async()=>{ | _modelPromise = (async () => { | ||
const mod = await import(/* webpackIgnore: true */ | const mod = await import(/* webpackIgnore: true */ TRANSFORMERS_URL); | ||
mod.env.allowLocalModels = true; | mod.env.allowLocalModels = true; | ||
| Zeile 76: | Zeile 85: | ||
mod.env.localModelPath = LOCAL_MODEL_PATH; | mod.env.localModelPath = LOCAL_MODEL_PATH; | ||
// Optional WebGPU bevorzugen: | |||
// mod.env.backends = mod.env.backends || {}; | |||
// mod.env.backends.onnx = mod.env.backends.onnx || {}; | |||
// mod.env.backends.onnx.preferredBackend = 'webgpu'; | |||
// WASM-Runtime-Pfade | |||
mod.env.backends = mod.env.backends || {}; | mod.env.backends = mod.env.backends || {}; | ||
mod.env.backends.onnx = mod.env.backends.onnx || {}; | mod.env.backends.onnx = mod.env.backends.onnx || {}; | ||
| Zeile 84: | Zeile 99: | ||
const [processor, model] = await Promise.all([ | const [processor, model] = await Promise.all([ | ||
mod.AutoProcessor.from_pretrained(MODEL_ID), | mod.AutoProcessor.from_pretrained(MODEL_ID), | ||
mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true }) | mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true }), | ||
]); | ]); | ||
| Zeile 93: | Zeile 108: | ||
} | } | ||
async function buildEmbeddingFromFile(file){ | async function buildEmbeddingFromFile(file) { | ||
const { mod, processor, model } = await ensureModel(); | const { mod, processor, model } = await ensureModel(); | ||
const canvas = await fileToCanvasExif(file); | const canvas = await fileToCanvasExif(file); | ||
const blob = (canvas.convertToBlob) | const blob = (canvas.convertToBlob) | ||
? await canvas.convertToBlob({ type:'image/jpeg', quality:0.95 }) | ? await canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 }) | ||
: await new Promise(r => canvas.toBlob(r, 'image/jpeg', 0.95)); | : await new Promise((r) => canvas.toBlob(r, 'image/jpeg', 0.95)); | ||
const raw = await mod.RawImage.fromBlob(blob); | const raw = await mod.RawImage.fromBlob(blob); | ||
const inputs = await processor(raw, { return_tensors: 'pt' }); | const inputs = await processor(raw, { return_tensors: 'pt' }); | ||
const out = await model.forward({ pixel_values: inputs.pixel_values }); | const out = await model.forward({ pixel_values: inputs.pixel_values }); | ||
const vec = out?.image_embeds?.data || out?.image_embeds; | const vec = out?.image_embeds?.data || out?.image_embeds; | ||
if (!(vec instanceof Float32Array)) throw new Error('Embedding-Format unerwartet'); | if (!(vec instanceof Float32Array)) throw new Error('Embedding-Format unerwartet'); | ||
let n=0; for(let i=0;i<vec.length;i++) n+=vec[i]*vec[i]; | // Normieren | ||
const norm = Math.sqrt(n)||1; | let n = 0; for (let i = 0; i < vec.length; i++) n += vec[i] * vec[i]; | ||
const norm = Math.sqrt(n) || 1; | |||
const v = new Float32Array(vec.length); | const v = new Float32Array(vec.length); | ||
for(let i=0;i<vec.length;i++) v[i]=vec[i]/norm; | for (let i = 0; i < vec.length; i++) v[i] = vec[i] / norm; | ||
return v; | return v; | ||
} | } | ||
async function fetchIndexJSON(){ | // ---------- Index laden/speichern ---------- | ||
const url = mw.util.getUrl(INDEX_TITLE, { action:'raw', ctype:'application/json' }); | async function fetchIndexJSON() { | ||
const res = await fetch(url, { cache:'no-store' }); | const url = mw.util.getUrl(INDEX_TITLE, { action: 'raw', ctype: 'application/json' }); | ||
if (!res.ok) throw new Error('Index nicht ladbar: '+res.status); | const res = await fetch(url, { cache: 'no-store' }); | ||
if (!res.ok) throw new Error('Index nicht ladbar: ' + res.status); | |||
try { return JSON.parse(await res.text()) || []; } | try { return JSON.parse(await res.text()) || []; } | ||
catch(_){ return []; } | catch (_) { return []; } | ||
} | } | ||
async function saveIndexJSON(newArray){ | async function saveIndexJSON(newArray, summary) { | ||
await mw.loader.using(['mediawiki.api']); | await mw.loader.using(['mediawiki.api']); | ||
const api = new mw.Api(); | const api = new mw.Api(); | ||
const text = JSON.stringify(newArray, null, 2) + '\n'; | const text = JSON.stringify(newArray, null, 2) + '\n'; | ||
return api.postWithToken('csrf', { | return api.postWithToken('csrf', { | ||
action: 'edit', | action: 'edit', | ||
title: INDEX_TITLE, | title: INDEX_TITLE, | ||
text, | text, | ||
summary: 'LabelScan: +1 embedding (Auto-Indexer)', | summary: summary || 'LabelScan: +1 embedding (Auto-Indexer)', | ||
nocreate: 0, | nocreate: 0, | ||
bot: 1 | bot: 1 | ||
| Zeile 136: | Zeile 151: | ||
} | } | ||
// ---------- Click-Handler ---------- | |||
try{ | const runBtn = document.getElementById('idx-run'); | ||
if (! | if (!runBtn) { | ||
console.warn('[LabelScanIndexer] Button #idx-run nicht gefunden – ist das HTML auf der Seite eingebunden?'); | |||
return; | |||
} | |||
runBtn.addEventListener('click', async () => { | |||
try { | |||
if (!hasInterfaceRight()) { | |||
alert('⚠️ Du brauchst Admin/Interface-Rechte (editinterface).'); | |||
return; | |||
} | |||
const title = $('idx-title').value.trim(); | const title = $('idx-title')?.value.trim(); | ||
const thumb = $('idx-thumb').value.trim(); | const thumb = $('idx-thumb')?.value.trim(); | ||
const file | const file = $('idx-file')?.files?.[0]; | ||
if (!title) return alert('Titel fehlt.'); | if (!title) return alert('Titel fehlt.'); | ||
if (!file) | if (!file) return alert('Bitte eine Bilddatei wählen.'); | ||
runBtn.disabled = true; | |||
status('Embedding berechnen …'); | status('Embedding berechnen …'); | ||
| Zeile 151: | Zeile 178: | ||
const b64 = float32ToBase64(vec); | const b64 = float32ToBase64(vec); | ||
$('idx-out').value = JSON.stringify({title, thumb, embed:b64}, null, 2); | $('idx-out').value = JSON.stringify({ title, thumb, embed: b64 }, null, 2); | ||
status('Index laden …'); | |||
const arr = await fetchIndexJSON(); | const arr = await fetchIndexJSON(); | ||
arr.push({ title, thumb, embed:b64 }); | arr.push({ title, thumb, embed: b64 }); | ||
status('Speichern …'); | status('Speichern …'); | ||
await saveIndexJSON(arr); | await saveIndexJSON(arr, `LabelScan: +1 embedding für "${title}"`); | ||
status('Gespeichert ✅'); | status('Gespeichert ✅'); | ||
} catch(e){ | } catch (e) { | ||
console.error(e); | console.error(e); | ||
status('Fehler ❌ ' + (e?.message || e)); | |||
alert('Fehler: ' + (e?.message || e)); | |||
} finally { | |||
runBtn.disabled = false; | |||
} | } | ||
}); | }); | ||
Version vom 9. November 2025, 16:26 Uhr
/* Gadget: LabelScanIndexer (Auto-Save)
* Läuft nur auf der Seite "Hilfe:LabelScan-Indexer" (Namespace Help = 12)
* Erzeugt Embeddings lokal (CLIP) und speichert in MediaWiki:Gadget-LabelScan-index.json
*/
/* global mw */
(function () {
// ---------- Page detection ----------
// The gadget only activates on one specific page. Namespace number and bare
// title are compared separately instead of matching the full page name.
const NS = mw.config.get('wgNamespaceNumber'); // 12 = Help/Hilfe
const TITLE = mw.config.get('wgTitle'); // title only, without the namespace prefix
if (!(NS === 12 && TITLE === 'LabelScan-Indexer')) {
// Debug hint in case you are testing on the wrong page
// console.debug('[LabelScanIndexer] nicht aktiv auf', NS, TITLE);
return;
}
// Wiki page that stores the embedding index (read and written by the functions below).
const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';
// ---------- Model / paths ----------
// URL the transformers library is dynamically imported from.
const TRANSFORMERS_URL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0';
// Model identifier passed to from_pretrained().
const MODEL_ID = 'Xenova/clip-vit-base-patch32';
// Assigned to env.localModelPath when the library is configured.
const LOCAL_MODEL_PATH = '/models';
// ---------- UI helpers ----------
/** Shorthand for document.getElementById. */
const $ = (id) => document.getElementById(id);

/**
 * Writes a status message into the #idx-status element.
 * Does nothing when that element is not on the page; falsy input clears the text.
 */
const status = (t) => {
  const target = $('idx-status');
  if (!target) return;
  target.textContent = t || '';
};
/**
 * Checks whether the current user is a member of a group that is accepted
 * for writing the index page.
 *
 * @returns {boolean} true if wgUserGroups contains 'interface-admin' or 'sysop'.
 */
function hasInterfaceRight() {
  const memberOf = mw.config.get('wgUserGroups') || [];
  const accepted = ['interface-admin', 'sysop'];
  return accepted.some((group) => memberOf.includes(group));
}
/**
 * Encodes the raw bytes of a typed array as a Base64 string.
 *
 * Only the bytes that belong to `vec` itself are encoded: byteOffset and
 * byteLength are honoured, so a view created with subarray() (or a tensor
 * slice sharing a larger buffer) does not leak the rest of the backing buffer
 * into the output.
 *
 * @param {Float32Array} vec - Vector to encode (any ArrayBuffer view works).
 * @returns {string} Base64 representation of the vector's bytes.
 */
function float32ToBase64(vec) {
  const bytes = new Uint8Array(vec.buffer, vec.byteOffset, vec.byteLength);
  // Convert in chunks: passing a very large array to apply() can exceed the
  // engine's argument-count limit.
  const chunk = 0x8000;
  let bin = '';
  for (let i = 0; i < bytes.length; i += chunk) {
    bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk));
  }
  return btoa(bin);
}
/**
 * Draws an image file onto a canvas, requesting EXIF-aware orientation when
 * createImageBitmap is available.
 *
 * Preferred path: createImageBitmap with `imageOrientation: 'from-image'`,
 * drawn onto an OffscreenCanvas if the browser has one, otherwise onto a
 * regular <canvas>. The bitmap is closed once it has been drawn so its decoded
 * pixel data is released immediately instead of waiting for garbage collection.
 *
 * Fallback path (no createImageBitmap): load the file through an object URL
 * into an Image element and draw that; the object URL is always revoked.
 *
 * @param {Blob} file - Image file selected by the user.
 * @returns {Promise<OffscreenCanvas|HTMLCanvasElement>} Canvas holding the image.
 * @throws {Error} In the fallback path when the image cannot be loaded.
 */
async function fileToCanvasExif(file) {
  if ('createImageBitmap' in window) {
    const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' });
    try {
      let c;
      if ('OffscreenCanvas' in window) {
        c = new OffscreenCanvas(bmp.width, bmp.height);
      } else {
        c = document.createElement('canvas');
        c.width = bmp.width;
        c.height = bmp.height;
      }
      c.getContext('2d').drawImage(bmp, 0, 0);
      return c;
    } finally {
      // The pixels now live on the canvas; free the bitmap's memory right away.
      if (typeof bmp.close === 'function') bmp.close();
    }
  }

  // Classic fallback via <img> + object URL.
  const url = URL.createObjectURL(file);
  try {
    const img = await new Promise((res, rej) => {
      const im = new Image();
      im.onload = () => res(im);
      // Reject with a real Error: the raw error Event has no message and
      // would be shown to the user as "[object Event]".
      im.onerror = () => rej(new Error('Bild konnte nicht geladen werden'));
      im.src = url;
    });
    const c = document.createElement('canvas');
    c.width = img.width;
    c.height = img.height;
    c.getContext('2d').drawImage(img, 0, 0);
    return c;
  } finally {
    URL.revokeObjectURL(url);
  }
}
// ---------- Load transformers (once) ----------
// Cached promise for the loaded library/processor/model; undefined until the
// first call and reset to undefined again if loading fails.
let _modelPromise;

/**
 * Loads the transformers library, configures it and instantiates the image
 * processor and the CLIP vision model. The work is done once; concurrent and
 * later callers share the same promise.
 *
 * If loading fails (network hiccup, missing model files, ...) the cached
 * promise is discarded, so the next call starts a fresh attempt instead of
 * returning the same rejection until the page is reloaded.
 *
 * @returns {Promise<{mod: object, processor: object, model: object}>}
 *   The imported module plus the ready-to-use processor and model.
 */
async function ensureModel() {
  if (_modelPromise) return _modelPromise;
  _modelPromise = (async () => {
    const mod = await import(/* webpackIgnore: true */ TRANSFORMERS_URL);
    // Models are resolved via LOCAL_MODEL_PATH only; remote model loading is disabled.
    mod.env.allowLocalModels = true;
    mod.env.allowRemoteModels = false;
    mod.env.localModelPath = LOCAL_MODEL_PATH;
    // Optionally prefer WebGPU:
    // mod.env.backends.onnx.preferredBackend = 'webgpu';
    // WASM runtime paths: taken from the same URL the library itself was
    // imported from, so the version cannot drift between the two.
    mod.env.backends = mod.env.backends || {};
    mod.env.backends.onnx = mod.env.backends.onnx || {};
    mod.env.backends.onnx.wasm = mod.env.backends.onnx.wasm || {};
    mod.env.backends.onnx.wasm.wasmPaths = `${TRANSFORMERS_URL}/dist/`;
    const [processor, model] = await Promise.all([
      mod.AutoProcessor.from_pretrained(MODEL_ID),
      mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true }),
    ]);
    console.log('[LabelScanIndexer] Modell geladen');
    return { mod, processor, model };
  })().catch((err) => {
    // Do not cache failures: allow the user to simply click again.
    _modelPromise = undefined;
    throw err;
  });
  return _modelPromise;
}
/**
 * Computes an L2-normalized image embedding for an image file.
 *
 * Steps: draw the file onto a canvas, re-encode the canvas as JPEG
 * (quality 0.95), run the blob through the processor and the vision model,
 * then scale the resulting vector to unit length.
 *
 * @param {Blob} file - Image file selected by the user.
 * @returns {Promise<Float32Array>} New vector with Euclidean norm 1 (an
 *   all-zero model output is returned unscaled).
 * @throws {Error} If the canvas cannot be encoded or the model output is not
 *   a Float32Array.
 */
async function buildEmbeddingFromFile(file) {
  const { mod, processor, model } = await ensureModel();
  const canvas = await fileToCanvasExif(file);
  // OffscreenCanvas offers convertToBlob(); a regular canvas only has the
  // callback-based toBlob().
  const blob = canvas.convertToBlob
    ? await canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 })
    : await new Promise((r) => canvas.toBlob(r, 'image/jpeg', 0.95));
  // toBlob() reports failure (e.g. a zero-sized or oversized canvas) by
  // passing null; fail with a clear message instead of deep inside the library.
  if (!blob) throw new Error('Bild konnte nicht kodiert werden');

  const raw = await mod.RawImage.fromBlob(blob);
  const inputs = await processor(raw, { return_tensors: 'pt' });
  const out = await model.forward({ pixel_values: inputs.pixel_values });
  // Accept either a tensor-like object exposing .data or a bare typed array.
  const vec = out?.image_embeds?.data || out?.image_embeds;
  if (!(vec instanceof Float32Array)) throw new Error('Embedding-Format unerwartet');

  // Normalize to unit length; `|| 1` avoids dividing by zero.
  const sumOfSquares = vec.reduce((sum, x) => sum + x * x, 0);
  const norm = Math.sqrt(sumOfSquares) || 1;
  return vec.map((x) => x / norm);
}
// ---------- Load / save index ----------
/**
 * Fetches the current index array from the wiki page INDEX_TITLE (action=raw).
 *
 * The result is later extended and written back as the full page content, so
 * this function must never pretend the index is empty when it merely could
 * not be read:
 *  - HTTP 404 (page does not exist yet) or an empty page -> `[]`, a new index
 *    is started.
 *  - Any other HTTP error -> throws.
 *  - Content that is not valid JSON or not an array -> throws, instead of
 *    returning `[]` and letting the caller overwrite the existing page.
 *
 * @returns {Promise<Array>} The parsed index entries.
 * @throws {Error} If the index cannot be loaded or is malformed.
 */
async function fetchIndexJSON() {
  const url = mw.util.getUrl(INDEX_TITLE, { action: 'raw', ctype: 'application/json' });
  const res = await fetch(url, { cache: 'no-store' });
  if (res.status === 404) return [];
  if (!res.ok) throw new Error('Index nicht ladbar: ' + res.status);

  const text = (await res.text()).trim();
  if (text === '') return [];

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch (err) {
    throw new Error('Index enthält ungültiges JSON – nicht überschrieben', { cause: err });
  }
  // A literal `null` is treated like an empty index.
  if (parsed === null) return [];
  if (!Array.isArray(parsed)) throw new Error('Index ist kein JSON-Array');
  return parsed;
}
/**
 * Writes the complete index array to the wiki page INDEX_TITLE, replacing its
 * previous content (the page is created if it does not exist).
 *
 * Note on `nocreate`: the Action API treats a boolean parameter as true
 * whenever it is present, whatever its value, and mw.Api only drops
 * parameters that are `false`/`undefined`. Sending `nocreate: 0` would
 * therefore forbid page creation, so the parameter is omitted entirely.
 *
 * @param {Array} newArray - Full list of index entries to store.
 * @param {string} [summary] - Edit summary; a generic default is used if empty.
 * @returns {Promise<object>} The API response of the edit request.
 */
async function saveIndexJSON(newArray, summary) {
  await mw.loader.using(['mediawiki.api']);
  const api = new mw.Api();
  const text = JSON.stringify(newArray, null, 2) + '\n';
  return api.postWithToken('csrf', {
    action: 'edit',
    title: INDEX_TITLE,
    text,
    summary: summary || 'LabelScan: +1 embedding (Auto-Indexer)',
    bot: 1,
  });
}
// ---------- Click-Handler ----------
const runBtn = document.getElementById('idx-run');
if (!runBtn) {
console.warn('[LabelScanIndexer] Button #idx-run nicht gefunden – ist das HTML auf der Seite eingebunden?');
return;
}
runBtn.addEventListener('click', async () => {
try {
if (!hasInterfaceRight()) {
alert('⚠️ Du brauchst Admin/Interface-Rechte (editinterface).');
return;
}
const title = $('idx-title')?.value.trim();
const thumb = $('idx-thumb')?.value.trim();
const file = $('idx-file')?.files?.[0];
if (!title) return alert('Titel fehlt.');
if (!file) return alert('Bitte eine Bilddatei wählen.');
runBtn.disabled = true;
status('Embedding berechnen …');
const vec = await buildEmbeddingFromFile(file);
const b64 = float32ToBase64(vec);
$('idx-out').value = JSON.stringify({ title, thumb, embed: b64 }, null, 2);
status('Index laden …');
const arr = await fetchIndexJSON();
arr.push({ title, thumb, embed: b64 });
status('Speichern …');
await saveIndexJSON(arr, `LabelScan: +1 embedding für "${title}"`);
status('Gespeichert ✅');
} catch (e) {
console.error(e);
status('Fehler ❌ ' + (e?.message || e));
alert('Fehler: ' + (e?.message || e));
} finally {
runBtn.disabled = false;
}
});
console.log('[LabelScanIndexer] bereit');
})();