Hilfe:LabelScan-Indexer
📦 LabelScan – Indexer (Auto-Save)
Erzeugt Embeddings lokal im Browser (CLIP) und schreibt sie automatisch nach MediaWiki:Gadget-LabelScan-index.json.
<label>Artikel-Titel (genau wie im Wiki): <input id="idx-title" type="text" style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1px solid #ddd;border-radius:8px"> </label>
<label>Thumb-URL (optional, 120–300px breit): <input id="idx-thumb" type="url" placeholder="" style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1px solid #ddd;border-radius:8px"> </label>
<label>Bilddatei (Frontlabel-Foto): <input id="idx-file" type="file" accept="image/*" style="display:block;margin:.25rem 0 .75rem;"> </label>
<button id="idx-run" style="padding:.6rem .9rem;border-radius:10px;background:#2a4b8d;color:#fff;border:none;cursor:pointer">Embedding erzeugen & speichern</button>
<div id="idx-status" style="margin:.5rem 0"></div>
<div id="idx-preview"></div>
Zuletzt erzeugter JSON-Eintrag
<textarea id="idx-out" rows="5" style="width:100%;font-family:ui-monospace,Consolas,monospace;padding:.6rem;border:1px solid #ddd;border-radius:10px"></textarea>
<script> /* global mw */ (function(){
// Wiki page that holds the embedding index; it is read by fetchIndexJSON()
// and written by saveIndexJSON().
const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';

// ---- Model config (same as in the gadget) ----
// Each constant needs its own line: when they shared a line with the
// heading comment they were part of that comment and never defined.
const transformersURL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0';
const MODEL_ID = 'Xenova/clip-vit-base-patch32';
const LOCAL_MODEL_PATH = '/models';
// ---- UI helpers ----

/** Shorthand for document.getElementById. */
const $ = (id) => document.getElementById(id);

/**
 * Shows a status message in the element with id "idx-status".
 * Does nothing when that element is not on the page.
 *
 * @param {string} [t] - Message to show; any falsy value clears the text.
 */
const status = (t) => {
  const el = $('idx-status');
  if (el) {
    el.textContent = t || '';
  }
};
// Permission check
/**
 * Reports whether the current user's groups (from `wgUserGroups` in
 * mw.config) include "sysop" or "interface-admin".
 *
 * @returns {boolean} true when at least one of the two groups is present.
 */
function hasSysop() {
  const userGroups = mw.config.get('wgUserGroups') || [];
  const privileged = ['sysop', 'interface-admin'];
  return privileged.some((group) => userGroups.includes(group));
}
// Float32 → base64
/**
 * Encodes the raw bytes of a Float32Array as a base64 string.
 *
 * Only the bytes that belong to `vec` are encoded (byteOffset/byteLength are
 * honoured), so a view into a larger buffer is handled correctly.
 * NOTE(review): the bytes are taken in the platform's native byte order
 * (little-endian on common hardware) — the decoder must assume the same.
 *
 * @param {Float32Array} vec - Vector to encode.
 * @returns {string} base64 representation of the vector's bytes.
 */
function float32ToBase64(vec) {
  const bytes = new Uint8Array(vec.buffer, vec.byteOffset, vec.byteLength);
  // Convert in chunks so the argument list of fromCharCode stays bounded.
  const chunkSize = 0x8000;
  let bin = '';
  for (let i = 0; i < bytes.length; i += chunkSize) {
    bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunkSize));
  }
  return btoa(bin);
}
// Optional: EXIF-aware canvas creation (with fallback when OffscreenCanvas is missing)
/**
 * Draws an image file onto a canvas at its full pixel size.
 *
 * When `createImageBitmap` exists, the file is decoded with
 * `imageOrientation: 'from-image'` and drawn onto an OffscreenCanvas if
 * available, otherwise onto a regular <canvas>. Without `createImageBitmap`
 * the file is loaded through an <img> element and a temporary object URL.
 * NOTE(review): on the <img> path, orientation handling is left to the
 * browser — confirm if EXIF rotation matters for old browsers.
 *
 * @param {Blob} file - Image file picked by the user.
 * @returns {Promise<OffscreenCanvas|HTMLCanvasElement>} Canvas holding the image.
 */
async function fileToCanvasExif(file) {
  if ('createImageBitmap' in window) {
    const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' });
    try {
      let canvas;
      // Prefer OffscreenCanvas, fall back to a regular <canvas>.
      if ('OffscreenCanvas' in window) {
        canvas = new OffscreenCanvas(bmp.width, bmp.height);
      } else {
        canvas = document.createElement('canvas');
        canvas.width = bmp.width;
        canvas.height = bmp.height;
      }
      canvas.getContext('2d').drawImage(bmp, 0, 0);
      return canvas;
    } finally {
      // Release the decoded bitmap right away instead of waiting for GC.
      bmp.close?.();
    }
  }

  // Classic path: decode through an <img> element.
  const url = URL.createObjectURL(file);
  try {
    const img = await new Promise((resolve, reject) => {
      const im = new Image();
      im.onload = () => resolve(im);
      im.onerror = reject;
      im.src = url;
    });
    const canvas = document.createElement('canvas');
    canvas.width = img.width;
    canvas.height = img.height;
    canvas.getContext('2d').drawImage(img, 0, 0);
    return canvas;
  } finally {
    URL.revokeObjectURL(url);
  }
}
// ---- Load Transformers.js (once) ----
// Promise of the in-flight or completed load; undefined while nothing is loaded.
let _load;

/**
 * Loads the Transformers.js module plus the CLIP processor and vision model,
 * sharing one load between all callers.
 *
 * A failed load is not cached: once the promise rejects, the next call
 * starts a fresh attempt.
 *
 * @returns {Promise<{mod: object, processor: object, model: object}>}
 *   The imported module and the objects returned by `from_pretrained`.
 */
async function ensureModel() {
  if (_load) return _load;

  const pending = (async () => {
    const mod = await import(/* webpackIgnore: true */ transformersURL);

    // Local models only (same as the gadget).
    mod.env.allowLocalModels = true;
    mod.env.allowRemoteModels = false;
    mod.env.localModelPath = LOCAL_MODEL_PATH;

    // Location of the WASM runtime files (ort-wasm-simd.wasm); derived from
    // transformersURL so both always point at the same package version.
    mod.env.backends = mod.env.backends || {};
    mod.env.backends.onnx = mod.env.backends.onnx || {};
    mod.env.backends.onnx.wasm = mod.env.backends.onnx.wasm || {};
    mod.env.backends.onnx.wasm.wasmPaths = `${transformersURL}/dist/`;

    const [processor, model] = await Promise.all([
      mod.AutoProcessor.from_pretrained(MODEL_ID),
      mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true }),
    ]);
    return { mod, processor, model };
  })();

  _load = pending;
  // Forget a failed attempt so a later call can retry; callers still receive
  // the rejection through the promise returned below.
  pending.catch(() => {
    if (_load === pending) _load = undefined;
  });
  return pending;
}
/**
 * Computes a unit-length image embedding for an image file.
 *
 * Steps: load the model (ensureModel), draw the file onto a canvas
 * (fileToCanvasExif), re-encode the canvas as JPEG, run the processor and
 * the vision model, then L2-normalize the resulting vector.
 *
 * @param {Blob} file - Image file picked by the user.
 * @returns {Promise<Float32Array>} New vector scaled to Euclidean length 1
 *   (an all-zero vector is returned unchanged).
 * @throws {Error} If the canvas cannot be encoded or the model output does
 *   not contain a Float32Array.
 */
async function buildEmbeddingFromFile(file) {
  const { mod, processor, model } = await ensureModel();

  // Canvas (orientation-corrected where the browser supports it).
  const canvas = await fileToCanvasExif(file);

  // Canvas → Blob → RawImage. OffscreenCanvas offers convertToBlob(),
  // a regular <canvas> only the callback-based toBlob().
  const blob = canvas.convertToBlob
    ? await canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 })
    : await new Promise((resolve) => canvas.toBlob(resolve, 'image/jpeg', 0.95));
  if (!blob) {
    // toBlob() reports failure by passing null to its callback.
    throw new Error('Bild konnte nicht kodiert werden');
  }

  const imageRaw = await mod.RawImage.fromBlob(blob);
  const inputs = await processor(imageRaw, { return_tensors: 'pt' });
  const out = await model.forward({ pixel_values: inputs.pixel_values });

  const vec = out?.image_embeds?.data || out?.image_embeds;
  if (!(vec instanceof Float32Array)) {
    throw new Error('Embedding-Format unerwartet');
  }

  // L2-normalize; fall back to 1 so a zero vector does not divide by zero.
  let sumOfSquares = 0;
  for (let i = 0; i < vec.length; i++) {
    sumOfSquares += vec[i] * vec[i];
  }
  const norm = Math.sqrt(sumOfSquares) || 1;

  const normalized = new Float32Array(vec.length);
  for (let i = 0; i < vec.length; i++) {
    normalized[i] = vec[i] / norm;
  }
  return normalized;
}
// ---- Load & save the index ----
/**
 * Fetches the index page as raw JSON and returns its entries.
 *
 * Deliberately lenient: a missing page (HTTP 404), empty content, invalid
 * JSON or a non-array value all yield an empty array. Invalid JSON is
 * logged, because a following save replaces whatever the page contained.
 *
 * @returns {Promise<Array>} Parsed index entries (possibly empty).
 * @throws {Error} If the request fails with a status other than 404.
 */
async function fetchIndexJSON() {
  const url = mw.util.getUrl(INDEX_TITLE, { action: 'raw', ctype: 'application/json' });
  const res = await fetch(url, { cache: 'no-store' });

  // The page not existing yet simply means the index is empty.
  if (res.status === 404) return [];
  if (!res.ok) throw new Error('Index nicht ladbar: ' + res.status);

  const txt = await res.text();
  let parsed;
  try {
    parsed = JSON.parse(txt || '[]');
  } catch (err) {
    console.warn('LabelScan: index page is not valid JSON, treating it as empty', err);
    return [];
  }
  return Array.isArray(parsed) ? parsed : [];
}
/**
 * Tells whether a rejected mw.Api call failed with an edit conflict.
 * Accepts a bare error-code string (what `await` yields for an mw.Api
 * rejection, whose first rejection argument is the code) as well as
 * error-like objects carrying the code.
 */
function isEditConflictError(err) {
  const code = typeof err === 'string'
    ? err
    : (err?.code ?? err?.error?.code ?? err?.details ?? err?.message ?? '');
  return String(code).includes('editconflict');
}

/**
 * Writes the index array to the index page as pretty-printed JSON.
 *
 * The latest revision timestamp is sent as `basetimestamp` so the API can
 * detect an edit conflict. On a conflict the index is re-fetched, merged
 * with `newArray` (mergeArraysUnique) and written once more — that retry
 * carries no base timestamp.
 *
 * @param {Array} newArray - Complete index to store.
 * @param {string} [summary] - Edit summary; a default is used when empty.
 * @returns {Promise<object>} Result of the edit API call.
 * @throws Re-throws whatever mw.Api rejected with for non-conflict failures.
 */
async function saveIndexJSON(newArray, summary) {
  await mw.loader.using(['mediawiki.api']);
  const api = new mw.Api();
  const editSummary = summary || 'LabelScan: +1 embedding (Auto-Indexer)';

  // Fetch the latest revision timestamp for conflict protection
  // (the content itself is not needed here).
  const meta = await api.get({
    action: 'query',
    prop: 'revisions',
    titles: INDEX_TITLE,
    rvprop: 'timestamp',
    format: 'json',
  });
  const pages = meta?.query?.pages || {};
  const page = pages[Object.keys(pages)[0]];
  const baseTimestamp = page?.revisions?.[0]?.timestamp;

  // Note: no `nocreate` parameter. Boolean API parameters count as true
  // whenever they are present, so `nocreate: 0` would forbid creating the
  // page instead of allowing it.
  const buildEdit = (rows, extra) => ({
    action: 'edit',
    title: INDEX_TITLE,
    text: JSON.stringify(rows, null, 2) + '\n',
    bot: 1,
    ...extra,
  });

  try {
    return await api.postWithToken(
      'csrf',
      buildEdit(newArray, { summary: editSummary, basetimestamp: baseTimestamp })
    );
  } catch (e) {
    if (!isEditConflictError(e)) throw e;

    // Simple retry on conflict: re-fetch, merge (deduplicated), write again.
    const fresh = await fetchIndexJSON();
    const merged = mergeArraysUnique(fresh, newArray);
    return api.postWithToken(
      'csrf',
      buildEdit(merged, { summary: editSummary + ' (merge)' })
    );
  }
}
// Simple duplicate removal (identical title + embed)
/**
 * Returns the entries of `base` followed by those entries of `add` whose
 * title/embed combination is not present yet.
 *
 * Neither input array is modified; missing or empty `title`/`embed` values
 * (and null entries) are treated as empty strings when comparing.
 *
 * @param {Array<{title?: string, embed?: string}>} base - Existing entries.
 * @param {Array<{title?: string, embed?: string}>} add - Entries to append.
 * @returns {Array} New array with the merged entries.
 */
function mergeArraysUnique(base, add) {
  const keyOf = (entry) => (entry?.title || '') + '|' + (entry?.embed || '');
  const merged = [...base];
  const seen = new Set(merged.map(keyOf));
  for (const entry of add) {
    const key = keyOf(entry);
    if (!seen.has(key)) {
      merged.push(entry);
      seen.add(key);
    }
  }
  return merged;
}
// ---- Click handler ----
// Runs the whole flow for one image: check rights and inputs, compute the
// embedding, show a preview, then load, merge and save the index.
// Any failure is logged, shown in the status line and reported via alert().
$('idx-run').addEventListener('click', async () => {
  try {
    if (!hasSysop()) {
      alert('Du brauchst Admin-Rechte (sysop/interface-admin), um den Index automatisch zu speichern.');
      return;
    }

    const title = $('idx-title').value.trim();
    const thumb = $('idx-thumb').value.trim();
    const file = $('idx-file').files?.[0];
    if (!title) {
      alert('Bitte Artikel-Titel eingeben.');
      return;
    }
    if (!file) {
      alert('Bitte Bilddatei wählen.');
      return;
    }

    status('Modell laden …');
    await ensureModel();

    status('Embedding berechnen …');
    const vec = await buildEmbeddingFromFile(file);
    const b64 = float32ToBase64(vec);

    // Preview (skipped when the page has no "idx-preview" container).
    const preview = $('idx-preview');
    if (preview) {
      const previewUrl = URL.createObjectURL(file);
      const img = document.createElement('img');
      // The object URL is only needed until the image has been decoded.
      const releaseUrl = () => URL.revokeObjectURL(previewUrl);
      img.onload = releaseUrl;
      img.onerror = releaseUrl;
      img.src = previewUrl;
      img.style.maxWidth = '280px';
      img.style.borderRadius = '10px';
      preview.textContent = '';
      preview.appendChild(img);
    }

    const newRow = { title, thumb: thumb || '', embed: b64 };
    $('idx-out').value = JSON.stringify(newRow);

    status('Index laden …');
    const arr = await fetchIndexJSON();
    const merged = mergeArraysUnique(arr, [newRow]);

    status('Speichern …');
    await saveIndexJSON(merged, `LabelScan: +1 embedding für "${title}"`);
    status('Gespeichert ✅');
  } catch (e) {
    console.error(e);
    status('Fehler: ' + (e?.message || e));
    alert('Fehler beim Speichern:\n' + (e?.message || e));
  }
});
})(); </script>