|
|
| Zeile 2: |
Zeile 2: |
| <div class="box" style="max-width:820px;margin:1rem auto;padding:1rem;border:1px solid #e5e7eb;border-radius:12px;"> | | <div class="box" style="max-width:820px;margin:1rem auto;padding:1rem;border:1px solid #e5e7eb;border-radius:12px;"> |
| <h2>📦 LabelScan – Indexer (Auto-Save)</h2> | | <h2>📦 LabelScan – Indexer (Auto-Save)</h2> |
| <p>Erzeugt Embeddings lokal im Browser (CLIP) und schreibt sie automatisch nach <code>MediaWiki:Gadget-LabelScan-index.json</code>.</p>
| |
|
| |
|
| <label><b>Artikel-Titel</b> (genau wie im Wiki): | | <label><b>Artikel-Titel</b><br> |
| <input id="idx-title" type="text" style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1px solid #ddd;border-radius:8px"> | | <input id="idx-title" type="text" style="width:100%;padding:.5rem;margin:.3rem 0;border:1px solid #ddd;border-radius:8px"> |
| </label> | | </label> |
|
| |
|
| <label><b>Thumb-URL</b> (optional, 120–300px breit): | | <label><b>Thumb-URL</b> (optional)<br> |
| <input id="idx-thumb" type="url" placeholder="https://ados-wiki.de/images/.../thumb.jpg" | | <input id="idx-thumb" type="url" style="width:100%;padding:.5rem;margin:.3rem 0;border:1px solid #ddd;border-radius:8px"> |
| style="width:100%;padding:.5rem;margin:.25rem 0 .75rem;border:1px solid #ddd;border-radius:8px">
| |
| </label> | | </label> |
|
| |
|
| <label><b>Bilddatei</b> (Frontlabel-Foto): | | <label><b>Bilddatei</b><br> |
| <input id="idx-file" type="file" accept="image/*" style="display:block;margin:.25rem 0 .75rem;"> | | <input id="idx-file" type="file" accept="image/*" style="display:block;margin:.3rem 0 1rem;"> |
| </label> | | </label> |
|
| |
|
| <button id="idx-run" style="padding:.6rem .9rem;border-radius:10px;background:#2a4b8d;color:#fff;border:none;cursor:pointer">Embedding erzeugen & speichern</button> | | <button id="idx-run" style="padding:.6rem .9rem;border-radius:10px;background:#2a4b8d;color:#fff;border:none;cursor:pointer"> |
| | Embedding erzeugen & speichern |
| | </button> |
| <span id="idx-status" style="margin-left:.75rem;color:#555;"></span> | | <span id="idx-status" style="margin-left:.75rem;color:#555;"></span> |
|
| |
|
| Zeile 23: |
Zeile 23: |
|
| |
|
| <h3>Zuletzt erzeugter JSON-Eintrag</h3> | | <h3>Zuletzt erzeugter JSON-Eintrag</h3> |
| <textarea id="idx-out" rows="5" style="width:100%;font-family:ui-monospace,Consolas,monospace;padding:.6rem;border:1px solid #ddd;border-radius:10px"></textarea> | | <textarea id="idx-out" rows="5" style="width:100%;font-family:monospace;padding:.6rem;border:1px solid #ddd;border-radius:10px"></textarea> |
| </div> | | </div> |
|
| |
| <script>
| |
| /* global mw */
| |
| (function(){
| |
// Title of the wiki page that holds the embedding index (JSON array).
const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';

// ---- Model configuration (same values as the gadget) ----
const transformersURL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0';
const MODEL_ID = 'Xenova/clip-vit-base-patch32';
const LOCAL_MODEL_PATH = '/models';

// ---- UI helpers ----

/** Shorthand for `document.getElementById`. */
const $ = (elementId) => document.getElementById(elementId);

/**
 * Writes `t` into the `#idx-status` element; a falsy `t` clears it.
 * Does nothing when the element is not present.
 */
const status = (t) => {
  const target = $('idx-status');
  if (!target) {
    return;
  }
  target.textContent = t ? t : '';
};
| |
|
| |
// Permission check

/**
 * Reports whether the current user belongs to the `sysop` or
 * `interface-admin` group, based on the `wgUserGroups` config value.
 * A missing config value is treated as "no groups".
 *
 * @returns {boolean} true when at least one of the two groups is present.
 */
function hasSysop() {
  const memberships = mw.config.get('wgUserGroups') || [];
  return ['sysop', 'interface-admin'].some((group) => memberships.includes(group));
}
| |
|
| |
/**
 * Encodes the raw bytes of a Float32Array as a base64 string.
 *
 * Only the bytes that belong to `vec` itself are encoded: `byteOffset` and
 * `byteLength` are honoured, so a view/subarray over a larger buffer does not
 * leak the rest of that buffer into the output.
 *
 * @param {Float32Array} vec - Vector to encode (platform byte order).
 * @returns {string} Base64 representation of the vector's bytes.
 */
function float32ToBase64(vec) {
  const bytes = new Uint8Array(vec.buffer, vec.byteOffset, vec.byteLength);
  // Convert in chunks: passing a huge array to Function.prototype.apply can
  // exceed the engine's argument-count limit.
  const CHUNK_SIZE = 0x8000;
  let binary = '';
  for (let start = 0; start < bytes.length; start += CHUNK_SIZE) {
    binary += String.fromCharCode.apply(null, bytes.subarray(start, start + CHUNK_SIZE));
  }
  return btoa(binary);
}
| |
|
| |
/**
 * Decodes an image file and paints it onto a canvas.
 *
 * When `createImageBitmap` is available the file is decoded with
 * `imageOrientation: 'from-image'` and drawn onto an OffscreenCanvas (or a
 * regular <canvas> when OffscreenCanvas is missing). The intermediate
 * ImageBitmap is closed once it has been drawn so its memory is released
 * promptly. Without `createImageBitmap` the file is loaded through an <img>
 * element and an object URL, which is revoked afterwards.
 *
 * @param {Blob} file - Image file/blob to decode.
 * @returns {Promise<OffscreenCanvas|HTMLCanvasElement>} Canvas holding the image.
 */
async function fileToCanvasExif(file) {
  // Creates a canvas of the given size and draws `source` at the origin.
  const paint = (source, width, height, allowOffscreen) => {
    let canvas;
    if (allowOffscreen && 'OffscreenCanvas' in window) {
      canvas = new OffscreenCanvas(width, height);
    } else {
      canvas = document.createElement('canvas');
      canvas.width = width;
      canvas.height = height;
    }
    canvas.getContext('2d').drawImage(source, 0, 0);
    return canvas;
  };

  if ('createImageBitmap' in window) {
    const bitmap = await createImageBitmap(file, { imageOrientation: 'from-image' });
    try {
      return paint(bitmap, bitmap.width, bitmap.height, true);
    } finally {
      // drawImage has already copied the pixels; free the bitmap right away.
      bitmap.close?.();
    }
  }

  // Classic path: load through an <img> element.
  const objectUrl = URL.createObjectURL(file);
  try {
    const image = await new Promise((resolve, reject) => {
      const element = new Image();
      element.onload = () => resolve(element);
      element.onerror = reject;
      element.src = objectUrl;
    });
    return paint(image, image.width, image.height, false);
  } finally {
    URL.revokeObjectURL(objectUrl);
  }
}
| |
|
| |
// ---- Load transformers.js and the model (once) ----

// In-flight or completed load; undefined until the first call and after a failed load.
let _load;

/**
 * Loads the transformers.js module, configures it for local models and
 * instantiates the CLIP processor and vision model. The work is done once and
 * the same promise is shared by all callers.
 *
 * If loading fails, the cached promise is discarded so that a later call can
 * retry instead of receiving the same rejection forever.
 *
 * @returns {Promise<{mod: object, processor: object, model: object}>}
 *   The imported module plus the processor and model created from MODEL_ID.
 */
async function ensureModel() {
  if (_load) return _load;

  const pending = (async () => {
    const mod = await import(/* webpackIgnore: true */ transformersURL);

    // Local models only (same as the gadget).
    mod.env.allowLocalModels = true;
    mod.env.allowRemoteModels = false;
    mod.env.localModelPath = LOCAL_MODEL_PATH;

    // (Optional) prefer WebGPU – the fallback stays wasm
    // mod.env.backends = mod.env.backends || {};
    // mod.env.backends.onnx = mod.env.backends.onnx || {};
    // mod.env.backends.onnx.preferredBackend = 'webgpu';

    // Location of the WASM runtime files (ort-wasm-simd.wasm).
    mod.env.backends = mod.env.backends || {};
    mod.env.backends.onnx = mod.env.backends.onnx || {};
    mod.env.backends.onnx.wasm = mod.env.backends.onnx.wasm || {};
    mod.env.backends.onnx.wasm.wasmPaths =
      'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/';

    const [processor, model] = await Promise.all([
      mod.AutoProcessor.from_pretrained(MODEL_ID),
      mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true }),
    ]);

    return { mod, processor, model };
  })();

  _load = pending;
  // Drop the cache on failure (e.g. a transient network error) so the next
  // call starts a fresh attempt. Callers still receive the rejection.
  pending.catch(() => {
    if (_load === pending) _load = undefined;
  });
  return pending;
}
| |
|
| |
/**
 * Computes an L2-normalised image embedding for an image file.
 *
 * Steps: load the model (ensureModel), paint the file onto a canvas
 * (fileToCanvasExif), re-encode the canvas as JPEG, turn it into a RawImage,
 * run processor + model, then normalise the resulting vector.
 *
 * @param {Blob} file - Image file to embed.
 * @returns {Promise<Float32Array>} New vector with unit length (an all-zero
 *   vector is returned unchanged because the divisor falls back to 1).
 * @throws {Error} When the canvas cannot be encoded or the model output is not
 *   a Float32Array.
 */
async function buildEmbeddingFromFile(file) {
  const { mod, processor, model } = await ensureModel();

  // Canvas (orientation-corrected where supported)
  const canvas = await fileToCanvasExif(file);

  // Canvas → Blob → RawImage (robust input for the processor)
  const blob = canvas.convertToBlob
    ? await canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 })
    : await new Promise((resolve) => canvas.toBlob(resolve, 'image/jpeg', 0.95));
  // HTMLCanvasElement.toBlob reports failure by passing null to the callback.
  if (!blob) throw new Error('Bild konnte nicht kodiert werden');

  const imageRaw = await mod.RawImage.fromBlob(blob);
  const inputs = await processor(imageRaw, { return_tensors: 'pt' });
  const out = await model.forward({ pixel_values: inputs.pixel_values });
  const vec = out?.image_embeds?.data || out?.image_embeds;
  if (!(vec instanceof Float32Array)) throw new Error('Embedding-Format unerwartet');

  // Normalise to unit length.
  let sumOfSquares = 0;
  for (const component of vec) sumOfSquares += component * component;
  const norm = Math.sqrt(sumOfSquares) || 1;
  return Float32Array.from(vec, (component) => component / norm);
}
| |
|
| |
| // ---- Index laden & speichern ----
| |
/**
 * Fetches the index page (INDEX_TITLE) via `action=raw` and returns its
 * content as an array.
 *
 * - HTTP 404 (index page not created yet) and empty content yield `[]`.
 * - Any other non-OK status throws.
 * - Unparseable or non-array content also yields `[]` (best effort), but is
 *   logged with `console.warn` instead of being swallowed silently.
 *   NOTE(review): a caller that saves afterwards will overwrite the
 *   unparseable page content – confirm this is the desired behaviour.
 *
 * @returns {Promise<Array>} Parsed index entries.
 * @throws {Error} When the request fails with a status other than 404.
 */
async function fetchIndexJSON() {
  const url = mw.util.getUrl(INDEX_TITLE, { action: 'raw', ctype: 'application/json' });
  const res = await fetch(url, { cache: 'no-store' });
  // A missing page is the normal state before the very first entry is saved.
  if (res.status === 404) return [];
  if (!res.ok) throw new Error('Index nicht ladbar: ' + res.status);

  const txt = await res.text();
  if (!txt || txt.trim() === '') return [];

  let parsed;
  try {
    parsed = JSON.parse(txt);
  } catch (err) {
    console.warn('LabelScan: index page is not valid JSON, treating it as empty.', err);
    return [];
  }
  if (!Array.isArray(parsed)) {
    console.warn('LabelScan: index page does not contain a JSON array, treating it as empty.');
    return [];
  }
  return parsed;
}
| |
|
| |
/**
 * Writes the index array to the INDEX_TITLE page through the MediaWiki edit API.
 *
 * The latest revision timestamp is fetched first and sent as `basetimestamp`
 * so the API can report an edit conflict. On a conflict the index is
 * re-fetched, merged with `newArray` (mergeArraysUnique) and written once more.
 *
 * Notes on the request parameters:
 * - `nocreate` is omitted on purpose. The Action API treats a boolean
 *   parameter as true whenever it is present, so sending `nocreate: 0` would
 *   forbid creating the page.
 * - Only the revision timestamp is requested; the content is not needed here.
 *
 * @param {Array} newArray - Complete index to store.
 * @param {string} [summary] - Edit summary; a default is used when empty.
 * @returns {Promise<object>} The API response of the successful edit.
 * @throws Re-throws the API rejection value for anything but an edit conflict.
 */
async function saveIndexJSON(newArray, summary) {
  await mw.loader.using(['mediawiki.api']);
  const api = new mw.Api();
  const editSummary = summary || 'LabelScan: +1 embedding (Auto-Indexer)';

  // Awaiting an mw.Api (jQuery) promise surfaces only the first rejection
  // argument, which is the error code string. Object shapes are checked as
  // well in case the rejection is wrapped.
  const isEditConflict = (err) =>
    [typeof err === 'string' ? err : undefined, err?.code, err?.error?.code, err?.details, err?.message]
      .some((candidate) => typeof candidate === 'string' && candidate.includes('editconflict'));

  const writePage = (pageText, extraParams) =>
    api.postWithToken('csrf', {
      action: 'edit',
      title: INDEX_TITLE,
      text: pageText,
      bot: 1,
      ...extraParams,
    });

  // Fetch the current revision timestamp for conflict protection.
  const meta = await api.get({
    action: 'query',
    prop: 'revisions',
    titles: INDEX_TITLE,
    rvprop: 'timestamp',
    format: 'json',
  });
  const pages = meta?.query?.pages || {};
  const page = pages[Object.keys(pages)[0]];
  const baseTimestamp = page?.revisions?.[0]?.timestamp;

  const text = JSON.stringify(newArray, null, 2) + '\n';

  try {
    return await writePage(text, { summary: editSummary, basetimestamp: baseTimestamp });
  } catch (e) {
    if (!isEditConflict(e)) throw e;

    // Simple retry on conflict: reload, merge (with de-duplication), write again.
    const fresh = await fetchIndexJSON();
    const merged = mergeArraysUnique(fresh, newArray);
    const mergedText = JSON.stringify(merged, null, 2) + '\n';
    return writePage(mergedText, { summary: editSummary + ' (merge)' });
  }
}
| |
|
| |
/**
 * Merges two lists of index entries, dropping entries from `add` whose
 * `title` + `embed` combination is already present.
 *
 * The inputs are not modified; a new array is returned that starts with all
 * entries of `base` (in order) followed by the new entries of `add`.
 * Missing `title`/`embed` values (and null entries) are keyed as empty strings.
 *
 * @param {Array<object>} base - Existing entries.
 * @param {Array<object>} add - Entries to append when not yet present.
 * @returns {Array<object>} Merged list.
 */
function mergeArraysUnique(base, add) {
  const keyOf = (row) => (row?.title || '') + '|' + (row?.embed || '');
  const merged = [...base];
  const seen = new Set(merged.map(keyOf));
  for (const row of add) {
    const key = keyOf(row);
    if (!seen.has(key)) {
      merged.push(row);
      seen.add(key);
    }
  }
  return merged;
}
| |
|
| |
// ---- Click handler ----

/**
 * Runs the full indexing flow for the form: permission check, input
 * validation, model load, embedding computation, preview, and merging the new
 * entry into the stored index.
 *
 * Differences to a naive flow:
 * - The button is disabled while a run is in progress so a double click
 *   cannot start two overlapping save operations.
 * - The preview's object URL is revoked once the image has loaded (or failed),
 *   so repeated runs do not accumulate blob URLs.
 * - A missing `#idx-preview` element only skips the preview instead of
 *   aborting the run.
 */
$('idx-run').addEventListener('click', async () => {
  const runButton = $('idx-run');
  if (runButton.disabled) return;
  runButton.disabled = true;

  try {
    if (!hasSysop()) {
      alert('Du brauchst Admin-Rechte (sysop/interface-admin), um den Index automatisch zu speichern.');
      return;
    }

    const title = $('idx-title').value.trim();
    const thumb = $('idx-thumb').value.trim();
    const file = $('idx-file').files?.[0];

    if (!title) {
      alert('Bitte Artikel-Titel eingeben.');
      return;
    }
    if (!file) {
      alert('Bitte Bilddatei wählen.');
      return;
    }

    // Loaded separately first so the status line can show the model-loading phase.
    status('Modell laden …');
    await ensureModel();

    status('Embedding berechnen …');
    const vec = await buildEmbeddingFromFile(file);
    const b64 = float32ToBase64(vec);

    // Preview
    const previewBox = $('idx-preview');
    if (previewBox) {
      const previewUrl = URL.createObjectURL(file);
      const releaseUrl = () => URL.revokeObjectURL(previewUrl);
      const img = document.createElement('img');
      img.addEventListener('load', releaseUrl, { once: true });
      img.addEventListener('error', releaseUrl, { once: true });
      img.src = previewUrl;
      img.style.maxWidth = '280px';
      img.style.borderRadius = '10px';
      previewBox.textContent = '';
      previewBox.appendChild(img);
    }

    const newRow = { title, thumb: thumb || '', embed: b64 };
    $('idx-out').value = JSON.stringify(newRow);

    status('Index laden …');
    const arr = await fetchIndexJSON();

    const merged = mergeArraysUnique(arr, [newRow]);

    status('Speichern …');
    await saveIndexJSON(merged, `LabelScan: +1 embedding für "${title}"`);

    status('Gespeichert ✅');
  } catch (e) {
    console.error(e);
    status('Fehler: ' + (e?.message || e));
    alert('Fehler beim Speichern:\n' + (e?.message || e));
  } finally {
    runButton.disabled = false;
  }
});
| |
| })();
| |
| </script>
| |
| }} | | }} |