Zum Inhalt springen

MediaWiki:Gadget-LabelScanIndexer.js: Unterschied zwischen den Versionen

Aus ADOS Wiki
Die Seite wurde neu angelegt: „Gadget: LabelScanIndexer * Lädt auf der Seite Hilfe:LabelScan-Indexer * Erzeugt Embeddings lokal (CLIP) und speichert automatisch in MediaWiki:Gadget-LabelScan-index.json: if (mw.config.get('wgPageName') !== 'Hilfe:LabelScan-Indexer') { // Läuft nur auf der Indexer-Seite return; } (function(){ const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json'; // Modell / Pfade (müssen zu deinem Setup passen) const transformersURL = 'http…“
 
Keine Bearbeitungszusammenfassung
Zeile 1: Zeile 1:
/* Gadget: LabelScanIndexer
/* Gadget: LabelScanIndexer (Auto-Save)
  * Lädt auf der Seite Hilfe:LabelScan-Indexer
  * Läuft nur auf der Seite "Hilfe:LabelScan-Indexer" (Namespace Help = 12)
  * Erzeugt Embeddings lokal (CLIP) und speichert automatisch in MediaWiki:Gadget-LabelScan-index.json
  * Erzeugt Embeddings lokal (CLIP) und speichert in MediaWiki:Gadget-LabelScan-index.json
  */
  */


if (mw.config.get('wgPageName') !== 'Hilfe:LabelScan-Indexer') {
/* global mw */
  // Läuft nur auf der Indexer-Seite
(function () {
  return;
  // ---------- Seitenerkennung (robust) ----------
}
  const NS = mw.config.get('wgNamespaceNumber'); // 12 = Help/Hilfe
  const TITLE = mw.config.get('wgTitle');        // nur der Titel ohne Namespace
 
  if (!(NS === 12 && TITLE === 'LabelScan-Indexer')) {
    // Debug-Hinweis, falls du auf der falschen Seite testest
    // console.debug('[LabelScanIndexer] nicht aktiv auf', NS, TITLE);
    return;
  }


(function(){
   const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';
   const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';


   // Modell / Pfade (müssen zu deinem Setup passen)
   // ---------- Modell / Pfade ----------
   const transformersURL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0';
   const TRANSFORMERS_URL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0';
   const MODEL_ID = 'Xenova/clip-vit-base-patch32';
   const MODEL_ID = 'Xenova/clip-vit-base-patch32';
   const LOCAL_MODEL_PATH = '/models'; // WICHTIG: Du hast deine Modelle unter /models/… liegen
   const LOCAL_MODEL_PATH = '/models';


   const $ = id => document.getElementById(id);
  // ---------- UI helpers ----------
   const status = (t) => { const el=$('idx-status'); if(el) el.textContent=t||''; };
   const $ = (id) => document.getElementById(id);
   const status = (t) => { const el = $('idx-status'); if (el) el.textContent = t || ''; };


   function hasSysop(){
   function hasInterfaceRight() {
     const g = mw.config.get('wgUserGroups') || [];
     const groups = mw.config.get('wgUserGroups') || [];
     return g.includes('sysop') || g.includes('interface-admin');
     return groups.includes('interface-admin') || groups.includes('sysop');
   }
   }


   function float32ToBase64(vec){
   function float32ToBase64(vec) {
     const bytes = new Uint8Array(vec.buffer);
     const bytes = new Uint8Array(vec.buffer);
     let bin = '', chunk = 0x8000;
     let bin = '', chunk = 0x8000;
     for (let i=0; i<bytes.length; i+=chunk) {
     for (let i = 0; i < bytes.length; i += chunk) {
       bin += String.fromCharCode.apply(null, bytes.subarray(i, i+chunk));
       bin += String.fromCharCode.apply(null, bytes.subarray(i, i + chunk));
     }
     }
     return btoa(bin);
     return btoa(bin);
   }
   }


   async function fileToCanvasExif(file){
  // EXIF-korrekte Canvas-Erzeugung
   async function fileToCanvasExif(file) {
     if ('createImageBitmap' in window) {
     if ('createImageBitmap' in window) {
       const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' });
       const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' });
Zeile 47: Zeile 55:
         return c;
         return c;
       }
       }
     } else {
     }
      const url = URL.createObjectURL(file);
    // Fallback klassisch
      try {
    const url = URL.createObjectURL(file);
        const img = await new Promise((res, rej)=>{
    try {
          const im = new Image();
      const img = await new Promise((res, rej) => {
          im.onload = ()=>res(im);
        const im = new Image();
          im.onerror = rej;
        im.onload = () => res(im);
          im.src = url;
        im.onerror = rej;
        });
        im.src = url;
        const c = document.createElement('canvas');
      });
        c.width = img.width; c.height = img.height;
      const c = document.createElement('canvas');
        c.getContext('2d').drawImage(img, 0, 0);
      c.width = img.width; c.height = img.height;
        return c;
      c.getContext('2d').drawImage(img, 0, 0);
      } finally {
      return c;
        URL.revokeObjectURL(url);
    } finally {
      }
      URL.revokeObjectURL(url);
     }
     }
   }
   }


  // ---------- Transformers laden (einmalig) ----------
   let _modelPromise;
   let _modelPromise;
   async function ensureModel(){
   async function ensureModel() {
     if (_modelPromise) return _modelPromise;
     if (_modelPromise) return _modelPromise;
     _modelPromise = (async()=>{
     _modelPromise = (async () => {
       const mod = await import(/* webpackIgnore: true */ transformersURL);
       const mod = await import(/* webpackIgnore: true */ TRANSFORMERS_URL);


       mod.env.allowLocalModels = true;
       mod.env.allowLocalModels = true;
Zeile 76: Zeile 85:
       mod.env.localModelPath = LOCAL_MODEL_PATH;
       mod.env.localModelPath = LOCAL_MODEL_PATH;


      // Optional WebGPU bevorzugen:
      // mod.env.backends = mod.env.backends || {};
      // mod.env.backends.onnx = mod.env.backends.onnx || {};
      // mod.env.backends.onnx.preferredBackend = 'webgpu';
      // WASM-Runtime-Pfade
       mod.env.backends = mod.env.backends || {};
       mod.env.backends = mod.env.backends || {};
       mod.env.backends.onnx = mod.env.backends.onnx || {};
       mod.env.backends.onnx = mod.env.backends.onnx || {};
Zeile 84: Zeile 99:
       const [processor, model] = await Promise.all([
       const [processor, model] = await Promise.all([
         mod.AutoProcessor.from_pretrained(MODEL_ID),
         mod.AutoProcessor.from_pretrained(MODEL_ID),
         mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true })
         mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true }),
       ]);
       ]);


Zeile 93: Zeile 108:
   }
   }


   async function buildEmbeddingFromFile(file){
   async function buildEmbeddingFromFile(file) {
     const { mod, processor, model } = await ensureModel();
     const { mod, processor, model } = await ensureModel();
     const canvas = await fileToCanvasExif(file);
     const canvas = await fileToCanvasExif(file);
     const blob = (canvas.convertToBlob)
     const blob = (canvas.convertToBlob)
       ? await canvas.convertToBlob({ type:'image/jpeg', quality:0.95 })
       ? await canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 })
       : await new Promise(r => canvas.toBlob(r, 'image/jpeg', 0.95));
       : await new Promise((r) => canvas.toBlob(r, 'image/jpeg', 0.95));
     const raw = await mod.RawImage.fromBlob(blob);
     const raw = await mod.RawImage.fromBlob(blob);
     const inputs = await processor(raw, { return_tensors: 'pt' });
     const inputs = await processor(raw, { return_tensors: 'pt' });
     const out = await model.forward({ pixel_values: inputs.pixel_values });
     const out = await model.forward({ pixel_values: inputs.pixel_values });
     const vec = out?.image_embeds?.data || out?.image_embeds;
     const vec = out?.image_embeds?.data || out?.image_embeds;
     if (!(vec instanceof Float32Array)) throw new Error('Embedding-Format unerwartet');
     if (!(vec instanceof Float32Array)) throw new Error('Embedding-Format unerwartet');


     let n=0; for(let i=0;i<vec.length;i++) n+=vec[i]*vec[i];
    // Normieren
     const norm = Math.sqrt(n)||1;
     let n = 0; for (let i = 0; i < vec.length; i++) n += vec[i] * vec[i];
     const norm = Math.sqrt(n) || 1;
     const v = new Float32Array(vec.length);
     const v = new Float32Array(vec.length);
     for(let i=0;i<vec.length;i++) v[i]=vec[i]/norm;
     for (let i = 0; i < vec.length; i++) v[i] = vec[i] / norm;
     return v;
     return v;
   }
   }


   async function fetchIndexJSON(){
  // ---------- Index laden/speichern ----------
     const url = mw.util.getUrl(INDEX_TITLE, { action:'raw', ctype:'application/json' });
   async function fetchIndexJSON() {
     const res = await fetch(url, { cache:'no-store' });
     const url = mw.util.getUrl(INDEX_TITLE, { action: 'raw', ctype: 'application/json' });
     if (!res.ok) throw new Error('Index nicht ladbar: '+res.status);
     const res = await fetch(url, { cache: 'no-store' });
     if (!res.ok) throw new Error('Index nicht ladbar: ' + res.status);
     try { return JSON.parse(await res.text()) || []; }
     try { return JSON.parse(await res.text()) || []; }
     catch(_){ return []; }
     catch (_) { return []; }
   }
   }


   async function saveIndexJSON(newArray){
   async function saveIndexJSON(newArray, summary) {
     await mw.loader.using(['mediawiki.api']);
     await mw.loader.using(['mediawiki.api']);
     const api = new mw.Api();
     const api = new mw.Api();
     const text = JSON.stringify(newArray, null, 2) + '\n';
     const text = JSON.stringify(newArray, null, 2) + '\n';
     return api.postWithToken('csrf', {
     return api.postWithToken('csrf', {
       action: 'edit',
       action: 'edit',
       title: INDEX_TITLE,
       title: INDEX_TITLE,
       text,
       text,
       summary: 'LabelScan: +1 embedding (Auto-Indexer)',
       summary: summary || 'LabelScan: +1 embedding (Auto-Indexer)',
       nocreate: 0,
       nocreate: 0,
       bot: 1
       bot: 1
Zeile 136: Zeile 151:
   }
   }


   $('idx-run').addEventListener('click', async ()=>{
   // ---------- Click-Handler ----------
     try{
  const runBtn = document.getElementById('idx-run');
       if (!hasSysop()) return alert('⚠️ Du brauchst Admin/Interface-Rechte.');
  if (!runBtn) {
    console.warn('[LabelScanIndexer] Button #idx-run nicht gefunden – ist das HTML auf der Seite eingebunden?');
    return;
  }
 
  runBtn.addEventListener('click', async () => {
     try {
       if (!hasInterfaceRight()) {
        alert('⚠️ Du brauchst Admin/Interface-Rechte (editinterface).');
        return;
      }


       const title = $('idx-title').value.trim();
       const title = $('idx-title')?.value.trim();
       const thumb = $('idx-thumb').value.trim();
       const thumb = $('idx-thumb')?.value.trim();
       const file = $('idx-file').files?.[0];
       const file = $('idx-file')?.files?.[0];


       if (!title) return alert('Titel fehlt.');
       if (!title) return alert('Titel fehlt.');
       if (!file) return alert('Bitte Bild wählen.');
       if (!file) return alert('Bitte eine Bilddatei wählen.');
 
      runBtn.disabled = true;


       status('Embedding berechnen …');
       status('Embedding berechnen …');
Zeile 151: Zeile 178:
       const b64 = float32ToBase64(vec);
       const b64 = float32ToBase64(vec);


       $('idx-out').value = JSON.stringify({title, thumb, embed:b64}, null, 2);
       $('idx-out').value = JSON.stringify({ title, thumb, embed: b64 }, null, 2);


      status('Index laden …');
       const arr = await fetchIndexJSON();
       const arr = await fetchIndexJSON();
       arr.push({ title, thumb, embed:b64 });
       arr.push({ title, thumb, embed: b64 });


       status('Speichern …');
       status('Speichern …');
       await saveIndexJSON(arr);
       await saveIndexJSON(arr, `LabelScan: +1 embedding für "${title}"`);


       status('Gespeichert ✅');
       status('Gespeichert ✅');
     } catch(e){
     } catch (e) {
       console.error(e);
       console.error(e);
       alert('Fehler: '+e.message);
       status('Fehler ❌ ' + (e?.message || e));
       status('Fehler ');
       alert('Fehler: ' + (e?.message || e));
    } finally {
      runBtn.disabled = false;
     }
     }
   });
   });

Version vom 9. November 2025, 16:26 Uhr

/* Gadget: LabelScanIndexer (Auto-Save)
 * Läuft nur auf der Seite "Hilfe:LabelScan-Indexer" (Namespace Help = 12)
 * Erzeugt Embeddings lokal (CLIP) und speichert in MediaWiki:Gadget-LabelScan-index.json
 */

/* global mw */
(function () {
  // ---------- Seitenerkennung (robust) ----------
  const NS = mw.config.get('wgNamespaceNumber'); // 12 = Help/Hilfe
  const TITLE = mw.config.get('wgTitle');        // nur der Titel ohne Namespace

  if (!(NS === 12 && TITLE === 'LabelScan-Indexer')) {
    // Debug-Hinweis, falls du auf der falschen Seite testest
    // console.debug('[LabelScanIndexer] nicht aktiv auf', NS, TITLE);
    return;
  }

  // Title of the wiki page that holds the embedding index as JSON.
  const INDEX_TITLE = 'MediaWiki:Gadget-LabelScan-index.json';

  // ---------- Model / paths ----------
  // URL of the transformers.js ES module that is loaded via dynamic import().
  const TRANSFORMERS_URL = 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0';
  // Model identifier handed to from_pretrained().
  const MODEL_ID = 'Xenova/clip-vit-base-patch32';
  // Value assigned to env.localModelPath before the model is loaded.
  const LOCAL_MODEL_PATH = '/models';

  // ---------- UI helpers ----------
  /** Shorthand for document.getElementById. */
  const $ = (id) => document.getElementById(id);

  /**
   * Shows a message in the #idx-status element.
   * Does nothing when that element is absent; a falsy message clears the text.
   */
  const status = (t) => {
    const target = $('idx-status');
    if (!target) return;
    target.textContent = t || '';
  };

  /**
   * Client-side convenience check: is the current user listed in the
   * 'interface-admin' or 'sysop' group according to wgUserGroups?
   *
   * @returns {boolean} true when one of the two groups is present.
   */
  function hasInterfaceRight() {
    const memberships = mw.config.get('wgUserGroups') || [];
    return ['interface-admin', 'sysop'].some((name) => memberships.includes(name));
  }

  /**
   * Encodes the raw bytes of a Float32Array as a base64 string.
   *
   * @param {Float32Array} vec - Vector to encode; may be a view into a larger buffer.
   * @returns {string} Base64 of exactly the bytes covered by `vec`.
   */
  function float32ToBase64(vec) {
    // Honour byteOffset/byteLength: `vec.buffer` alone would cover the whole
    // backing ArrayBuffer, which is wrong for subarray()/offset views.
    const bytes = new Uint8Array(vec.buffer, vec.byteOffset, vec.byteLength);
    // Convert in chunks so Function.prototype.apply never receives an
    // argument list large enough to overflow the call stack.
    const CHUNK = 0x8000;
    let bin = '';
    for (let i = 0; i < bytes.length; i += CHUNK) {
      bin += String.fromCharCode.apply(null, bytes.subarray(i, i + CHUNK));
    }
    return btoa(bin);
  }

  /**
   * Decodes an image file and draws it onto a canvas.
   *
   * When createImageBitmap is available, the bitmap is requested with
   * `imageOrientation: 'from-image'` and drawn onto an OffscreenCanvas (or a
   * regular canvas element if OffscreenCanvas is missing). Otherwise the file
   * is loaded through an object URL and an Image element.
   *
   * @param {Blob} file - Image file selected by the user.
   * @returns {Promise<OffscreenCanvas|HTMLCanvasElement>} Canvas holding the decoded image.
   */
  async function fileToCanvasExif(file) {
    if ('createImageBitmap' in window) {
      const bmp = await createImageBitmap(file, { imageOrientation: 'from-image' });
      try {
        let c;
        if ('OffscreenCanvas' in window) {
          c = new OffscreenCanvas(bmp.width, bmp.height);
        } else {
          c = document.createElement('canvas');
          c.width = bmp.width;
          c.height = bmp.height;
        }
        c.getContext('2d').drawImage(bmp, 0, 0);
        return c;
      } finally {
        // Release the decoded bitmap right away instead of waiting for GC;
        // the pixels have already been copied onto the canvas.
        bmp.close?.();
      }
    }

    // Classic fallback via <img> and an object URL.
    const url = URL.createObjectURL(file);
    try {
      const img = await new Promise((res, rej) => {
        const im = new Image();
        im.onload = () => res(im);
        im.onerror = rej;
        im.src = url;
      });
      const c = document.createElement('canvas');
      c.width = img.width;
      c.height = img.height;
      c.getContext('2d').drawImage(img, 0, 0);
      return c;
    } finally {
      // The object URL is only needed while the image is loading/drawn.
      URL.revokeObjectURL(url);
    }
  }

  // ---------- Load transformers.js (once) ----------
  // Caches the in-flight / completed load so the library and model are only
  // fetched once per page view.
  let _modelPromise;

  /**
   * Loads transformers.js, configures it for local models and loads the
   * processor plus the CLIP vision model.
   *
   * Successful loads are cached. A failed load is NOT cached, so a later call
   * (e.g. the next button click) can retry instead of failing forever.
   *
   * @returns {Promise<{mod: object, processor: object, model: object}>}
   *   The imported module together with the loaded processor and model.
   */
  async function ensureModel() {
    if (_modelPromise) return _modelPromise;

    const loading = (async () => {
      const mod = await import(/* webpackIgnore: true */ TRANSFORMERS_URL);

      // Only use models served from LOCAL_MODEL_PATH; never fetch remote ones.
      mod.env.allowLocalModels = true;
      mod.env.allowRemoteModels = false;
      mod.env.localModelPath = LOCAL_MODEL_PATH;

      // Optional: prefer WebGPU
      // mod.env.backends = mod.env.backends || {};
      // mod.env.backends.onnx = mod.env.backends.onnx || {};
      // mod.env.backends.onnx.preferredBackend = 'webgpu';

      // WASM runtime paths.
      // NOTE(review): version in this URL presumably has to match TRANSFORMERS_URL - confirm when upgrading.
      mod.env.backends = mod.env.backends || {};
      mod.env.backends.onnx = mod.env.backends.onnx || {};
      mod.env.backends.onnx.wasm = mod.env.backends.onnx.wasm || {};
      mod.env.backends.onnx.wasm.wasmPaths =
        'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.15.0/dist/';

      // Processor and model are independent, so load them in parallel.
      const [processor, model] = await Promise.all([
        mod.AutoProcessor.from_pretrained(MODEL_ID),
        mod.CLIPVisionModelWithProjection.from_pretrained(MODEL_ID, { quantized: true }),
      ]);

      console.log('[LabelScanIndexer] Modell geladen');
      return { mod, processor, model };
    })();

    _modelPromise = loading;
    // Drop the cache entry on failure so the next call starts a fresh attempt.
    // The rejection itself still reaches the caller through `loading`.
    loading.catch(() => {
      if (_modelPromise === loading) _modelPromise = undefined;
    });
    return loading;
  }

  /**
   * Computes an L2-normalised image embedding for the given file.
   *
   * Steps: obtain the model, draw the file onto a canvas, re-encode the canvas
   * as JPEG (quality 0.95), run processor and model, then normalise the
   * resulting vector.
   *
   * @param {Blob} file - Image file to embed.
   * @returns {Promise<Float32Array>} New vector scaled to unit length (an
   *   all-zero vector is returned unchanged because the divisor falls back to 1).
   * @throws {Error} When the model output is not a Float32Array.
   */
  async function buildEmbeddingFromFile(file) {
    const { mod, processor, model } = await ensureModel();
    const canvas = await fileToCanvasExif(file);

    // OffscreenCanvas exposes convertToBlob(); canvas elements only have the
    // callback-based toBlob().
    let blob;
    if (canvas.convertToBlob) {
      blob = await canvas.convertToBlob({ type: 'image/jpeg', quality: 0.95 });
    } else {
      blob = await new Promise((resolve) => canvas.toBlob(resolve, 'image/jpeg', 0.95));
    }

    const raw = await mod.RawImage.fromBlob(blob);
    const inputs = await processor(raw, { return_tensors: 'pt' });
    const out = await model.forward({ pixel_values: inputs.pixel_values });

    // Accept either a tensor-like object carrying `.data` or the raw array.
    const embedding = out?.image_embeds?.data || out?.image_embeds;
    if (!(embedding instanceof Float32Array)) throw new Error('Embedding-Format unerwartet');

    // Normalise to unit length.
    const sumOfSquares = embedding.reduce((acc, x) => acc + x * x, 0);
    const norm = Math.sqrt(sumOfSquares) || 1;
    return Float32Array.from(embedding, (x) => x / norm);
  }

  // ---------- Load / save index ----------
  /**
   * Fetches the current index array from the wiki page INDEX_TITLE (action=raw).
   *
   * @returns {Promise<Array>} The stored entries. An empty array is returned
   *   when the page does not exist yet (HTTP 404), is blank, or contains `null`.
   * @throws {Error} On any other non-OK HTTP status, on invalid JSON, or when
   *   the JSON is not an array. Failing here is deliberate: returning `[]` for
   *   unreadable content would make the caller overwrite the existing index
   *   with a single new entry.
   */
  async function fetchIndexJSON() {
    const url = mw.util.getUrl(INDEX_TITLE, { action: 'raw', ctype: 'application/json' });
    const res = await fetch(url, { cache: 'no-store' });

    // A missing page simply means there is no index yet.
    if (res.status === 404) return [];
    if (!res.ok) throw new Error('Index nicht ladbar: ' + res.status);

    const text = await res.text();
    if (text.trim() === '') return [];

    let parsed;
    try {
      parsed = JSON.parse(text);
    } catch (err) {
      throw new Error('Index enthält ungültiges JSON: ' + err.message, { cause: err });
    }

    if (parsed === null) return [];
    if (!Array.isArray(parsed)) throw new Error('Index ist kein JSON-Array');
    return parsed;
  }

  /**
   * Saves the given array as pretty-printed JSON to the wiki page INDEX_TITLE.
   *
   * @param {Array} newArray - Complete index content to store.
   * @param {string} [summary] - Edit summary; a default is used when falsy.
   * @returns {Promise<object>} Result of the `action=edit` API request.
   */
  async function saveIndexJSON(newArray, summary) {
    await mw.loader.using(['mediawiki.api']);
    const api = new mw.Api();
    const text = JSON.stringify(newArray, null, 2) + '\n';
    // No `nocreate` here on purpose: Action API boolean parameters count as
    // true whenever they are present, whatever their value, so the previous
    // `nocreate: 0` actually forbade creating the page.
    return api.postWithToken('csrf', {
      action: 'edit',
      title: INDEX_TITLE,
      text,
      summary: summary || 'LabelScan: +1 embedding (Auto-Indexer)',
      bot: 1,
    });
  }

  // ---------- Click-Handler ----------
  const runBtn = document.getElementById('idx-run');
  if (!runBtn) {
    console.warn('[LabelScanIndexer] Button #idx-run nicht gefunden – ist das HTML auf der Seite eingebunden?');
    return;
  }

  runBtn.addEventListener('click', async () => {
    try {
      if (!hasInterfaceRight()) {
        alert('⚠️ Du brauchst Admin/Interface-Rechte (editinterface).');
        return;
      }

      const title = $('idx-title')?.value.trim();
      const thumb = $('idx-thumb')?.value.trim();
      const file = $('idx-file')?.files?.[0];

      if (!title) return alert('Titel fehlt.');
      if (!file) return alert('Bitte eine Bilddatei wählen.');

      runBtn.disabled = true;

      status('Embedding berechnen …');
      const vec = await buildEmbeddingFromFile(file);
      const b64 = float32ToBase64(vec);

      $('idx-out').value = JSON.stringify({ title, thumb, embed: b64 }, null, 2);

      status('Index laden …');
      const arr = await fetchIndexJSON();
      arr.push({ title, thumb, embed: b64 });

      status('Speichern …');
      await saveIndexJSON(arr, `LabelScan: +1 embedding für "${title}"`);

      status('Gespeichert ✅');
    } catch (e) {
      console.error(e);
      status('Fehler ❌ ' + (e?.message || e));
      alert('Fehler: ' + (e?.message || e));
    } finally {
      runBtn.disabled = false;
    }
  });

  console.log('[LabelScanIndexer] bereit');
})();