Install dependencies: npm install playwright cheerio node-html-markdown
Install firefox browser: npx playwright install firefox
Run the script: tsx html-to-md.ts https://mistral.ai/fr/news/mixtral-8x22b/
| const YAML = require('yaml') | |
| import Path from 'node:path' | |
| import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs' | |
| import { createHash } from 'node:crypto' | |
| export function vcr({ | |
| instance, | |
| methods, | |
| mode = 'cache', | |
| verbose = true, |
| Tu possèdes un excellent esprit d'analyse et de synthèse et tu es capable d'extraire les informations pertinentes d'un document pour les résumer. | |
| Voici le document: | |
| # BEGIN | |
| // Document content | |
| # END DOCUMENT | |
| Réponds en suivant ce format YAML: | |
| ```yaml |
| Tu possèdes un excellent esprit d'analyse et de synthèse et tu es capable d'extraire les informations pertinentes d'un document pour les résumer. | |
| Voici le document: | |
| # BEGIN | |
| // Document content | |
| # END DOCUMENT | |
| Réponds en suivant ce format YAML: | |
| ```yaml |
// Counts how many tokens `text` encodes to with tiktoken.
// `text` is not defined in this snippet; it is assumed to be a string provided by the surrounding code.
| import { encoding_for_model } from 'tiktoken' | |
| // Choose the model whose tokenizer should be used; it can be 'gpt-4' for example | |
| const tokenEncoder = encoding_for_model('text-embedding-ada-002') | |
| // Contains a Uint32Array of token ids (non-negative integers, e.g. [9906, 1917, ...]) | |
| const tokens = tokenEncoder.encode(text) | |
| // Number of tokens | |
| console.log(tokens.length) |
// NOTE(review): the tiktoken README recommends calling tokenEncoder.free() once the encoder is no longer needed — confirm for the version in use.
// Class that exposes its private `name` field through an accessor declared as an
// arrow-function class property instead of a regular method.
| class Foobar { | |
| private name = 'foobar' | |
// Arrow-function property: `this` is captured lexically, so it refers to the
// instance being constructed even when getName is later called detached from it.
| getName = () => { | |
| return this.name | |
| } | |
| } | |
| class Barfoo { | |
| private name = 'barfoor' |
// Request handler that stores the task found in the request body and replies with HTTP 201.
// `req` and `res` look like Express-style request/response objects — TODO confirm.
| function createTask (req, res) { | |
| const newTask = req.body; | |
// The return value of verifyTask is ignored; presumably it throws on an invalid task — verify.
| verifyTask(newTask); | |
| const savedTask = database.addTask(newTask); | |
// The confirmation text includes the id read from savedTask.metadata.id.
| res.status(201).send(`Task ${savedTask.metadata.id} saved successfully`); | |
| } | |
| function addTask(newTask) { | |
| const tasks = readTasksFromFile(); | |
| const taskWithId = { ...newTask, metadata: { id: generateId() } }; |
# Starts a throwaway HTTP server on port 8000 that, for every request, buffers the
# body and then logs the method and URL, the headers and the body to the console
# before ending the response without writing any body.
node -e "const h = require('http');const s = h.createServer();const l = console.log;s.on('request', (rq, rs) => {let b = [];rq.on('data', (c) => {b.push(c);}).on('end', () => {b = Buffer.concat(b).toString();l('==== '+rq.method+' '+rq.url);l('> Headers');l(rq.headers);l('> Body');l(b);rs.end();});}).listen(8000);" |
Result:
Error: whoops
at last (/Users/adrien/projects/didask/odc/test.ts:43:9)
at middle (/Users/adrien/projects/didask/odc/test.ts:47:16)
at first (/Users/adrien/projects/didask/odc/test.ts:51:16)
at run (/Users/adrien/projects/didask/odc/test.ts:56:11)
at Object.<anonymous> (/Users/adrien/projects/didask/odc/test.ts:68:1)
at Module._compile (node:internal/modules/cjs/loader:1256:14)
This is a gist for this issue in the Transformers.js repository
Trying to classify the following texts as street addresses: