Skip to content

Instantly share code, notes, and snippets.

@stu43005
Last active September 2, 2023 16:10
Show Gist options
  • Save stu43005/31bd0b95d41c4f495cb0839f34878d6b to your computer and use it in GitHub Desktop.
Save stu43005/31bd0b95d41c4f495cb0839f34878d6b to your computer and use it in GitHub Desktop.
import { writeAll } from "https://deno.land/[email protected]/streams/write_all.ts";
import { load } from "npm:cheerio";
import OpenAI from "npm:openai";
// @deno-types="npm:@types/js-beautify"
import beautify from "npm:js-beautify";
// URL of the page to fetch and summarize.
const pageUrl = "https://www.google.com/";
const res = await fetch(pageUrl);
// Fail fast on HTTP errors so that an error page is never parsed and summarized.
if (!res.ok) {
  throw new Error(`Failed to fetch ${pageUrl}: ${res.status} ${res.statusText}`);
}
const html = await res.text();
// Parse the markup into a cheerio document (`$`) used by the rest of the script.
const $ = load(html);
// Drop page chrome and non-content elements up front.
$(
  'head, header:nth-child(1), menu, nav, footer:nth-last-child(1), meta, script, link, style, noscript, svg'
).remove();

// Remove elements hidden through their `style` attribute and strip presentational
// attributes. `class` is intentionally left in place during this pass: the container
// lookup further down matches on `.post`, which would be impossible once classes are gone.
$('*').each((_index, el) => {
  const $el = $(el);
  if ($el.css('display') === 'none' || $el.css('visibility') === 'hidden') {
    $el.remove();
    return;
  }
  for (const key of Object.keys($el.attr() ?? {})) {
    if (key.startsWith('data-')) {
      $el.removeAttr(key);
    }
  }
  $el.removeAttr('style').removeAttr('width').removeAttr('height');
});

// Image loading hints carry no information about the article itself.
$('img')
  .removeAttr('srcset')
  .removeAttr('sizes')
  .removeAttr('loading')
  .removeAttr('decoding');

// Discard paragraphs that are empty or contain only whitespace.
$('p').each((_index, el) => {
  const $el = $(el);
  if (!$el.html()?.trim()) {
    $el.remove();
  }
});

// Content selectors, most specific first: the first selector that matches an element wins.
const contentSelectors = ['article, .post', 'main, #main', 'body'];
const $container = contentSelectors
  .map((selector) => $(selector).first())
  .find(($candidate) => $candidate.length > 0);

// Only now strip `class` (kept on <i> elements, as before). Doing this before the
// lookup above would prevent `.post` from ever matching.
$('*').not('i').removeAttr('class');

// Serialize the chosen container's inner HTML compactly (no indentation, no preserved
// blank lines). Falls back to an empty string when nothing matched.
const content = beautify.html($container?.html() ?? '', {
  indent_size: 0,
  preserve_newlines: false,
});
// Read the API key from the environment instead of hard-coding it in the script.
// An explicit empty string would be sent as-is and every request would be rejected,
// so stop early with a clear message when the variable is missing.
// NOTE: under Deno this requires env access (e.g. `--allow-env=OPENAI_API_KEY`).
const apiKey = Deno.env.get("OPENAI_API_KEY");
if (!apiKey) {
  throw new Error("OPENAI_API_KEY environment variable is not set");
}
const openai = new OpenAI({ apiKey });
// System prompt currently in use: asks for topic tags only, in Traditional Chinese.
// Earlier prompt variants, kept for reference:
//   - (originally in Chinese) "The user provides HTML content; please summarize what the topic of the article is about."
//   - "You are a professional text summarizer, you can only summarize the text, don't interpret it."
//   - "Please summarize this text in the most concise language and must use Traditional Chinese language!"
//   - "user is providing a web page, please sumerize it and only provide 20 'topic tags' in Traditional Chinese and seperate the tags by half-spaced comma. Every tag should not be longer than 4 to 10 Chinese Characters."
const systemPrompt =
  "You are a professional content summarizer, you can only summerize the content in Traditional Chinese by only providing at least 12 tags of its topic, don't interpret it. Seperate the labels by half-spaced comma.";

// Request a streamed chat completion for the cleaned-up page content.
const stream = await openai.chat.completions.create({
  // Other models noted by the author, with their context sizes:
  //   "gpt-3.5-turbo" (4,096 tokens), "gpt-4" (8,192 tokens), "gpt-4-32k" (32,768 tokens)
  model: "gpt-3.5-turbo-16k", // 16,384 tokens
  stream: true,
  messages: [
    { role: "system", content: systemPrompt },
    { role: "user", content },
  ],
  max_tokens: 1000,
  temperature: 0,
  top_p: 1,
  frequency_penalty: 1,
  presence_penalty: 1,
});
// Relay the streamed completion to stdout as it arrives.
// A single encoder is reused for all chunks instead of allocating one per chunk.
const encoder = new TextEncoder();
for await (const part of stream) {
  const delta = part.choices[0]?.delta?.content;
  // Chunks without text (missing or empty delta) produce no output, so skip them.
  if (!delta) continue;
  await writeAll(Deno.stdout, encoder.encode(delta));
}
// Error: This model's maximum context length is 4097 tokens. However, your messages resulted in 92137 tokens. Please reduce the length of the messages.
// Error: Rate limit reached for 10KTPM-200RPM in organization org-3U7piMvbYBFn9HiRMKECXCIg on tokens per min. Limit: 10000 / min. Please try again in 6ms. Contact us through our help center at help.openai.com if you continue to have issues.
// Error: Invalid value for 'content': expected a string, got null.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment