mirror of https://github.com/Andriy-Kulak/ai-naval.git
synced 2024-11-25 16:08:31 +08:00

commit 4447888485: init
.env.example (Normal file, 4 lines)
@@ -0,0 +1,4 @@
OPENAI_KEY=
ELEVENLABS_KEY=
OPENAI_ORGANIZATION=Personal
ELEVENLABS_VOICE_ID=
.gitignore (vendored, Normal file, 33 lines)
@@ -0,0 +1,33 @@
# dependencies
/node_modules
/.pnp
.pnp.js

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

# env
.env
README.md (Normal file, 37 lines)
@@ -0,0 +1,37 @@
## Tu Tutor

Weekend project where I learned how to use the OpenAI chatbot API, ElevenLabs, and prompt engineering to build an AI tutor to speak with and improve my Spanish! I took some inspiration & code from Aleem's [Espanol Love Repo](https://github.com/aleemrehmtulla/espanol-love) :-)

For more content, you can follow me on Twitter [here](https://twitter.com/emergingbits)
### Setup

1. Grab an OpenAI API key from [here](https://beta.openai.com/) and add it to your .env file
2. Grab an ElevenLabs API key from [here](https://beta.elevenlabs.io/speech-synthesis) and add it to your .env file
3. Clone a voice with ElevenLabs and add the voice id to your .env file (see the sample .env sketched below)
4. Run `npm install` to grab the necessary packages
5. Run `npm run dev` to start your server on `http://localhost:3000`
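Filled in, the `.env` from steps 1-3 might look like this sketch (the values are placeholders, not real credentials):

```
OPENAI_KEY=sk-your-openai-key-here
ELEVENLABS_KEY=your-elevenlabs-key-here
OPENAI_ORGANIZATION=Personal
ELEVENLABS_VOICE_ID=your-cloned-voice-id-here
```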
### Deploy to the world

1. Push all your changes to GitHub (or another git provider)
2. Head to vercel.app, import your repo, and hit deploy
3. Go to the settings of the deployment, add your .env values, and rebuild
### Other Useful Notes

When setting up ElevenLabs, you need to configure voices and look up the proper `ELEVENLABS_VOICE_ID` (a lookup sketch follows the link below):

- https://docs.elevenlabs.io/api-reference/voices
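A minimal sketch of that lookup, assuming Node 18+ (for the global `fetch`) and `ELEVENLABS_KEY` set in your environment; the file name `listVoices.ts` is just an example:

```ts
// listVoices.ts: print each voice's id and name so you can pick ELEVENLABS_VOICE_ID.
async function listVoices() {
  const res = await fetch("https://api.elevenlabs.io/v1/voices", {
    headers: { "xi-api-key": process.env.ELEVENLABS_KEY as string },
  });
  if (!res.ok) throw new Error(`Voices request failed: ${res.status}`);
  const { voices } = await res.json();
  for (const v of voices as { voice_id: string; name: string }[]) {
    console.log(v.voice_id, v.name);
  }
}

listVoices();
```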
OpenAI has rate limits. This repo uses OpenAI 3.5; if you have access to 4.0, you can switch the model (sketched below)!

- https://platform.openai.com/account/rate-limits
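The model name lives in `pages/api/translate.ts` (in `askOpenAI`). One way to make the switch configurable, sketched here with a hypothetical `OPENAI_MODEL` env variable that is not part of this repo:

```ts
// Defaults to the 3.5 model this repo ships with; set OPENAI_MODEL=gpt-4 to switch.
const OPENAI_MODEL = process.env.OPENAI_MODEL ?? "gpt-3.5-turbo-0301";
// ...then pass { model: OPENAI_MODEL, messages } to openai.createChatCompletion.
```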
To properly configure OpenAI for the best experience, refer to the DeepLearning.AI prompt engineering course. Specifically, I used lesson 8:

- improve prompting with: https://learn.deeplearning.ai/chatgpt-prompt-eng/lesson/8/chatbot

OpenAI does a fairly good job with translating, but it's not perfect. Here is another provider for more precise translation:

- https://www.deepl.com/translator
components/NameInput.tsx (Normal file, 37 lines)
@@ -0,0 +1,37 @@
import React, { useState } from "react";
import { Box, Button, FormControl, FormLabel, Input } from "@chakra-ui/react";
import { handleEnterKeyPress } from "@/utils";

const NameInput = ({ onEnter }: { onEnter: (name: string) => void }) => {
  const [name, setName] = useState("");

  const handleChange = (event: React.ChangeEvent<HTMLInputElement>) => {
    setName(event.target.value);
  };

  return (
    <Box width="100%">
      <FormControl id="name" mt={4}>
        <FormLabel>What is your name?</FormLabel>
        <Input
          type="text"
          value={name}
          onChange={handleChange}
          onKeyDown={handleEnterKeyPress(() => {
            onEnter(name);
          })}
          placeholder="Enter your name"
        />
      </FormControl>
      <Button
        colorScheme="blue"
        onClick={() => onEnter(name)}
        isDisabled={!name.trim()}
      >
        Submit
      </Button>
    </Box>
  );
};

export default NameInput;
hooks/useIsChrome.ts (Normal file, 34 lines)
@@ -0,0 +1,34 @@
import { useEffect, useState } from "react";

const useIsChrome = (): boolean => {
  const [isChrome, setIsChrome] = useState(false);

  // Brave reports a Chrome user agent, so it needs its own check.
  const isBrave = async (): Promise<boolean> => {
    try {
      const braveCheck = await (window.navigator as any).brave?.isBrave();
      return !!braveCheck;
    } catch (error) {
      return false;
    }
  };

  useEffect(() => {
    const checkBrowser = async () => {
      const userAgent = window.navigator.userAgent.toLowerCase();
      const isBraveBrowser = await isBrave();
      const chromeDetected =
        userAgent.indexOf("chrome") > -1 &&
        userAgent.indexOf("edge") === -1 &&
        userAgent.indexOf("opr") === -1 &&
        !isBraveBrowser;

      setIsChrome(chromeDetected);
    };

    checkBrowser();
  }, []);

  return isChrome;
};

export default useIsChrome;
next.config.js (Normal file, 6 lines)
@@ -0,0 +1,6 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
  reactStrictMode: true,
}

module.exports = nextConfig
package-lock.json (generated, Normal file, 9165 lines)
File diff suppressed because it is too large
package.json (Normal file, 30 lines)
@@ -0,0 +1,30 @@
{
  "name": "espanol-love",
  "version": "0.1.0",
  "private": true,
  "scripts": {
    "dev": "next dev",
    "build": "next build",
    "start": "next start",
    "lint": "next lint"
  },
  "dependencies": {
    "@chakra-ui/react": "^2.6.0",
    "@emotion/react": "^11.10.8",
    "@emotion/styled": "^11.10.8",
    "@types/dom-speech-recognition": "^0.0.1",
    "@types/node": "18.16.2",
    "@types/react": "18.2.0",
    "@types/react-dom": "18.2.1",
    "eslint": "8.39.0",
    "eslint-config-next": "13.3.1",
    "framer-motion": "^10.12.4",
    "next": "13.3.1",
    "openai": "^3.2.1",
    "react": "18.2.0",
    "react-dom": "18.2.0",
    "react-icons": "^4.8.0",
    "react-spinners": "^0.13.8",
    "typescript": "5.0.4"
  }
}
pages/_app.tsx (Normal file, 10 lines)
@@ -0,0 +1,10 @@
import type { AppProps } from "next/app";
import { ChakraProvider } from "@chakra-ui/react";

export default function App({ Component, pageProps }: AppProps) {
  return (
    <ChakraProvider>
      <Component {...pageProps} />
    </ChakraProvider>
  );
}
pages/_document.tsx (Normal file, 13 lines)
@@ -0,0 +1,13 @@
import { Html, Head, Main, NextScript } from 'next/document'

export default function Document() {
  return (
    <Html lang="en">
      <Head />
      <body>
        <Main />
        <NextScript />
      </body>
    </Html>
  )
}
pages/api/translate.ts (Normal file, 64 lines)
@@ -0,0 +1,64 @@
import { Message } from "@/types";
import type { NextApiRequest, NextApiResponse } from "next";
import { Configuration, OpenAIApi } from "openai";

const configuration = new Configuration({
  apiKey: process.env.OPENAI_KEY,
});

const openai = new OpenAIApi(configuration);

export default async function translate(
  req: NextApiRequest,
  res: NextApiResponse
) {
  const { messages, userName } = req.body;

  const translatedText = await askOpenAI({ messages, userName });

  const TRIAL_URL = "https://api.elevenlabs.io";
  const API_PATH = `/v1/text-to-speech/${process.env.ELEVENLABS_VOICE_ID}`;
  const API_KEY = process.env.ELEVENLABS_KEY as string;

  const OPTIONS = {
    method: "POST",
    body: JSON.stringify({
      text: translatedText,
      model_id: "eleven_monolingual_v1",
    }),
    headers: {
      "xi-api-key": API_KEY,
      "Content-Type": "application/json",
      accept: "audio/mpeg",
    },
  };

  const response = await fetch(TRIAL_URL + API_PATH, OPTIONS);

  // Bail out with the upstream status instead of base64-encoding an error body.
  if (!response.ok) {
    return res
      .status(response.status)
      .json({ error: "Text-to-speech request failed" });
  }

  const audioData = await response.arrayBuffer();
  const audioDataBase64 = Buffer.from(audioData).toString("base64");

  res.setHeader("Content-Type", "application/json");
  res.send(JSON.stringify({ audioDataBase64, translatedText }));
}

async function askOpenAI({
  messages,
  userName,
}: {
  messages: Message[];
  userName: string;
}) {
  const response = await openai.createChatCompletion({
    model: "gpt-3.5-turbo-0301",
    messages: [
      {
        role: "system",
        content: `Imagine you are a Spanish teacher having a conversation with a student who is looking to improve their Spanish. The user will start the conversation with you, and you will respond to them and ask about them. If the user asks you a question, you can help them restructure the question in Spanish and continue the conversation. The user's name is ${userName}.`,
      },
      ...messages,
    ],
  });

  return response.data.choices[0].message?.content;
}
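For reference, a minimal sketch of calling this route directly; it assumes the dev server is running on http://localhost:3000 and Node 18+ for the global fetch, and the file name demo.ts plus the sample values are hypothetical:

// demo.ts: POST one user message and log what the route returns.
async function demo() {
  const res = await fetch("http://localhost:3000/api/translate", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({
      messages: [{ role: "user", content: "Hola, ¿cómo estás?" }],
      userName: "Ana",
    }),
  });
  const { translatedText, audioDataBase64 } = await res.json();
  console.log(translatedText, `${audioDataBase64.length} base64 chars of audio`);
}

demo();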
pages/index.tsx (Normal file, 273 lines)
@@ -0,0 +1,273 @@
import { useState, useEffect, useRef } from "react";
import Head from "next/head";
import { FaMicrophone, FaTwitter } from "react-icons/fa";
import Beatloader from "react-spinners/BeatLoader";
import base64ToBlob from "@/utils/basetoblob";
import {
  Box,
  Button,
  HStack,
  Heading,
  Icon,
  Text,
  Textarea,
  VStack,
  useToast,
  useColorModeValue,
  Link,
} from "@chakra-ui/react";
import { handleEnterKeyPress } from "@/utils";
import NameInput from "@/components/NameInput";
import { Message } from "@/types";
import useIsChrome from "@/hooks/useIsChrome";

let SpeechRecognition: { new (): SpeechRecognition };

if (typeof window !== "undefined") {
  SpeechRecognition =
    window.SpeechRecognition || window.webkitSpeechRecognition;
}
function Home() {
  const isChrome = useIsChrome();
  const micRef = useRef<SpeechRecognition>();
  const audioRef = useRef<HTMLAudioElement | null>(null);

  const [messages, setMessages] = useState<Message[]>([]);

  const addMessage = (message: Message) => {
    setMessages((prevMessages) => [...prevMessages, message]);
  };

  const toast = useToast();

  const [isListening, setIsListening] = useState(false);
  const [text, setText] = useState("");
  const [loading, setLoading] = useState(false);

  // on the first translate, we need to get the user's name
  // on subsequent translates, we can use the name from state
  const translate = async (props?: { name?: string }) => {
    if (!text && !props?.name)
      return toast({
        title: "Enter text to translate first!",
        status: "error",
      });

    if (!userName && !props?.name)
      return toast({
        title: "Enter your name first!",
        status: "error",
      });

    const message: Message = { role: "user", content: text };

    if (!props?.name) {
      addMessage(message);
      setText("");
    }

    if (!audioRef.current)
      return toast({ title: "Error enabling audio", status: "error" });

    setLoading(true);

    try {
      // response from chat gpt, plus the synthesized audio as base64
      const response = await fetch("/api/translate", {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "Access-Control-Allow-Origin": "*",
          Accept: "application/json",
        },
        body: JSON.stringify({
          messages: [...messages, message],
          userName: userName || props?.name,
        }),
      });

      const { audioDataBase64, translatedText } = await response.json();

      addMessage({ role: "assistant", content: translatedText });

      const audioBlob = base64ToBlob(audioDataBase64, "audio/mpeg");
      const audioURL = URL.createObjectURL(audioBlob);

      audioRef.current.src = audioURL;
      await audioRef.current.play();

      setText("");
    } catch (e: any) {
      console.log("Error:", e.message);
    } finally {
      setLoading(false);
    }
  };
  // this is a hack to allow mobile browsers to play audio without user interaction
  const startAudioForPermission = async () => {
    if (!audioRef.current) return;
    await audioRef.current.play();
  };

  const handleListen = async (mic?: SpeechRecognition) => {
    if (!SpeechRecognition)
      return alert("Speech recognition is not available in your browser.");
    if (!mic) return;

    mic.continuous = true;
    mic.interimResults = true;
    mic.lang = "es-ES";

    if (isListening) mic.start();
    if (!isListening) mic.stop();

    mic.onresult = (event: SpeechRecognitionEvent) => {
      const transcript = Array.from(event.results)
        .map((result) => result[0])
        .map((result) => result.transcript)
        .join("");
      setText(transcript);
    };

    // assigned at the top level (not inside onresult) so errors before the
    // first result are logged too
    mic.onerror = (event: SpeechRecognitionErrorEvent) => {
      console.log(event.error);
    };
  };

  useEffect(() => {
    const audio = new Audio();
    audioRef.current = audio;

    // SpeechRecognition is undefined outside Chrome-like browsers
    if (!SpeechRecognition) return;

    const mic = new SpeechRecognition();
    micRef.current = mic;

    return () => {
      mic.stop();
    };
  }, []);

  useEffect(() => {
    handleListen(micRef.current);
  }, [isListening]);
  const userBgColor = useColorModeValue("blue.500", "blue.300");
  const assistantBgColor = useColorModeValue("gray.100", "gray.700");
  const userColor = "white";
  const assistantColor = "black";

  const [userName, setUserName] = useState<null | string>(null);

  const assistantName = "Tutor";

  return (
    <>
      <Head>
        <title>tu tutor</title>
      </Head>
      <VStack pt={40} px={4} spacing={4} h="100vh" maxW="600px" mx="auto">
        <Heading as="h1" color="black">
          Your Tutor in Spanish
        </Heading>
        <Text color="black">
          Start a conversation with an AI tutor in Spanish. For more tutorials &
          content, you can follow me on Twitter{" "}
          <Link
            href="https://twitter.com/emergingbits"
            color="#1DA1F2"
            isExternal
          >
            <Icon as={FaTwitter} fontSize="md" />
          </Link>
        </Text>
        <Text color="black" as="i">
          <b> Microphone works well in Google Chrome only (for now).</b>
        </Text>

        {!userName ? (
          <NameInput
            onEnter={(name) => {
              startAudioForPermission();
              setUserName(name);
              translate({ name });
            }}
          />
        ) : (
          <>
            {messages.map((message, index) => {
              const isUser = message.role === "user";
              return (
                <Box
                  key={index}
                  alignSelf={isUser ? "flex-end" : "flex-start"}
                  backgroundColor={isUser ? userBgColor : assistantBgColor}
                  color={isUser ? userColor : assistantColor}
                  borderRadius="lg"
                  px={4}
                  py={2}
                  maxWidth="70%"
                  position="relative"
                >
                  <Text
                    fontSize="xs"
                    position="absolute"
                    color="black"
                    top={-4}
                    left={2}
                  >
                    {isUser ? userName : assistantName}
                  </Text>
                  <Text fontSize="sm">{message.content}</Text>
                </Box>
              );
            })}
            <VStack w="100%" spacing={4}>
              <Textarea
                value={text}
                onChange={(e) => setText(e.target.value)}
                onKeyDown={handleEnterKeyPress(() => {
                  translate();
                })}
              />
            </VStack>

            <HStack w="100%" spacing={4}>
              <Button
                h={9}
                variant="outline"
                onClick={() => {
                  translate();
                }}
                isLoading={loading}
                spinner={<Beatloader size={8} />}
              >
                Send
              </Button>
              {isChrome && (
                <Icon
                  as={FaMicrophone}
                  cursor="pointer"
                  color={isListening ? "red.500" : "gray.500"}
                  onClick={() => {
                    if (isListening === true) {
                      translate();
                    }
                    setIsListening(!isListening);
                    setText("");
                  }}
                />
              )}
            </HStack>
          </>
        )}
      </VStack>
    </>
  );
}

export default Home;
tsconfig.json (Normal file, 23 lines)
@@ -0,0 +1,23 @@
{
  "compilerOptions": {
    "target": "es5",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true,
    "skipLibCheck": true,
    "strict": true,
    "forceConsistentCasingInFileNames": true,
    "noEmit": true,
    "esModuleInterop": true,
    "module": "esnext",
    "moduleResolution": "node",
    "resolveJsonModule": true,
    "isolatedModules": true,
    "jsx": "preserve",
    "incremental": true,
    "paths": {
      "@/*": ["./*"]
    }
  },
  "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
  "exclude": ["node_modules"]
}
types/index.ts (Normal file, 4 lines)
@@ -0,0 +1,4 @@
export type Message = {
  role: "user" | "assistant";
  content: string;
};
utils/basetoblob.ts (Normal file, 14 lines)
@@ -0,0 +1,14 @@
// this is kinda wack, passing audio as base64,
// but it lets the API return the translated text alongside the audio in one JSON response
export default function base64ToBlob(base64: string, mimeType: string) {
  const byteString = atob(base64);
  const arrayBuffer = new ArrayBuffer(byteString.length);
  const uint8Array = new Uint8Array(arrayBuffer);

  for (let i = 0; i < byteString.length; i++) {
    uint8Array[i] = byteString.charCodeAt(i);
  }

  return new Blob([arrayBuffer], { type: mimeType });
}
utils/index.ts (Normal file, 13 lines)
@@ -0,0 +1,13 @@
import { KeyboardEvent } from "react";

export function handleEnterKeyPress<T = Element>(f: () => void) {
  return handleKeyPress<T>(f, "Enter");
}

export function handleKeyPress<T = Element>(f: () => void, key: string) {
  return (e: KeyboardEvent<T>) => {
    if (e.key === key) {
      f();
    }
  };
}