Andriy 2023-05-20 16:29:17 -06:00
commit 4447888485
17 changed files with 13346 additions and 0 deletions

4
.env.example Normal file

@@ -0,0 +1,4 @@
OPENAI_KEY=
ELEVENLABS_KEY=
OPENAI_ORGANIZATION=Personal
ELEVENLABS_VOICE_ID=

33
.gitignore vendored Normal file

@@ -0,0 +1,33 @@
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
# env
.env

37
README.md Normal file

@@ -0,0 +1,37 @@
## Tu Tutor
Weekend project where I learned how to use the OpenAI chat API, ElevenLabs, and prompt engineering to build an AI tutor I can speak with to improve my Spanish! I took some inspiration & code from Aleem's [Espanol Love Repo](https://github.com/aleemrehmtulla/espanol-love) :-)
For more content, you can follow me on Twitter [here](https://twitter.com/emergingbits)
### Setup
1. Grab an OpenAI API key from [here](https://beta.openai.com/) and add it to your .env file
2. Grab an ElevenLabs API key from [here](https://beta.elevenlabs.io/speech-synthesis) and add it to your .env file
3. Clone a voice with ElevenLabs and add its voice ID to your .env file
4. Run `npm install` to grab the necessary packages
5. Run `npm run dev` to start the dev server on `http://localhost:3000`
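The app reads these values from `process.env` on the server (see `pages/api/translate.ts`). If things aren't working, a quick sanity check along these lines — not part of the repo, just a sketch — confirms the keys are actually being picked up:

```ts
// sketch: verify the .env values are visible to the Next.js server
// (OPENAI_KEY, ELEVENLABS_KEY and ELEVENLABS_VOICE_ID come from .env.example)
export function assertEnv() {
  const required = ["OPENAI_KEY", "ELEVENLABS_KEY", "ELEVENLABS_VOICE_ID"];
  const missing = required.filter((name) => !process.env[name]);
  if (missing.length > 0) {
    throw new Error(`Missing env vars: ${missing.join(", ")}`);
  }
}
```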
### Deploy to the world
1. Push all your changes to GitHub (or another git provider)
2. Head to vercel.app, import your repo, and hit deploy
3. Go to the deployment's settings, add your .env values, and rebuild
### Other Useful Notes
When setting up ElevenLabs, you need to configure voices to get the proper `ELEVENLABS_VOICE_ID` (see the sketch after the link below):
- https://docs.elevenlabs.io/api-reference/voices
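A minimal sketch for finding that ID, assuming the `GET /v1/voices` endpoint from the docs linked above and the same `xi-api-key` header the app already uses:

```ts
// sketch: list your ElevenLabs voices and their IDs (assumes the /v1/voices endpoint)
async function listVoices(apiKey: string) {
  const res = await fetch("https://api.elevenlabs.io/v1/voices", {
    headers: { "xi-api-key": apiKey },
  });
  const data = await res.json();
  for (const voice of data.voices ?? []) {
    console.log(voice.voice_id, voice.name);
  }
}

listVoices(process.env.ELEVENLABS_KEY as string);
```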
OpenAI has rate limits. This repo uses GPT-3.5; if you have access to GPT-4, you can switch the model (see the snippet after the link below)!
- https://platform.openai.com/account/rate-limits
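The model is set in `askOpenAI` inside `pages/api/translate.ts`, so switching is a one-line change. A simplified sketch of that function — it reuses the `openai` client and `Message` type already in that file, and `"gpt-4"` assumes your account has access to it:

```ts
// simplified sketch of askOpenAI from pages/api/translate.ts with the model made configurable
// repo default is "gpt-3.5-turbo-0301"; pass "gpt-4" only if your key has access
async function askOpenAI(messages: Message[], model = "gpt-3.5-turbo-0301") {
  const response = await openai.createChatCompletion({ model, messages });
  return response.data.choices[0].message?.content;
}
```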
To get the best experience out of the OpenAI prompts, refer to the DeepLearning.AI prompt-engineering course. Specifically, I used lesson 8 (the chatbot lesson); a sketch of that pattern follows the link below:
- improve prompting with: https://learn.deeplearning.ai/chatgpt-prompt-eng/lesson/8/chatbot
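What that lesson boils down to — and what `askOpenAI` in `pages/api/translate.ts` already does — is keeping a running `messages` array and prepending a system message that defines the tutor's persona. A rough illustration of tightening that system prompt (the wording here is an example, not what the repo ships):

```ts
import type { Message } from "@/types";

// illustrative system prompt — tweak the persona and constraints to taste
const systemPrompt =
  "You are a friendly Spanish tutor. Reply in simple Spanish, gently correct " +
  "grammar mistakes, and end each reply with a short follow-up question.";

// the chatbot pattern from the lesson: system message first, then every turn so far
function buildChatMessages(history: Message[]) {
  return [{ role: "system" as const, content: systemPrompt }, ...history];
}
```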
OpenAI does a fairly good job with translation, but it's not perfect. Here is another provider for more precise translation (a rough API sketch follows the link):
- https://www.deepl.com/translator
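DeepL also offers an API if you want to wire it in (separate API key required). A rough sketch, assuming the `v2/translate` endpoint and `DeepL-Auth-Key` header from their docs — double-check the current request format before relying on it:

```ts
// sketch: translate a sentence with the DeepL API (assumes you have a DeepL API key)
async function deeplTranslate(text: string, apiKey: string) {
  const res = await fetch("https://api-free.deepl.com/v2/translate", {
    method: "POST",
    headers: {
      Authorization: `DeepL-Auth-Key ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ text: [text], target_lang: "ES" }),
  });
  const data = await res.json();
  return data.translations?.[0]?.text as string | undefined;
}
```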

37
components/NameInput.tsx Normal file

@@ -0,0 +1,37 @@
import React, { useState } from "react";
import { Box, Button, FormControl, FormLabel, Input } from "@chakra-ui/react";
import { handleEnterKeyPress } from "@/utils";
const NameInput = ({ onEnter }: { onEnter: (name: string) => void }) => {
const [name, setName] = useState("");
const handleChange = (event: React.ChangeEvent<HTMLInputElement>) => {
setName(event.target.value);
};
return (
<Box width="100%">
<FormControl id="name" mt={4}>
<FormLabel>What is your name?</FormLabel>
<Input
type="text"
value={name}
onChange={handleChange}
onKeyDown={handleEnterKeyPress(() => {
onEnter(name);
})}
placeholder="Enter your name"
/>
</FormControl>
<Button
colorScheme="blue"
onClick={() => onEnter(name)}
isDisabled={!name.trim()}
>
Submit
</Button>
</Box>
);
};
export default NameInput;

34
hooks/useIsChrome.ts Normal file

@@ -0,0 +1,34 @@
import { useEffect, useState } from "react";
const useIsChrome = (): boolean => {
const [isChrome, setIsChrome] = useState(false);
const isBrave = async (): Promise<boolean> => {
try {
const isBrave = await (window.navigator as any).brave?.isBrave();
return !!isBrave;
} catch (error) {
return false;
}
};
useEffect(() => {
const checkBrowser = async () => {
const userAgent = window.navigator.userAgent.toLowerCase();
const isBraveBrowser = await isBrave();
const isChrome =
userAgent.indexOf("chrome") > -1 &&
userAgent.indexOf("edge") === -1 &&
userAgent.indexOf("opr") === -1 &&
!isBraveBrowser;
setIsChrome(isChrome);
};
checkBrowser();
}, []);
return isChrome;
};
export default useIsChrome;

6
next.config.js Normal file

@@ -0,0 +1,6 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
reactStrictMode: true,
}
module.exports = nextConfig

9165
package-lock.json generated Normal file

File diff suppressed because it is too large

30
package.json Normal file

@@ -0,0 +1,30 @@
{
"name": "espanol-love",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@chakra-ui/react": "^2.6.0",
"@emotion/react": "^11.10.8",
"@emotion/styled": "^11.10.8",
"@types/dom-speech-recognition": "^0.0.1",
"@types/node": "18.16.2",
"@types/react": "18.2.0",
"@types/react-dom": "18.2.1",
"eslint": "8.39.0",
"eslint-config-next": "13.3.1",
"framer-motion": "^10.12.4",
"next": "13.3.1",
"openai": "^3.2.1",
"react": "18.2.0",
"react-dom": "18.2.0",
"react-icons": "^4.8.0",
"react-spinners": "^0.13.8",
"typescript": "5.0.4"
}
}

10
pages/_app.tsx Normal file

@@ -0,0 +1,10 @@
import type { AppProps } from "next/app";
import { ChakraProvider } from "@chakra-ui/react";
export default function App({ Component, pageProps }: AppProps) {
return (
<ChakraProvider>
<Component {...pageProps} />
</ChakraProvider>
);
}

13
pages/_document.tsx Normal file

@@ -0,0 +1,13 @@
import { Html, Head, Main, NextScript } from 'next/document'
export default function Document() {
return (
<Html lang="en">
<Head />
<body>
<Main />
<NextScript />
</body>
</Html>
)
}

64
pages/api/translate.ts Normal file

@@ -0,0 +1,64 @@
import { Message } from "@/types";
import type { NextApiRequest, NextApiResponse } from "next";
import { Configuration, OpenAIApi } from "openai";
const configuration = new Configuration({
apiKey: process.env.OPENAI_KEY,
});
const openai = new OpenAIApi(configuration);
export default async function translate(
req: NextApiRequest,
res: NextApiResponse
) {
const { messages, userName } = req.body;
const translatedText = await askOpenAI({ messages, userName });
const TRIAL_URL = "https://api.elevenlabs.io";
const API_PATH = `/v1/text-to-speech/${process.env.ELEVENLABS_VOICE_ID}`;
const API_KEY = process.env.ELEVENLABS_KEY as string;
const OPTIONS = {
method: "POST",
body: JSON.stringify({
text: translatedText,
model_id: "eleven_monolingual_v1",
}),
headers: {
"xi-api-key": API_KEY,
"Content-Type": "application/json",
accept: "audio/mpeg",
},
};
const response = await fetch(TRIAL_URL + API_PATH, OPTIONS);
const audioData = await response.arrayBuffer();
const audioDataBase64 = Buffer.from(audioData).toString("base64");
res.setHeader("Content-Type", "application/json");
res.send(JSON.stringify({ audioDataBase64, translatedText }));
}
async function askOpenAI({
messages,
userName,
}: {
messages: Message[];
userName: string;
}) {
const response = await openai.createChatCompletion({
model: "gpt-3.5-turbo-0301",
messages: [
{
role: "system",
content: `Imagine you are a Spanish teacher having a conversation with a student who is looking to improve their Spanish. The user will start the conversation with you, and you will respond to them and ask about them. If the user asks you a question, you can help them restructure the question in Spanish and continue the conversation. The user's name is ${userName}.`,
},
...messages,
],
});
return response.data.choices[0].message?.content;
}

273
pages/index.tsx Normal file

@@ -0,0 +1,273 @@
import { useState, useEffect, useRef } from "react";
import Head from "next/head";
import { FaMicrophone, FaTwitter } from "react-icons/fa";
import Beatloader from "react-spinners/BeatLoader";
import base64ToBlob from "@/utils/basetoblob";
import {
Box,
Button,
HStack,
Heading,
Icon,
Text,
Textarea,
VStack,
useToast,
useColorModeValue,
Link,
} from "@chakra-ui/react";
import { handleEnterKeyPress } from "@/utils";
import NameInput from "@/components/NameInput";
import { Message } from "@/types";
import useIsChrome from "@/hooks/useIsChrome";
let SpeechRecognition: { new (): SpeechRecognition };
if (typeof window !== "undefined") {
SpeechRecognition =
window.SpeechRecognition || window.webkitSpeechRecognition;
}
function Home() {
const isChrome = useIsChrome();
const micRef = useRef<SpeechRecognition>();
const audioRef = useRef<HTMLAudioElement | null>(null);
const [messages, setMessages] = useState<Message[]>([]);
const addMessage = (message: Message) => {
setMessages((prevMessages) => [...prevMessages, message]);
};
const toast = useToast();
const [isListening, setIsListening] = useState(false);
const [text, setText] = useState("");
const [loading, setLoading] = useState(false);
const [isRecording, setIsRecording] = useState<boolean>(false);
const [speechRecognition, setSpeechRecognition] =
useState<SpeechRecognition | null>(null);
// on the first translate, we need to get the user's name
// on subsequent translates, we can use the name from state
const translate = async (props?: { name?: string }) => {
if (!text && !props?.name)
return toast({
title: "Enter text to translate first!",
status: "error",
});
if (!userName && !props?.name)
return toast({
title: "Enter your name first!",
status: "error",
});
const message = { role: "user", content: text };
if (!props?.name) {
addMessage({ role: "user", content: text });
setText("");
}
if (!audioRef.current)
return toast({ title: "Error enabling audio", status: "error" });
setLoading(true);
// response for chat gpt
const response = await fetch("/api/translate", {
method: "POST",
headers: {
"Content-Type": "application/json",
"Access-Control-Allow-Origin": "*",
Accept: "application/json",
},
body: JSON.stringify({
messages: [...messages, message],
userName: userName || props?.name,
}),
});
try {
const { audioDataBase64, translatedText } = await response.json();
addMessage({ role: "assistant", content: translatedText });
const audioBlob = base64ToBlob(audioDataBase64, "audio/mpeg");
const audioURL = URL.createObjectURL(audioBlob);
audioRef.current.src = audioURL;
await audioRef.current.play();
setText("");
} catch (e: any) {
// surface parsing/audio errors instead of leaving the loading state stuck
console.log("Error:", e.message);
} finally {
setLoading(false);
}
};
// this is a hack to allow mobile browsers to play audio without user interaction
const startAudioForPermission = async () => {
if (!audioRef.current) return;
await audioRef.current.play();
};
// configure the recognizer and start/stop it whenever isListening changes
const handleListen = async (mic: any) => {
if (!SpeechRecognition || !mic)
return alert("Speech recognition is not available in your browser.");
mic.continuous = true;
mic.interimResults = true;
mic.lang = "es-ES";
if (isListening) mic.start();
if (!isListening) mic.stop();
mic.onresult = (event: SpeechRecognitionEvent) => {
const transcript = Array.from(event.results)
.map((result) => result[0])
.map((result) => result.transcript)
.join("");
setText(transcript);
};
// register the error handler on the recognizer itself, not inside onresult
mic.onerror = (event: SpeechRecognitionErrorEvent) => {
console.log(event.error);
};
};
useEffect(() => {
const audio = new Audio();
audioRef.current = audio;
// SpeechRecognition is undefined outside Chrome-based browsers, so guard before constructing it
if (!SpeechRecognition) return;
const mic = new SpeechRecognition();
micRef.current = mic;
return () => {
mic.stop();
};
}, []);
useEffect(() => {
handleListen(micRef.current);
}, [isListening]);
const userBgColor = useColorModeValue("blue.500", "blue.300");
const assistantBgColor = useColorModeValue("gray.100", "gray.700");
const userColor = "white";
const assistantColor = "black";
const [userName, setUserName] = useState<null | string>(null);
const assistantName = "Tutor";
return (
<>
<Head>
<title>tu tutor</title>
</Head>
<VStack pt={40} px={4} spacing={4} h="100vh" maxW="600px" mx="auto">
<Heading as="h1" color="black">
Your Tutor in Spanish
</Heading>
<Text color="black">
Start a conversation with an AI tutor in Spanish. For more tutorials &
content, you can follow me on Twitter{" "}
<Link
href="https://twitter.com/emergingbits"
color="#1DA1F2"
isExternal
>
<Icon as={FaTwitter} fontSize="md" />
</Link>
</Text>
<Text color="black" as="i">
<b> Microphone works well in Google Chrome only (for now).</b>
</Text>
{!userName ? (
<NameInput
onEnter={(name) => {
startAudioForPermission();
setUserName(name);
translate({ name });
}}
/>
) : (
<>
{messages.map((message, index) => {
const isUser = message.role === "user";
return (
<Box
key={index}
alignSelf={isUser ? "flex-end" : "flex-start"}
backgroundColor={isUser ? userBgColor : assistantBgColor}
color={isUser ? userColor : assistantColor}
borderRadius="lg"
px={4}
py={2}
maxWidth="70%"
position="relative"
>
<Text
fontSize="xs"
position="absolute"
color="black"
top={-4}
left={2}
>
{isUser ? userName : assistantName}
</Text>
<Text fontSize="sm">{message.content}</Text>
</Box>
);
})}
<VStack w="100%" spacing={4}>
<Textarea
value={text}
onChange={(e) => setText(e.target.value)}
onKeyDown={handleEnterKeyPress(() => {
translate();
})}
/>
</VStack>
<HStack w="100%" spacing={4}>
<Button
h={9}
variant="outline"
onClick={() => {
translate();
}}
isLoading={loading}
spinner={<Beatloader size={8} />}
>
Send
</Button>
{isChrome && (
<Icon
as={FaMicrophone}
cursor="pointer"
color={isListening ? "red.500" : "gray.500"}
onClick={() => {
if (isListening === true) {
translate();
}
setIsListening(!isListening);
setText("");
}}
/>
)}
</HStack>
</>
)}
</VStack>
</>
);
}
export default Home;

23
tsconfig.json Normal file

@@ -0,0 +1,23 @@
{
"compilerOptions": {
"target": "es5",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
"exclude": ["node_modules"]
}

4
types/index.ts Normal file

@@ -0,0 +1,4 @@
export type Message = {
role: "user" | "assistant";
content: string;
};

14
utils/basetoblob.ts Normal file

@@ -0,0 +1,14 @@
// a bit of a hack: the audio comes back from the API as base64 inside JSON,
// which lets a single response carry both the MP3 and the translated text
export default function base64ToBlob(base64: string, mimeType: string) {
const byteString = atob(base64);
const arrayBuffer = new ArrayBuffer(byteString.length);
const uint8Array = new Uint8Array(arrayBuffer);
for (let i = 0; i < byteString.length; i++) {
uint8Array[i] = byteString.charCodeAt(i);
}
return new Blob([arrayBuffer], { type: mimeType });
}
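For context, a sketch of how the client consumes this (mirroring pages/index.tsx): decode the base64 from the JSON response into a Blob, wrap it in an object URL, and hand it to an Audio element.

// usage sketch matching what pages/index.tsx does with the /api/translate response
import base64ToBlob from "@/utils/basetoblob";

async function playTranslation(audioDataBase64: string) {
  const audioBlob = base64ToBlob(audioDataBase64, "audio/mpeg");
  const audio = new Audio(URL.createObjectURL(audioBlob));
  await audio.play();
}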

13
utils/index.ts Normal file

@@ -0,0 +1,13 @@
import { KeyboardEvent } from "react";
export function handleEnterKeyPress<T = Element>(f: () => void) {
return handleKeyPress<T>(f, "Enter");
}
export function handleKeyPress<T = Element>(f: () => void, key: string) {
return (e: KeyboardEvent<T>) => {
if (e.key === key) {
f();
}
};
}

3586
yarn.lock Normal file

File diff suppressed because it is too large