Andriy 2023-05-20 16:29:17 -06:00
commit 4447888485
17 changed files with 13346 additions and 0 deletions

4
.env.example Normal file

@@ -0,0 +1,4 @@
OPENAI_KEY=
ELEVENLABS_KEY=
OPENAI_ORGANIZATION=Personal
ELEVENLABS_VOICE_ID=

33
.gitignore vendored Normal file

@@ -0,0 +1,33 @@
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# next.js
/.next/
/out/
# production
/build
# misc
.DS_Store
*.pem
# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# vercel
.vercel
# typescript
*.tsbuildinfo
next-env.d.ts
# env
.env

37
README.md Normal file

@@ -0,0 +1,37 @@
## Tu Tutor
Weekend project where I learned how to use the OpenAI chat API, ElevenLabs, and prompt engineering to build an AI tutor I can speak with to improve my Spanish! I took some inspiration & code from Aleem's [Espanol Love Repo](https://github.com/aleemrehmtulla/espanol-love) :-)
For more content, you can follow me on Twitter [here](https://twitter.com/emergingbits)
### Setup
1. Grab an OpenAI API key from [here](https://beta.openai.com/) and add it to your .env file
2. Grab an ElevenLabs API key from [here](https://beta.elevenlabs.io/speech-synthesis) and add it to your .env file
3. Clone a voice with ElevenLabs and add its voice ID to your .env file
4. Run `npm install` to grab the necessary packages
5. Run `npm run dev` to start the dev server on `http://localhost:3000`
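The app reads these values from `process.env` on the server (see `pages/api/translate.ts`). If things aren't working, a quick sanity check along these lines — not part of the repo, just a sketch — confirms the keys are actually being picked up:

```ts
// sketch: verify the .env values are visible to the Next.js server
// (OPENAI_KEY, ELEVENLABS_KEY and ELEVENLABS_VOICE_ID come from .env.example)
export function assertEnv() {
  const required = ["OPENAI_KEY", "ELEVENLABS_KEY", "ELEVENLABS_VOICE_ID"];
  const missing = required.filter((name) => !process.env[name]);
  if (missing.length > 0) {
    throw new Error(`Missing env vars: ${missing.join(", ")}`);
  }
}
```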
### Deploy to the world
1. Push all your changes to GitHub (or another git provider)
2. Head to vercel.app, import your repo, and hit deploy
3. Go to the deployment's settings, add your .env values, and rebuild
### Other Useful Notes
When setting up ElevenLabs, you need to configure voices to get the proper `ELEVENLABS_VOICE_ID` (see the sketch after the link below):
- https://docs.elevenlabs.io/api-reference/voices
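A minimal sketch for finding that ID, assuming the `GET /v1/voices` endpoint from the docs linked above and the same `xi-api-key` header the app already uses:

```ts
// sketch: list your ElevenLabs voices and their IDs (assumes the /v1/voices endpoint)
async function listVoices(apiKey: string) {
  const res = await fetch("https://api.elevenlabs.io/v1/voices", {
    headers: { "xi-api-key": apiKey },
  });
  const data = await res.json();
  for (const voice of data.voices ?? []) {
    console.log(voice.voice_id, voice.name);
  }
}

listVoices(process.env.ELEVENLABS_KEY as string);
```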
OpenAI has rate limits. This repo uses GPT-3.5; if you have access to GPT-4, you can switch the model (see the snippet after the link below)!
- https://platform.openai.com/account/rate-limits
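The model is set in `askOpenAI` inside `pages/api/translate.ts`, so switching is a one-line change. A simplified sketch of that function — it reuses the `openai` client and `Message` type already in that file, and `"gpt-4"` assumes your account has access to it:

```ts
// simplified sketch of askOpenAI from pages/api/translate.ts with the model made configurable
// repo default is "gpt-3.5-turbo-0301"; pass "gpt-4" only if your key has access
async function askOpenAI(messages: Message[], model = "gpt-3.5-turbo-0301") {
  const response = await openai.createChatCompletion({ model, messages });
  return response.data.choices[0].message?.content;
}
```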
To get the best experience out of the OpenAI prompts, refer to the DeepLearning.AI prompt-engineering course. Specifically, I used lesson 8 (the chatbot lesson); a sketch of that pattern follows the link below:
- improve prompting with: https://learn.deeplearning.ai/chatgpt-prompt-eng/lesson/8/chatbot
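What that lesson boils down to — and what `askOpenAI` in `pages/api/translate.ts` already does — is keeping a running `messages` array and prepending a system message that defines the tutor's persona. A rough illustration of tightening that system prompt (the wording here is an example, not what the repo ships):

```ts
import type { Message } from "@/types";

// illustrative system prompt — tweak the persona and constraints to taste
const systemPrompt =
  "You are a friendly Spanish tutor. Reply in simple Spanish, gently correct " +
  "grammar mistakes, and end each reply with a short follow-up question.";

// the chatbot pattern from the lesson: system message first, then every turn so far
function buildChatMessages(history: Message[]) {
  return [{ role: "system" as const, content: systemPrompt }, ...history];
}
```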
OpenAI does a fairly good job with translation, but it's not perfect. Here is another provider for more precise translation (a rough API sketch follows the link):
- https://www.deepl.com/translator
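DeepL also offers an API if you want to wire it in (separate API key required). A rough sketch, assuming the `v2/translate` endpoint and `DeepL-Auth-Key` header from their docs — double-check the current request format before relying on it:

```ts
// sketch: translate a sentence with the DeepL API (assumes you have a DeepL API key)
async function deeplTranslate(text: string, apiKey: string) {
  const res = await fetch("https://api-free.deepl.com/v2/translate", {
    method: "POST",
    headers: {
      Authorization: `DeepL-Auth-Key ${apiKey}`,
      "Content-Type": "application/json",
    },
    body: JSON.stringify({ text: [text], target_lang: "ES" }),
  });
  const data = await res.json();
  return data.translations?.[0]?.text as string | undefined;
}
```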

37
components/NameInput.tsx Normal file

@@ -0,0 +1,37 @@
import React, { useState } from "react";
import { Box, Button, FormControl, FormLabel, Input } from "@chakra-ui/react";
import { handleEnterKeyPress } from "@/utils";
const NameInput = ({ onEnter }: { onEnter: (name: string) => void }) => {
const [name, setName] = useState("");
const handleChange = (event: React.ChangeEvent<HTMLInputElement>) => {
setName(event.target.value);
};
return (
<Box width="100%">
<FormControl id="name" mt={4}>
<FormLabel>What is your name?</FormLabel>
<Input
type="text"
value={name}
onChange={handleChange}
onKeyDown={handleEnterKeyPress(() => {
onEnter(name);
})}
placeholder="Enter your name"
/>
</FormControl>
<Button
colorScheme="blue"
onClick={() => onEnter(name)}
isDisabled={!name.trim()}
>
Submit
</Button>
</Box>
);
};
export default NameInput;

34
hooks/useIsChrome.ts Normal file

@@ -0,0 +1,34 @@
import { useEffect, useState } from "react";
const useIsChrome = (): boolean => {
const [isChrome, setIsChrome] = useState(false);
const isBrave = async (): Promise<boolean> => {
try {
const isBrave = await (window.navigator as any).brave?.isBrave();
return !!isBrave;
} catch (error) {
return false;
}
};
useEffect(() => {
const checkBrowser = async () => {
const userAgent = window.navigator.userAgent.toLowerCase();
const isBraveBrowser = await isBrave();
const isChrome =
userAgent.indexOf("chrome") > -1 &&
userAgent.indexOf("edge") === -1 &&
userAgent.indexOf("opr") === -1 &&
!isBraveBrowser;
setIsChrome(isChrome);
};
checkBrowser();
}, []);
return isChrome;
};
export default useIsChrome;

6
next.config.js Normal file

@@ -0,0 +1,6 @@
/** @type {import('next').NextConfig} */
const nextConfig = {
reactStrictMode: true,
}
module.exports = nextConfig

9165
package-lock.json generated Normal file

File diff suppressed because it is too large

30
package.json Normal file

@@ -0,0 +1,30 @@
{
"name": "espanol-love",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"build": "next build",
"start": "next start",
"lint": "next lint"
},
"dependencies": {
"@chakra-ui/react": "^2.6.0",
"@emotion/react": "^11.10.8",
"@emotion/styled": "^11.10.8",
"@types/dom-speech-recognition": "^0.0.1",
"@types/node": "18.16.2",
"@types/react": "18.2.0",
"@types/react-dom": "18.2.1",
"eslint": "8.39.0",
"eslint-config-next": "13.3.1",
"framer-motion": "^10.12.4",
"next": "13.3.1",
"openai": "^3.2.1",
"react": "18.2.0",
"react-dom": "18.2.0",
"react-icons": "^4.8.0",
"react-spinners": "^0.13.8",
"typescript": "5.0.4"
}
}

10
pages/_app.tsx Normal file

@@ -0,0 +1,10 @@
import type { AppProps } from "next/app";
import { ChakraProvider } from "@chakra-ui/react";
export default function App({ Component, pageProps }: AppProps) {
return (
<ChakraProvider>
<Component {...pageProps} />
</ChakraProvider>
);
}

13
pages/_document.tsx Normal file

@@ -0,0 +1,13 @@
import { Html, Head, Main, NextScript } from 'next/document'
export default function Document() {
return (
<Html lang="en">
<Head />
<body>
<Main />
<NextScript />
</body>
</Html>
)
}

64
pages/api/translate.ts Normal file

@@ -0,0 +1,64 @@
import { Message } from "@/types";
import type { NextApiRequest, NextApiResponse } from "next";
import { Configuration, OpenAIApi } from "openai";
const configuration = new Configuration({
apiKey: process.env.OPENAI_KEY,
});
const openai = new OpenAIApi(configuration);
export default async function translate(
req: NextApiRequest,
res: NextApiResponse
) {
const { messages, userName } = req.body;
const translatedText = await askOpenAI({ messages, userName });
const TRIAL_URL = "https://api.elevenlabs.io";
const API_PATH = `/v1/text-to-speech/${process.env.ELEVENLABS_VOICE_ID}`;
const API_KEY = process.env.ELEVENLABS_KEY as string;
const OPTIONS = {
method: "POST",
body: JSON.stringify({
text: translatedText,
model_id: "eleven_monolingual_v1",
}),
headers: {
"xi-api-key": API_KEY,
"Content-Type": "application/json",
accept: "audio/mpeg",
},
};
const response = await fetch(TRIAL_URL + API_PATH, OPTIONS);
const audioData = await response.arrayBuffer();
const audioDataBase64 = Buffer.from(audioData).toString("base64");
res.setHeader("Content-Type", "application/json");
res.send(JSON.stringify({ audioDataBase64, translatedText }));
}
async function askOpenAI({
messages,
userName,
}: {
messages: Message[];
userName: string;
}) {
const response = await openai.createChatCompletion({
model: "gpt-3.5-turbo-0301",
messages: [
{
role: "system",
content: `Imagine you are a Spanish teacher having a conversation with a student who is looking to improve their Spanish. The user will start the conversation with you, and you will respond to them and ask about them. If the user asks you a question, you can help them restructure the question in Spanish and continue the conversation. The user's name is ${userName}.`,
},
...messages,
],
});
return response.data.choices[0].message?.content;
}

273
pages/index.tsx Normal file

@@ -0,0 +1,273 @@
import { useState, useEffect, useRef } from "react";
import Head from "next/head";
import { FaMicrophone, FaTwitter } from "react-icons/fa";
import Beatloader from "react-spinners/BeatLoader";
import base64ToBlob from "@/utils/basetoblob";
import {
Box,
Button,
HStack,
Heading,
Icon,
Text,
Textarea,
VStack,
useToast,
useColorModeValue,
Link,
} from "@chakra-ui/react";
import { handleEnterKeyPress } from "@/utils";
import NameInput from "@/components/NameInput";
import { Message } from "@/types";
import useIsChrome from "@/hooks/useIsChrome";
let SpeechRecognition: { new (): SpeechRecognition };
if (typeof window !== "undefined") {
SpeechRecognition =
window.SpeechRecognition || window.webkitSpeechRecognition;
}
function Home() {
const isChrome = useIsChrome();
const micRef = useRef<SpeechRecognition>();
const audioRef = useRef<HTMLAudioElement | null>(null);
const [messages, setMessages] = useState<Message[]>([]);
const addMessage = (message: Message) => {
setMessages((prevMessages) => [...prevMessages, message]);
};
const toast = useToast();
const [isListening, setIsListening] = useState(false);
const [text, setText] = useState("");
const [loading, setLoading] = useState(false);
const [isRecording, setIsRecording] = useState<boolean>(false);
const [speechRecognition, setSpeechRecognition] =
useState<SpeechRecognition | null>(null);
// on the first translate, we need to get the user's name
// on subsequent translates, we can use the name from state
const translate = async (props?: { name?: string }) => {
if (!text && !props?.name)
return toast({
title: "Enter text to translate first!",
status: "error",
});
if (!userName && !props?.name)
return toast({
title: "Enter your name first!",
status: "error",
});
const message = { role: "user", content: text };
if (!props?.name) {
addMessage({ role: "user", content: text });
setText("");
}
if (!audioRef.current)
return toast({ title: "Error enabling audio", status: "error" });
setLoading(true);
// response for chat gpt
const response = await fetch("/api/translate", {
method: "POST",
headers: {
"Content-Type": "application/json",
"Access-Control-Allow-Origin": "*",
Accept: "application/json",
},
body: JSON.stringify({
messages: [...messages, message],
userName: userName || props?.name,
}),
});
try {
const { audioDataBase64, translatedText } = await response.json();
addMessage({ role: "assistant", content: translatedText });
const audioBlob = base64ToBlob(audioDataBase64, "audio/mpeg");
const audioURL = URL.createObjectURL(audioBlob);
audioRef.current.src = audioURL;
await audioRef.current.play();
setText("");
} catch (e: any) {
// surface parsing/audio errors instead of leaving the loading state stuck
console.log("Error:", e.message);
} finally {
setLoading(false);
}
};
// this is a hack to allow mobile browsers to play audio without user interaction
const startAudioForPermission = async () => {
if (!audioRef.current) return;
await audioRef.current.play();
};
// configure the recognizer and start/stop it whenever isListening changes
const handleListen = async (mic: any) => {
if (!SpeechRecognition || !mic)
return alert("Speech recognition is not available in your browser.");
mic.continuous = true;
mic.interimResults = true;
mic.lang = "es-ES";
if (isListening) mic.start();
if (!isListening) mic.stop();
mic.onresult = (event: SpeechRecognitionEvent) => {
const transcript = Array.from(event.results)
.map((result) => result[0])
.map((result) => result.transcript)
.join("");
setText(transcript);
};
// register the error handler on the recognizer itself, not inside onresult
mic.onerror = (event: SpeechRecognitionErrorEvent) => {
console.log(event.error);
};
};
useEffect(() => {
const audio = new Audio();
audioRef.current = audio;
// SpeechRecognition is undefined outside Chrome-based browsers, so guard before constructing it
if (!SpeechRecognition) return;
const mic = new SpeechRecognition();
micRef.current = mic;
return () => {
mic.stop();
};
}, []);
useEffect(() => {
handleListen(micRef.current);
}, [isListening]);
const userBgColor = useColorModeValue("blue.500", "blue.300");
const assistantBgColor = useColorModeValue("gray.100", "gray.700");
const userColor = "white";
const assistantColor = "black";
const [userName, setUserName] = useState<null | string>(null);
const assistantName = "Tutor";
return (
<>
<Head>
<title>tu tutor</title>
</Head>
<VStack pt={40} px={4} spacing={4} h="100vh" maxW="600px" mx="auto">
<Heading as="h1" color="black">
Your Tutor in Spanish
</Heading>
<Text color="black">
Start a conversation with an AI tutor in Spanish. For more tutorials &
content, you can follow me on Twitter{" "}
<Link
href="https://twitter.com/emergingbits"
color="#1DA1F2"
isExternal
>
<Icon as={FaTwitter} fontSize="md" />
</Link>
</Text>
<Text color="black" as="i">
<b> Microphone works well in Google Chrome only (for now).</b>
</Text>
{!userName ? (
<NameInput
onEnter={(name) => {
startAudioForPermission();
setUserName(name);
translate({ name });
}}
/>
) : (
<>
{messages.map((message, index) => {
const isUser = message.role === "user";
return (
<Box
key={index}
alignSelf={isUser ? "flex-end" : "flex-start"}
backgroundColor={isUser ? userBgColor : assistantBgColor}
color={isUser ? userColor : assistantColor}
borderRadius="lg"
px={4}
py={2}
maxWidth="70%"
position="relative"
>
<Text
fontSize="xs"
position="absolute"
color="black"
top={-4}
left={2}
>
{isUser ? userName : assistantName}
</Text>
<Text fontSize="sm">{message.content}</Text>
</Box>
);
})}
<VStack w="100%" spacing={4}>
<Textarea
value={text}
onChange={(e) => setText(e.target.value)}
onKeyDown={handleEnterKeyPress(() => {
translate();
})}
/>
</VStack>
<HStack w="100%" spacing={4}>
<Button
h={9}
variant="outline"
onClick={() => {
translate();
}}
isLoading={loading}
spinner={<Beatloader size={8} />}
>
Send
</Button>
{isChrome && (
<Icon
as={FaMicrophone}
cursor="pointer"
color={isListening ? "red.500" : "gray.500"}
onClick={() => {
if (isListening === true) {
translate();
}
setIsListening(!isListening);
setText("");
}}
/>
)}
</HStack>
</>
)}
</VStack>
</>
);
}
export default Home;

23
tsconfig.json Normal file

@@ -0,0 +1,23 @@
{
"compilerOptions": {
"target": "es5",
"lib": ["dom", "dom.iterable", "esnext"],
"allowJs": true,
"skipLibCheck": true,
"strict": true,
"forceConsistentCasingInFileNames": true,
"noEmit": true,
"esModuleInterop": true,
"module": "esnext",
"moduleResolution": "node",
"resolveJsonModule": true,
"isolatedModules": true,
"jsx": "preserve",
"incremental": true,
"paths": {
"@/*": ["./*"]
}
},
"include": ["next-env.d.ts", "**/*.ts", "**/*.tsx"],
"exclude": ["node_modules"]
}

4
types/index.ts Normal file

@@ -0,0 +1,4 @@
export type Message = {
role: "user" | "assistant";
content: string;
};

14
utils/basetoblob.ts Normal file

@@ -0,0 +1,14 @@
// a bit of a hack: the audio comes back from the API as base64 inside JSON,
// which lets a single response carry both the MP3 and the translated text
export default function base64ToBlob(base64: string, mimeType: string) {
const byteString = atob(base64);
const arrayBuffer = new ArrayBuffer(byteString.length);
const uint8Array = new Uint8Array(arrayBuffer);
for (let i = 0; i < byteString.length; i++) {
uint8Array[i] = byteString.charCodeAt(i);
}
return new Blob([arrayBuffer], { type: mimeType });
}
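For context, a sketch of how the client consumes this (mirroring pages/index.tsx): decode the base64 from the JSON response into a Blob, wrap it in an object URL, and hand it to an Audio element.

// usage sketch matching what pages/index.tsx does with the /api/translate response
import base64ToBlob from "@/utils/basetoblob";

async function playTranslation(audioDataBase64: string) {
  const audioBlob = base64ToBlob(audioDataBase64, "audio/mpeg");
  const audio = new Audio(URL.createObjectURL(audioBlob));
  await audio.play();
}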

13
utils/index.ts Normal file

@@ -0,0 +1,13 @@
import { KeyboardEvent } from "react";
export function handleEnterKeyPress<T = Element>(f: () => void) {
return handleKeyPress<T>(f, "Enter");
}
export function handleKeyPress<T = Element>(f: () => void, key: string) {
return (e: KeyboardEvent<T>) => {
if (e.key === key) {
f();
}
};
}

3586
yarn.lock Normal file

File diff suppressed because it is too large