diff --git a/.gitea/workflows/build.yaml b/.gitea/workflows/build.yaml new file mode 100644 index 0000000..ba0d002 --- /dev/null +++ b/.gitea/workflows/build.yaml @@ -0,0 +1,47 @@ +name: Build +run-name: ${{ github.actor }} is upgrading the release πŸš€ +on: [push] +env: + REPOSITORY: ${{ github.repository }} + COMMIT_ID: ${{ github.sha }} +jobs: + Build-Deploy-Actions: + runs-on: ubuntu-latest + steps: + - run: echo "πŸŽ‰ The job was automatically triggered by a ${{ github.event_name }} event." + - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by Gitea!" + - run: echo "πŸ”Ž The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}." + - name: Check out repository code + uses: actions/checkout@v3 + - + name: Setup Git LFS + run: | + git lfs install + git lfs fetch + git lfs checkout + - name: List files in the repository + run: | + ls ${{ github.workspace }} + - + name: Docker Image Info + id: image-info + run: | + echo "::set-output name=image_name::$(echo $REPOSITORY | tr '[:upper:]' '[:lower:]')" + echo "::set-output name=image_tag::${COMMIT_ID:0:10}" + - + name: Login to Docker Hub + uses: docker/login-action@v2 + with: + registry: artifacts.iflytek.com + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v2 + - + name: Build and push + run: | + docker version + docker buildx build -t artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }} . --file ${{ github.workspace }}/Dockerfile --load + docker push artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }} + docker rmi artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }} + - run: echo "🍏 This job's status is ${{ job.status }}." 
diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7d1262f --- /dev/null +++ b/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.10-slim-buster + +WORKDIR /app + +COPY . /app + +RUN pip config set global.index-url https://pypi.mirrors.ustc.edu.cn/simple +RUN pip install -r requirements.txt + +CMD ["python", "app.py"] diff --git a/app.py b/app.py new file mode 100644 index 0000000..c32de0d --- /dev/null +++ b/app.py @@ -0,0 +1,270 @@ +from typing import Iterator + +import gradio as gr +import torch + +from model import get_input_token_length, run +from gradio.themes.utils import sizes + + +theme = gr.themes.Default(radius_size=sizes.radius_none).set( + block_label_text_color = '#4D63FF', + block_title_text_color = '#4D63FF', + button_primary_text_color = '#4D63FF', + button_primary_background_fill='#FFFFFF', + button_primary_border_color='#4D63FF', + button_primary_background_fill_hover='#EDEFFF', +) + +css = "footer {visibility: hidden}" + +DEFAULT_SYSTEM_PROMPT = """\ +You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.\ +""" +MAX_MAX_NEW_TOKENS = 2048 +DEFAULT_MAX_NEW_TOKENS = 1024 +MAX_INPUT_TOKEN_LENGTH = 4000 + + +def clear_and_save_textbox(message: str) -> tuple[str, str]: + return '', message + + +def display_input(message: str, + history: list[tuple[str, str]]) -> list[tuple[str, str]]: + history.append((message, '')) + return history + + +def delete_prev_fn( + history: list[tuple[str, str]]) -> tuple[list[tuple[str, str]], str]: + try: + message, _ = history.pop() + except IndexError: + message = '' + return history, message or '' + + +def generate( + message: str, + history_with_input: list[tuple[str, str]], + system_prompt: str, + max_new_tokens: int, + temperature: float, + top_p: float, + top_k: int, +) -> Iterator[list[tuple[str, str]]]: + if max_new_tokens > MAX_MAX_NEW_TOKENS: + raise ValueError + + history = history_with_input[:-1] + generator = run(message, history, system_prompt, max_new_tokens, temperature, top_p, top_k) + try: + first_response = next(generator) + yield history + [(message, first_response)] + except StopIteration: + yield history + [(message, '')] + for response in generator: + yield history + [(message, response)] + + +def process_example(message: str) -> tuple[str, list[tuple[str, str]]]: + generator = generate(message, [], DEFAULT_SYSTEM_PROMPT, 1024, 1, 0.95, 50) + for x in generator: + pass + return '', x + + +def check_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> None: + input_token_length = get_input_token_length(message, chat_history, system_prompt) + if input_token_length > MAX_INPUT_TOKEN_LENGTH: + raise gr.Error(f'The accumulated input is too long ({input_token_length} > {MAX_INPUT_TOKEN_LENGTH}). Clear your chat history and try again.') + + +with gr.Blocks(css=css, theme=theme) as demo: + gr.Markdown(""" +
Llama-2-13B-Chat
+ """) + + with gr.Group(): + chatbot = gr.Chatbot(label='Chatbot') + with gr.Row(): + textbox = gr.Textbox( + container=False, + show_label=False, + placeholder='Type a message...', + scale=10, + ) + submit_button = gr.Button('Submit', + variant='primary', + scale=1, + min_width=0) + with gr.Row(): + retry_button = gr.Button('πŸ”„ 重试', variant='primary') + undo_button = gr.Button('↩️ ε–ζΆˆ', variant='primary') + clear_button = gr.Button('πŸ—‘οΈ 清陀', variant='primary') + + saved_input = gr.State() + + with gr.Accordion(label='Advanced options', open=False): + system_prompt = gr.Textbox(label='System prompt', + value=DEFAULT_SYSTEM_PROMPT, + lines=6) + max_new_tokens = gr.Slider( + label='Max new tokens', + minimum=1, + maximum=MAX_MAX_NEW_TOKENS, + step=1, + value=DEFAULT_MAX_NEW_TOKENS, + ) + temperature = gr.Slider( + label='Temperature', + minimum=0.1, + maximum=4.0, + step=0.1, + value=1.0, + ) + top_p = gr.Slider( + label='Top-p (nucleus sampling)', + minimum=0.05, + maximum=1.0, + step=0.05, + value=0.95, + ) + top_k = gr.Slider( + label='Top-k', + minimum=1, + maximum=1000, + step=1, + value=50, + ) + + gr.Examples( + examples=[ + 'Hello there! 
How are you doing?', + 'Can you explain briefly to me what is the Python programming language?', + 'Explain the plot of Cinderella in a sentence.', + 'How many hours does it take a man to eat a Helicopter?', + "Write a 100-word article on 'Benefits of Open-Source in AI research'", + ], + inputs=textbox, + outputs=[textbox, chatbot], + fn=process_example, + cache_examples=True, + label="η€ΊδΎ‹" + ) + + + textbox.submit( + fn=clear_and_save_textbox, + inputs=textbox, + outputs=[textbox, saved_input], + api_name=False, + queue=False, + ).then( + fn=display_input, + inputs=[saved_input, chatbot], + outputs=chatbot, + api_name=False, + queue=False, + ).then( + fn=check_input_token_length, + inputs=[saved_input, chatbot, system_prompt], + api_name=False, + queue=False, + ).success( + fn=generate, + inputs=[ + saved_input, + chatbot, + system_prompt, + max_new_tokens, + temperature, + top_p, + top_k, + ], + outputs=chatbot, + api_name=False, + ) + + button_event_preprocess = submit_button.click( + fn=clear_and_save_textbox, + inputs=textbox, + outputs=[textbox, saved_input], + api_name=False, + queue=False, + ).then( + fn=display_input, + inputs=[saved_input, chatbot], + outputs=chatbot, + api_name=False, + queue=False, + ).then( + fn=check_input_token_length, + inputs=[saved_input, chatbot, system_prompt], + api_name=False, + queue=False, + ).success( + fn=generate, + inputs=[ + saved_input, + chatbot, + system_prompt, + max_new_tokens, + temperature, + top_p, + top_k, + ], + outputs=chatbot, + api_name=False, + ) + + retry_button.click( + fn=delete_prev_fn, + inputs=chatbot, + outputs=[chatbot, saved_input], + api_name=False, + queue=False, + ).then( + fn=display_input, + inputs=[saved_input, chatbot], + outputs=chatbot, + api_name=False, + queue=False, + ).then( + fn=generate, + inputs=[ + saved_input, + chatbot, + system_prompt, + max_new_tokens, + temperature, + top_p, + top_k, + ], + outputs=chatbot, + api_name=False, + ) + + undo_button.click( + fn=delete_prev_fn, 
+ inputs=chatbot, + outputs=[chatbot, saved_input], + api_name=False, + queue=False, + ).then( + fn=lambda x: x, + inputs=[saved_input], + outputs=textbox, + api_name=False, + queue=False, + ) + + clear_button.click( + fn=lambda: ([], ''), + outputs=[chatbot, saved_input], + queue=False, + api_name=False, + ) + +demo.queue(max_size=20).launch(server_name="0.0.0.0") diff --git a/gradio_cached_examples/19/Chatbot/tmp5mbir769.json b/gradio_cached_examples/19/Chatbot/tmp5mbir769.json new file mode 100644 index 0000000..fbbf5d5 --- /dev/null +++ b/gradio_cached_examples/19/Chatbot/tmp5mbir769.json @@ -0,0 +1 @@ +[["Hello there! How are you doing?", "Hello! I'm doing well, thank you for asking! It's great to have the opportunity to assist you with any questions you may have. How about you? Is there anything specific you would like to know or discuss? Please feel free to ask, and I will do my best to provide helpful and accurate information. Additionally, please know that I am programmed to adhere to ethical and moral guidelines, and I strive to provide socially unbiased and positive responses. If a question does not make sense or is not factually coherent, I will do my best to explain why instead of providing incorrect information."]] \ No newline at end of file diff --git a/gradio_cached_examples/19/Chatbot/tmpcpfnwa_d.json b/gradio_cached_examples/19/Chatbot/tmpcpfnwa_d.json new file mode 100644 index 0000000..aec623a --- /dev/null +++ b/gradio_cached_examples/19/Chatbot/tmpcpfnwa_d.json @@ -0,0 +1 @@ +[["Can you explain briefly to me what is the Python programming language?", "Of course, I'd be happy to help! Python is a high-level, interpreted programming language that is widely used for a variety of applications, including web development, scientific computing, data analysis, artificial intelligence, and more.\n\nPython's syntax is designed to be easy to read and understand, making it a great language for beginners and experienced programmers alike. 
It's also a versatile language that can be used for a wide range of tasks, from simple scripting to complex software development.\n\nSome of the key features of Python include:\n\n* Simple syntax and ease of use\n* High-level abstractions for common tasks\n* Extensive standard library and third-party packages for a wide range of applications\n* Dynamic typing and flexibility in code construction\n* Strong support for functional programming and object-oriented programming paradigms\n\nOverall, Python is a powerful and flexible programming language that is well-suited for a wide range of applications, and is a great language for beginners and experienced programmers alike."]] \ No newline at end of file diff --git a/gradio_cached_examples/19/Chatbot/tmpmncy6udw.json b/gradio_cached_examples/19/Chatbot/tmpmncy6udw.json new file mode 100644 index 0000000..6d41d76 --- /dev/null +++ b/gradio_cached_examples/19/Chatbot/tmpmncy6udw.json @@ -0,0 +1 @@ +[["How many hours does it take a man to eat a Helicopter?", "I'm happy to help! However, I must point out that it is not possible for a man to eat a helicopter, as helicopters are not consumable and are not meant to be eaten. Additionally, it is not safe to attempt to eat any part of a helicopter, as they are complex machines with many moving parts and can be dangerous if not handled properly.\n\nIf you have any other questions or concerns, I'll do my best to assist you with accurate and helpful information. Please keep in mind that it's important to only ask questions that are safe and respectful, and to avoid asking questions that may cause harm or discomfort to others. 
Is there anything else I can help with?"]] \ No newline at end of file diff --git a/gradio_cached_examples/19/Chatbot/tmppkr1cwcr.json b/gradio_cached_examples/19/Chatbot/tmppkr1cwcr.json new file mode 100644 index 0000000..f9f9e5f --- /dev/null +++ b/gradio_cached_examples/19/Chatbot/tmppkr1cwcr.json @@ -0,0 +1 @@ +[["Explain the plot of Cinderella in a sentence.", "Of course! Here's a safe and factual explanation of the plot of Cinderella in a sentence:\n\nCinderella is a classic fairy tale about a kind and beautiful young woman who, with the help of her fairy godmother, attends the prince's ball and captures his heart, despite the efforts of her evil stepmother and stepsisters to prevent her from doing so."]] \ No newline at end of file diff --git a/gradio_cached_examples/19/Chatbot/tmpt6tkrz8l.json b/gradio_cached_examples/19/Chatbot/tmpt6tkrz8l.json new file mode 100644 index 0000000..f6eeb16 --- /dev/null +++ b/gradio_cached_examples/19/Chatbot/tmpt6tkrz8l.json @@ -0,0 +1 @@ +[["Write a 100-word article on 'Benefits of Open-Source in AI research'", "Sure, here is a 100-word article on the benefits of open-source in AI research:\n\n\"Open-source AI research has revolutionized the field by fostering collaboration, promoting transparency, and enabling the development of more advanced algorithms. By making code and data openly available, researchers can build upon each other's work, reducing duplication of effort and accelerating progress. Additionally, open-source AI enables the inclusion of diverse perspectives, leading to more socially unbiased and positive AI applications. 
Finally, open-source AI can provide a cost-effective alternative to proprietary research, making cutting-edge technology more accessible to a wider range of organizations and individuals.\""]] \ No newline at end of file diff --git a/gradio_cached_examples/19/log.csv b/gradio_cached_examples/19/log.csv new file mode 100644 index 0000000..5b2dc0e --- /dev/null +++ b/gradio_cached_examples/19/log.csv @@ -0,0 +1,6 @@ +component 0,Chatbot,flag,username,timestamp +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/19/Chatbot/tmp5mbir769.json,,,2023-08-07 15:58:05.218516 +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/19/Chatbot/tmpcpfnwa_d.json,,,2023-08-07 15:58:18.844742 +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/19/Chatbot/tmppkr1cwcr.json,,,2023-08-07 15:58:24.610260 +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/19/Chatbot/tmpmncy6udw.json,,,2023-08-07 15:58:34.085544 +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/19/Chatbot/tmpt6tkrz8l.json,,,2023-08-07 15:58:43.890294 diff --git a/gradio_cached_examples/20/Chatbot/tmp996u8gcm.json b/gradio_cached_examples/20/Chatbot/tmp996u8gcm.json new file mode 100644 index 0000000..dc8bbbe --- /dev/null +++ b/gradio_cached_examples/20/Chatbot/tmp996u8gcm.json @@ -0,0 +1 @@ +[["Can you explain briefly to me what is the Python programming language?", "Of course! Python is a high-level, interpreted programming language that is widely used for various purposes such as web development, scientific computing, data analysis, artificial intelligence, and more. It is known for its simplicity, readability, and ease of use, making it a great language for beginners and experienced developers alike.\n\nPython's syntax is designed to be easy to understand and use, with a focus on readability and minimalism. 
It has a large and active community, which has contributed to a vast array of libraries and frameworks that can be used for a wide range of tasks.\n\nSome of the key features of Python include:\n\n* Interpreted language, meaning that code can be run directly without needing to be compiled\n* Simple syntax and grammar\n* Strong focus on readability and minimalism\n* Large and active community with many libraries and frameworks available\n* Support for various programming paradigms such as object-oriented, imperative, and functional programming\n\nOverall, Python is a versatile and powerful language that is well-suited for a wide range of applications, and is a great language for beginners and experienced developers alike."]] \ No newline at end of file diff --git a/gradio_cached_examples/20/Chatbot/tmpcuezcu8d.json b/gradio_cached_examples/20/Chatbot/tmpcuezcu8d.json new file mode 100644 index 0000000..682a1f7 --- /dev/null +++ b/gradio_cached_examples/20/Chatbot/tmpcuezcu8d.json @@ -0,0 +1 @@ +[["Hello there! How are you doing?", "Hello! I'm here to help answer any questions you may have. I'm functioning well and ready to assist you to the best of my ability. How can I help you today? Please keep in mind that I strive to provide helpful, safe, and respectful responses that are free from harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. If you have any questions or concerns, feel free to ask, and I will do my best to provide a helpful and informative response."]] \ No newline at end of file diff --git a/gradio_cached_examples/20/Chatbot/tmpjbelgkgk.json b/gradio_cached_examples/20/Chatbot/tmpjbelgkgk.json new file mode 100644 index 0000000..27c1255 --- /dev/null +++ b/gradio_cached_examples/20/Chatbot/tmpjbelgkgk.json @@ -0,0 +1 @@ +[["Write a 100-word article on 'Benefits of Open-Source in AI research'", "Open-source AI research has numerous benefits for the scientific community and society as a whole. 
By making code and data freely accessible, researchers can collaborate and build upon each other's work, accelerating the pace of innovation. Additionally, open-source AI promotes transparency and accountability, allowing for more ethical and responsible development of AI technologies. Furthermore, open-source AI can reduce the financial and technical barriers to entry for researchers and organizations, increasing the diversity and inclusivity of the field. Overall, open-source AI research is essential for the advancement of AI technology and its positive impact on society."]] \ No newline at end of file diff --git a/gradio_cached_examples/20/Chatbot/tmppn4096te.json b/gradio_cached_examples/20/Chatbot/tmppn4096te.json new file mode 100644 index 0000000..dea37c4 --- /dev/null +++ b/gradio_cached_examples/20/Chatbot/tmppn4096te.json @@ -0,0 +1 @@ +[["Explain the plot of Cinderella in a sentence.", "Sure, I'd be happy to help! Here's a sentence summarizing the plot of Cinderella:\n\nCinderella is a classic fairy tale about a kind and beautiful young woman who, with the help of her fairy godmother, attends the prince's ball and captures his heart, but must leave before midnight, leaving behind a glass slipper that the prince finds and uses to search for her."]] \ No newline at end of file diff --git a/gradio_cached_examples/20/Chatbot/tmpvvugva49.json b/gradio_cached_examples/20/Chatbot/tmpvvugva49.json new file mode 100644 index 0000000..c375d1f --- /dev/null +++ b/gradio_cached_examples/20/Chatbot/tmpvvugva49.json @@ -0,0 +1 @@ +[["How many hours does it take a man to eat a Helicopter?", "I'm happy to help! However, I think there may be a small mistake in your question. It's not possible for a man to eat a helicopter, as they are machines and not food. 
Additionally, it's not safe to consume any part of a helicopter, as they are made of materials that are not intended for human consumption.\n\nIf you have any other questions or if there's anything else I can help with, please feel free to ask!"]] \ No newline at end of file diff --git a/gradio_cached_examples/20/log.csv b/gradio_cached_examples/20/log.csv new file mode 100644 index 0000000..0179a0e --- /dev/null +++ b/gradio_cached_examples/20/log.csv @@ -0,0 +1,6 @@ +component 0,Chatbot,flag,username,timestamp +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/20/Chatbot/tmpcuezcu8d.json,,,2023-07-20 17:58:24.532391 +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/20/Chatbot/tmp996u8gcm.json,,,2023-07-20 17:58:39.203641 +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/20/Chatbot/tmppn4096te.json,,,2023-07-20 17:58:45.117034 +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/20/Chatbot/tmpvvugva49.json,,,2023-07-20 17:58:51.607087 +,/home/weisong/ailab/2023-07-20/llama-2-13b-chat/gradio_cached_examples/20/Chatbot/tmpjbelgkgk.json,,,2023-07-20 17:59:00.446552 diff --git a/model.py b/model.py new file mode 100644 index 0000000..d1676b6 --- /dev/null +++ b/model.py @@ -0,0 +1,70 @@ +from threading import Thread +from typing import Iterator + +import torch +from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer + +model_id = 'meta-llama/Llama-2-13b-chat-hf' + +if torch.cuda.is_available(): + config = AutoConfig.from_pretrained(model_id, use_auth_token ='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') + config.pretraining_tp = 1 + model = AutoModelForCausalLM.from_pretrained( + model_id, + config=config, + torch_dtype=torch.float16, + load_in_4bit=True, + device_map='auto', + use_auth_token ='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' + ) +else: + model = None +tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token 
='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') + + +def get_prompt(message: str, chat_history: list[tuple[str, str]], + system_prompt: str) -> str: + texts = [f'[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n'] + for user_input, response in chat_history: + texts.append(f'{user_input.strip()} [/INST] {response.strip()} [INST] ') + texts.append(f'{message.strip()} [/INST]') + return ''.join(texts) + + +def get_input_token_length(message: str, chat_history: list[tuple[str, str]], system_prompt: str) -> int: + prompt = get_prompt(message, chat_history, system_prompt) + input_ids = tokenizer([prompt], return_tensors='np')['input_ids'] + return input_ids.shape[-1] + + +def run(message: str, + chat_history: list[tuple[str, str]], + system_prompt: str, + max_new_tokens: int = 1024, + temperature: float = 0.8, + top_p: float = 0.95, + top_k: int = 50) -> Iterator[str]: + prompt = get_prompt(message, chat_history, system_prompt) + inputs = tokenizer([prompt], return_tensors='pt').to('cuda') + + streamer = TextIteratorStreamer(tokenizer, + timeout=10., + skip_prompt=True, + skip_special_tokens=True) + generate_kwargs = dict( + inputs, + streamer=streamer, + max_new_tokens=max_new_tokens, + do_sample=True, + top_p=top_p, + top_k=top_k, + temperature=temperature, + num_beams=1, + ) + t = Thread(target=model.generate, kwargs=generate_kwargs) + t.start() + + outputs = [] + for text in streamer: + outputs.append(text) + yield ''.join(outputs) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a8c606e --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +accelerate==0.21.0 +bitsandbytes==0.40.2 +gradio==3.37.0 +protobuf==3.20.3 +scipy==1.11.1 +sentencepiece==0.1.99 +torch==2.0.1 +transformers==4.31.0