commit b5a168bda8e5777784a1acdb261b4c4c12d865b5
Author: jianjiang
Date:   Fri Apr 21 12:15:32 2023 +0800

    ok

diff --git a/.gitea/workflow/build.yaml b/.gitea/workflow/build.yaml
new file mode 100644
index 0000000..1ed1ca9
--- /dev/null
+++ b/.gitea/workflow/build.yaml
@@ -0,0 +1,48 @@
+name: Build
+run-name: ${{ github.actor }} is upgrade release 🚀
+on: [push]
+env:
+  REPOSITORY: ${{ github.repository }}
+  COMMIT_ID: ${{ github.sha }}
+jobs:
+  Build-Deploy-Actions:
+    runs-on: ubuntu-latest
+    steps:
+      - run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
+      - run: echo "🐧 This job is now running on a ${{ runner.os }} server hosted by Gitea!"
+      - run: echo "🔎 The name of your branch is ${{ github.ref }} and your repository is ${{ github.repository }}."
+      - name: Check out repository code
+        uses: actions/checkout@v3
+      -
+        name: Setup Git LFS
+        run: |
+          git lfs install
+          git lfs fetch
+          git lfs checkout
+      - name: List files in the repository
+        run: |
+          ls ${{ github.workspace }}
+      -
+        name: Docker Image Info
+        id: image-info
+        run: |
+          echo "::set-output name=image_name::$(echo $REPOSITORY | tr '[:upper:]' '[:lower:]')"
+          echo "::set-output name=image_tag::${COMMIT_ID:0:10}"
+      -
+        name: Login to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          registry: artifacts.iflytek.com
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      -
+        name: Build and push
+        run: |
+          docker version
+          docker buildx build -t artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }} . --file ${{ github.workspace }}/Dockerfile --load
+          docker push artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }}
+          docker rmi artifacts.iflytek.com/docker-private/atp/${{ steps.image-info.outputs.image_name }}:${{ steps.image-info.outputs.image_tag }}
+      - run: echo "🍏 This job's status is ${{ job.status }}."
+
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..671699b
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,10 @@
+FROM python:3.8.13
+
+WORKDIR /app
+
+COPY . /app
+
+RUN pip config set global.index-url https://pypi.mirrors.ustc.edu.cn/simple
+RUN pip install -r requirements.txt
+
+CMD ["python", "app.py"]
diff --git a/app.py b/app.py
new file mode 100644
index 0000000..439f5f3
--- /dev/null
+++ b/app.py
@@ -0,0 +1,91 @@
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+from languages import LANGUANGE_MAP
+import gradio as gr
+import torch
+from gradio.themes.utils import sizes
+
+#model_ckpt = "ivanlau/language-detection-fine-tuned-on-xlm-roberta-base"
+model_ckpt = "papluca/xlm-roberta-base-language-detection"
+model = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
+tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
+
+theme = gr.themes.Default(radius_size=sizes.radius_none).set(
+    block_label_text_color = '#4D63FF',
+    block_title_text_color = '#4D63FF',
+    button_primary_text_color = '#4D63FF',
+    button_primary_background_fill='#FFFFFF',
+    button_primary_border_color='#4D63FF',
+    button_primary_background_fill_hover='#EDEFFF',
+)
+
+# Display names for checkpoints whose config labels are ISO 639-1 codes.
+ISO_NAMES = {
+    'ar': 'Arabic', 'bg': 'Bulgarian', 'de': 'German', 'el': 'Greek',
+    'en': 'English', 'es': 'Spanish', 'fr': 'French', 'hi': 'Hindi',
+    'it': 'Italian', 'ja': 'Japanese', 'nl': 'Dutch', 'pl': 'Polish',
+    'pt': 'Portuguese', 'ru': 'Russian', 'sw': 'Swahili', 'th': 'Thai',
+    'tr': 'Turkish', 'ur': 'Urdu', 'vi': 'Vietnamese', 'zh': 'Chinese',
+}
+
+
+def build_label_map():
+    """Return {class id: language name} for the loaded checkpoint.
+
+    Class ids are checkpoint-specific, so the labels stored in the checkpoint
+    config take precedence. LANGUANGE_MAP is only used when the config holds
+    the auto-generated "LABEL_<n>" placeholders and the map has one entry per
+    output class.
+    """
+    id2label = {int(i): str(name) for i, name in model.config.id2label.items()}
+    placeholders = all(name.startswith("LABEL_") for name in id2label.values())
+    if placeholders and len(LANGUANGE_MAP) == len(id2label):
+        return dict(LANGUANGE_MAP)
+    return {i: ISO_NAMES.get(name, name) for i, name in id2label.items()}
+
+
+LABELS = build_label_map()
+
+
+def detect_language(sentence):
+    """Return the name of the most likely language of `sentence`.
+
+    Returns None for blank input, which leaves the output label empty.
+    """
+    if not sentence or not sentence.strip():
+        return None
+    # Truncate: longer inputs exceed the position embeddings of XLM-RoBERTa.
+    tokenized_sentence = tokenizer(sentence, return_tensors='pt', truncation=True, max_length=512)
+    with torch.no_grad():  # inference only, no autograd graph needed
+        output = model(**tokenized_sentence)
+    # softmax is monotonic, so the argmax of the logits is the predicted class
+    pred = torch.argmax(output.logits, dim=-1).item()
+    return LABELS[pred]
+
+examples = [
+    "恭喜发财!",
+    "Jumpa lagi, saya pergi kerja.",
+    "你食咗飯未呀?",
+    "もう食べましたか?",
+    "as-tu mangé",
+    "أريد أن ألعب كرة الريشة"
+]
+
+inputs = gr.Textbox(placeholder="Enter your text here", label="Text content", lines=5)
+outputs = gr.Label(label="Language detected:")
+supported = sorted(set(LABELS.values()))
+article = (
+    "Fine-tuned on xlm-roberta-base model.\n\n"
+    "Supported languages:\n\n"
+    + ", ".join(f"'{name}'" for name in supported)
+)
+
+gr.Interface(
+    fn=detect_language,
+    inputs=inputs,
+    outputs=outputs,
+    examples = examples,
+    title="Language Detector 🔠",
+    description=f"A simple interface to detect {len(supported)} languages.",
+    article=article,
+    theme=theme
+).launch()
diff --git a/languages.py b/languages.py
new file mode 100644
index 0000000..42189cb
--- /dev/null
+++ b/languages.py
@@ -0,0 +1,47 @@
+LANGUANGE_MAP = {
+    0: 'Arabic',
+    1: 'Basque',
+    2: 'Breton',
+    3: 'Catalan',
+    4: 'Chinese_China',
+    5: 'Chinese_Hongkong',
+    6: 'Chinese_Taiwan',
+    7: 'Chuvash',
+    8: 'Czech',
+    9: 'Dhivehi',
+    10: 'Dutch',
+    11: 'English',
+    12: 'Esperanto',
+    13: 'Estonian',
+    14: 'French',
+    15: 'Frisian',
+    16: 'Georgian',
+    17: 'German',
+    18: 'Greek',
+    19: 'Hakha_Chin',
+    20: 'Indonesian',
+    21: 'Interlingua',
+    22: 'Italian',
+    23: 'Japanese',
+    24: 'Kabyle',
+    25: 'Kinyarwanda',
+    26: 'Kyrgyz',
+    27: 'Latvian',
+    28: 'Maltese',
+    29: 'Mongolian',
+    30: 'Persian',
+    31: 'Polish',
+    32: 'Portuguese',
+    33: 'Romanian',
+    34: 'Romansh_Sursilvan',
+    35: 'Russian',
+    36: 'Sakha',
+    37: 'Slovenian',
+    38: 'Spanish',
+    39: 'Swedish',
+    40: 'Tamil',
+    41: 'Tatar',
+    42: 'Turkish',
+    43: 'Ukranian',
+    44: 'Welsh'
+}
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..f8e1863
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+gradio
+transformers
+torch