import io

import streamlit as st
from PIL import Image
from huggingface_hub import InferenceClient

# Streamlit page setup
st.set_page_config(page_title="MTSS Image Accessibility Alt Text Generator", layout="centered")

# Add the logo image with a specified width
logo_width = 300  # Desired width in pixels
st.image('MTSS.ai_Logo.png', width=logo_width)

st.header('VisionTexts™ | Accessibility')
st.subheader('Image Alt Text Creator')

# Retrieve the Hugging Face API key from secrets
huggingface_api_key = st.secrets["huggingface_api_key"]
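# Locally, Streamlit reads secrets from .streamlit/secrets.toml; that file
# would need a line such as (the key name is app-specific, not a Streamlit
# built-in): huggingface_api_key = "hf_..."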

# Initialize the Hugging Face inference client
client = InferenceClient(token=huggingface_api_key)

# File uploader so the user can add their own image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file:
    # Display the uploaded image
    image = Image.open(uploaded_file).convert('RGB')
    display_width = 200  # Desired display width in pixels
    with st.expander("Image", expanded=True):
        st.image(image, caption=uploaded_file.name, width=display_width, use_column_width=False)
else:
    st.warning("Please upload an image.")

# Option for adding additional details
show_details = st.checkbox("Add additional details about the image.", value=False)

if show_details:
    # Text input for additional details about the image
    additional_details = st.text_area(
        "Provide specific information that is important to include in the alt text "
        "or that reflects why the image is being used:"
    )
else:
    additional_details = ""

# Button to trigger the analysis
analyze_button = st.button("Analyze the Image", type="secondary")

# Prompt for a complex image description
complex_image_prompt_text = (
    "As an expert in image accessibility and alternative text, thoroughly describe the image based on the caption provided. "
    "Provide a detailed description of no more than 500 characters that conveys the essential information in eight or fewer clear and concise sentences. "
    "Skip phrases like 'image of' or 'picture of.' "
    "Your description should form a clear, well-structured, and factual paragraph that avoids bullet points, focusing on creating a seamless narrative. "
    "Importantly, describe only what is visibly present in the image and avoid making assumptions or adding extraneous information. "
    "Stick to the facts and ensure the description is accurate and reliable."
)


# Functions to query the Hugging Face Inference API
def query_image_caption(image):
    # Convert the PIL image to JPEG bytes
    buffered = io.BytesIO()
    image.save(buffered, format="JPEG")
    image_bytes = buffered.getvalue()

    # Use the InferenceClient's image_to_text method
    response = client.image_to_text(
        # model="Salesforce/blip-image-captioning-large",
        model="nlpconnect/vit-gpt2-image-captioning",
        image=image_bytes,
    )
    return response
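

# Depending on the installed huggingface_hub version, image_to_text may return
# a plain string or an ImageToTextOutput object with a .generated_text field;
# this small helper (an addition, not part of the original app) handles both.
def extract_caption_text(response):
    if isinstance(response, str):
        return response
    # Fall back to the generated_text attribute on structured outputs
    return getattr(response, "generated_text", str(response))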


def query_llm(prompt):
    # System prompt (optional)
    system_prompt = "You are an expert in image accessibility and alternative text."

    # Generate the response with the InferenceClient's OpenAI-compatible
    # chat completion interface
    response = client.chat.completions.create(
        model="meta-llama/Llama-2-7b-chat-hf",
        messages=[
            {"role": "system", "content": system_prompt},  # Optional system prompt
            {"role": "user", "content": prompt}
        ],
        stream=True,
        temperature=0.5,
        max_tokens=1024,
        top_p=0.7
    )

    # Collect the streamed response chunk by chunk; delta.content can be
    # None on the final chunk, so fall back to an empty string
    response_content = ""
    for message in response:
        if message.choices:
            response_content += message.choices[0].delta.content or ""
    return response_content.strip()
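

# Note: Streamlit 1.31+ also offers st.write_stream() to render tokens as
# they arrive; collecting the full string first keeps this flow simpler.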

# Run the analysis once an image has been uploaded and the button is pressed
if uploaded_file is not None and analyze_button:
    with st.spinner("Analyzing the image..."):
        # Get a caption for the image from the captioning model
        caption_response = query_image_caption(image)

        # Handle potential errors returned by the API
        if isinstance(caption_response, dict) and caption_response.get("error"):
            st.error(f"Error with image captioning model: {caption_response['error']}")
        else:
            image_caption = extract_caption_text(caption_response)

            # Use the complex image prompt text
            prompt_text = complex_image_prompt_text

            # Include additional details if provided
            if additional_details:
                prompt_text += f"\n\nAdditional context provided by the user:\n{additional_details}"

            # Create the full prompt
            full_prompt = f"{prompt_text}\n\nImage Caption: {image_caption}"

            # Use the language model to generate the alt text description
            llm_response = query_llm(full_prompt)

            # Display the generated alt text
            st.markdown("### Generated Alt Text:")
            st.write(llm_response)

            st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
else:
    st.write("Please upload an image and click 'Analyze the Image' to generate alt text.")