diff --git a/packages/markitdown-mcp/src/markitdown_mcp/__main__.py b/packages/markitdown-mcp/src/markitdown_mcp/__main__.py index 32b7527..9e404ab 100644 --- a/packages/markitdown-mcp/src/markitdown_mcp/__main__.py +++ b/packages/markitdown-mcp/src/markitdown_mcp/__main__.py @@ -1,5 +1,4 @@ import sys -from typing import Any from mcp.server.fastmcp import FastMCP from starlette.applications import Starlette from mcp.server.sse import SseServerTransport diff --git a/packages/markitdown-sample-plugin/tests/test_sample_plugin.py b/packages/markitdown-sample-plugin/tests/test_sample_plugin.py index 6d0102d..6968247 100644 --- a/packages/markitdown-sample-plugin/tests/test_sample_plugin.py +++ b/packages/markitdown-sample-plugin/tests/test_sample_plugin.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -m pytest import os -import pytest from markitdown import MarkItDown, StreamInfo from markitdown_sample_plugin import RtfConverter diff --git a/packages/markitdown/src/markitdown/__main__.py b/packages/markitdown/src/markitdown/__main__.py index cfb1c6e..6085ad6 100644 --- a/packages/markitdown/src/markitdown/__main__.py +++ b/packages/markitdown/src/markitdown/__main__.py @@ -4,7 +4,6 @@ import argparse import sys import codecs -import locale from textwrap import dedent from importlib.metadata import entry_points from .__about__ import __version__ @@ -34,13 +33,13 @@ def main(): OR markitdown < example.pdf - + OR to save to a file use - + markitdown example.pdf -o example.md - + OR - + markitdown example.pdf > example.md """ ).strip(), diff --git a/packages/markitdown/src/markitdown/_base_converter.py b/packages/markitdown/src/markitdown/_base_converter.py index 2f0ca9d..a6f2a2d 100644 --- a/packages/markitdown/src/markitdown/_base_converter.py +++ b/packages/markitdown/src/markitdown/_base_converter.py @@ -1,7 +1,4 @@ -import os -import tempfile -from warnings import warn -from typing import Any, Union, BinaryIO, Optional, List +from typing import Any, BinaryIO, Optional from ._stream_info import StreamInfo diff --git a/packages/markitdown/src/markitdown/_markitdown.py b/packages/markitdown/src/markitdown/_markitdown.py index 682902b..52a5e28 100644 --- a/packages/markitdown/src/markitdown/_markitdown.py +++ b/packages/markitdown/src/markitdown/_markitdown.py @@ -1,11 +1,8 @@ -import copy import mimetypes import os import re import sys import shutil -import tempfile -import warnings import traceback import io from dataclasses import dataclass @@ -547,7 +544,7 @@ class MarkItDown: # Sanity check -- make sure the cur_pos is still the same assert ( cur_pos == file_stream.tell() - ), f"File stream position should NOT change between guess iterations" + ), "File stream position should NOT change between guess iterations" _kwargs = {k: v for k, v in kwargs.items()} @@ -614,7 +611,7 @@ class MarkItDown: # Nothing can handle it! raise UnsupportedFormatException( - f"Could not convert stream to Markdown. No converter attempted a conversion, suggesting that the filetype is simply not supported." + "Could not convert stream to Markdown. No converter attempted a conversion, suggesting that the filetype is simply not supported." ) def register_page_converter(self, converter: DocumentConverter) -> None: diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py b/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py index 012b95d..dfa734c 100644 --- a/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py +++ b/packages/markitdown/src/markitdown/converter_utils/docx/math/omml.py @@ -272,7 +272,7 @@ class oMath2Latex(Tag2Method): if FUNC.get(t): latex_chars.append(FUNC[t]) else: - raise NotImplemented("Not support func %s" % t) + raise NotImplementedError("Not support func %s" % t) else: latex_chars.append(t) t = BLANK.join(latex_chars) @@ -316,7 +316,7 @@ class oMath2Latex(Tag2Method): t_dict = self.process_children_dict(elm, include=("e", "lim")) latex_s = LIM_FUNC.get(t_dict["e"]) if not latex_s: - raise NotImplemented("Not support lim %s" % t_dict["e"]) + raise NotImplementedError("Not support lim %s" % t_dict["e"]) else: return latex_s.format(lim=t_dict.get("lim")) diff --git a/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py b/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py index 78552bc..d6fa8db 100644 --- a/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py +++ b/packages/markitdown/src/markitdown/converter_utils/docx/pre_process.py @@ -147,7 +147,7 @@ def pre_process_docx(input_docx: BinaryIO) -> BinaryIO: updated_content = _pre_process_math(content) # In the future, if there are more pre-processing steps, they can be added here zip_output.writestr(name, updated_content) - except: + except Exception: # If there is an error in processing the content, write the original content zip_output.writestr(name, content) else: diff --git a/packages/markitdown/src/markitdown/converters/_audio_converter.py b/packages/markitdown/src/markitdown/converters/_audio_converter.py index 845ad5d..3d96b53 100644 --- a/packages/markitdown/src/markitdown/converters/_audio_converter.py +++ b/packages/markitdown/src/markitdown/converters/_audio_converter.py @@ -1,5 +1,4 @@ -import io -from typing import Any, BinaryIO, Optional +from typing import Any, BinaryIO from ._exiftool import exiftool_metadata from ._transcribe_audio import transcribe_audio diff --git a/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py b/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py index f65b85f..6a38348 100644 --- a/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py +++ b/packages/markitdown/src/markitdown/converters/_bing_serp_converter.py @@ -1,9 +1,8 @@ -import io import re import base64 import binascii from urllib.parse import parse_qs, urlparse -from typing import Any, BinaryIO, Optional +from typing import Any, BinaryIO from bs4 import BeautifulSoup from .._base_converter import DocumentConverter, DocumentConverterResult diff --git a/packages/markitdown/src/markitdown/converters/_csv_converter.py b/packages/markitdown/src/markitdown/converters/_csv_converter.py index 7162889..7e9631e 100644 --- a/packages/markitdown/src/markitdown/converters/_csv_converter.py +++ b/packages/markitdown/src/markitdown/converters/_csv_converter.py @@ -1,9 +1,7 @@ -import sys import csv import io from typing import BinaryIO, Any from charset_normalizer import from_bytes -from ._html_converter import HtmlConverter from .._base_converter import DocumentConverter, DocumentConverterResult from .._stream_info import StreamInfo diff --git a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py index d2dce91..ba66b5b 100644 --- a/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py +++ b/packages/markitdown/src/markitdown/converters/_doc_intel_converter.py @@ -1,13 +1,12 @@ import sys import re import os -from typing import BinaryIO, Any, List, Optional, Union +from typing import BinaryIO, Any, List from enum import Enum -from ._html_converter import HtmlConverter from .._base_converter import DocumentConverter, DocumentConverterResult from .._stream_info import StreamInfo -from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE +from .._exceptions import MissingDependencyException # Try loading optional (but in this case, required) dependencies # Save reporting of any exceptions for later diff --git a/packages/markitdown/src/markitdown/converters/_docx_converter.py b/packages/markitdown/src/markitdown/converters/_docx_converter.py index b320695..69c1ea8 100644 --- a/packages/markitdown/src/markitdown/converters/_docx_converter.py +++ b/packages/markitdown/src/markitdown/converters/_docx_converter.py @@ -4,7 +4,7 @@ from typing import BinaryIO, Any from ._html_converter import HtmlConverter from ..converter_utils.docx.pre_process import pre_process_docx -from .._base_converter import DocumentConverter, DocumentConverterResult +from .._base_converter import DocumentConverterResult from .._stream_info import StreamInfo from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE diff --git a/packages/markitdown/src/markitdown/converters/_epub_converter.py b/packages/markitdown/src/markitdown/converters/_epub_converter.py index 73f2955..3be65b0 100644 --- a/packages/markitdown/src/markitdown/converters/_epub_converter.py +++ b/packages/markitdown/src/markitdown/converters/_epub_converter.py @@ -6,7 +6,7 @@ from xml.dom.minidom import Document from typing import BinaryIO, Any, Dict, List from ._html_converter import HtmlConverter -from .._base_converter import DocumentConverter, DocumentConverterResult +from .._base_converter import DocumentConverterResult from .._stream_info import StreamInfo ACCEPTED_MIME_TYPE_PREFIXES = [ diff --git a/packages/markitdown/src/markitdown/converters/_exiftool.py b/packages/markitdown/src/markitdown/converters/_exiftool.py index 43c100f..1af155f 100644 --- a/packages/markitdown/src/markitdown/converters/_exiftool.py +++ b/packages/markitdown/src/markitdown/converters/_exiftool.py @@ -1,10 +1,6 @@ import json import subprocess import locale -import sys -import shutil -import os -import warnings from typing import BinaryIO, Any, Union diff --git a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py b/packages/markitdown/src/markitdown/converters/_ipynb_converter.py index f8ba193..b15e77a 100644 --- a/packages/markitdown/src/markitdown/converters/_ipynb_converter.py +++ b/packages/markitdown/src/markitdown/converters/_ipynb_converter.py @@ -50,8 +50,6 @@ class IpynbConverter(DocumentConverter): **kwargs: Any, # Options to pass to the converter ) -> DocumentConverterResult: # Parse and convert the notebook - result = None - encoding = stream_info.charset or "utf-8" notebook_content = file_stream.read().decode(encoding=encoding) return self._convert(json.loads(notebook_content)) diff --git a/packages/markitdown/src/markitdown/converters/_llm_caption.py b/packages/markitdown/src/markitdown/converters/_llm_caption.py index b851dc8..004a47a 100644 --- a/packages/markitdown/src/markitdown/converters/_llm_caption.py +++ b/packages/markitdown/src/markitdown/converters/_llm_caption.py @@ -1,4 +1,4 @@ -from typing import BinaryIO, Any, Union +from typing import BinaryIO, Union import base64 import mimetypes from .._stream_info import StreamInfo diff --git a/packages/markitdown/src/markitdown/converters/_pdf_converter.py b/packages/markitdown/src/markitdown/converters/_pdf_converter.py index 4586ef1..63162d5 100644 --- a/packages/markitdown/src/markitdown/converters/_pdf_converter.py +++ b/packages/markitdown/src/markitdown/converters/_pdf_converter.py @@ -4,7 +4,6 @@ import io from typing import BinaryIO, Any -from ._html_converter import HtmlConverter from .._base_converter import DocumentConverter, DocumentConverterResult from .._stream_info import StreamInfo from .._exceptions import MissingDependencyException, MISSING_DEPENDENCY_MESSAGE diff --git a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py index 2e10405..6f1306f 100644 --- a/packages/markitdown/src/markitdown/converters/_plain_text_converter.py +++ b/packages/markitdown/src/markitdown/converters/_plain_text_converter.py @@ -9,7 +9,7 @@ from .._stream_info import StreamInfo # Save reporting of any exceptions for later _dependency_exc_info = None try: - import mammoth + import mammoth # noqa: F401 except ImportError: # Preserve the error and stack trace for later _dependency_exc_info = sys.exc_info() diff --git a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py b/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py index c0f7e0e..c200186 100644 --- a/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py +++ b/packages/markitdown/src/markitdown/converters/_wikipedia_converter.py @@ -1,7 +1,6 @@ -import io import re import bs4 -from typing import Any, BinaryIO, Optional +from typing import Any, BinaryIO from .._base_converter import DocumentConverter, DocumentConverterResult from .._stream_info import StreamInfo diff --git a/packages/markitdown/src/markitdown/converters/_xlsx_converter.py b/packages/markitdown/src/markitdown/converters/_xlsx_converter.py index 28f73a0..4186ec7 100644 --- a/packages/markitdown/src/markitdown/converters/_xlsx_converter.py +++ b/packages/markitdown/src/markitdown/converters/_xlsx_converter.py @@ -10,14 +10,14 @@ from .._stream_info import StreamInfo _xlsx_dependency_exc_info = None try: import pandas as pd - import openpyxl + import openpyxl # noqa: F401 except ImportError: _xlsx_dependency_exc_info = sys.exc_info() _xls_dependency_exc_info = None try: - import pandas as pd - import xlrd + import pandas as pd # noqa: F811 + import xlrd # noqa: F401 except ImportError: _xls_dependency_exc_info = sys.exc_info() diff --git a/packages/markitdown/src/markitdown/converters/_youtube_converter.py b/packages/markitdown/src/markitdown/converters/_youtube_converter.py index b5a014c..e08c4cb 100644 --- a/packages/markitdown/src/markitdown/converters/_youtube_converter.py +++ b/packages/markitdown/src/markitdown/converters/_youtube_converter.py @@ -1,10 +1,8 @@ -import sys import json import time -import io import re import bs4 -from typing import Any, BinaryIO, Optional, Dict, List, Union +from typing import Any, BinaryIO, Dict, List, Union from urllib.parse import parse_qs, urlparse, unquote from .._base_converter import DocumentConverter, DocumentConverterResult diff --git a/packages/markitdown/src/markitdown/converters/_zip_converter.py b/packages/markitdown/src/markitdown/converters/_zip_converter.py index cb1a7e6..f87e6c8 100644 --- a/packages/markitdown/src/markitdown/converters/_zip_converter.py +++ b/packages/markitdown/src/markitdown/converters/_zip_converter.py @@ -1,4 +1,3 @@ -import sys import zipfile import io import os diff --git a/packages/markitdown/tests/test_cli_misc.py b/packages/markitdown/tests/test_cli_misc.py index 345d5cc..cf6c9cc 100644 --- a/packages/markitdown/tests/test_cli_misc.py +++ b/packages/markitdown/tests/test_cli_misc.py @@ -1,6 +1,5 @@ #!/usr/bin/env python3 -m pytest import subprocess -import pytest from markitdown import __version__ # This file contains CLI tests that are not directly tested by the FileTestVectors. @@ -24,8 +23,8 @@ def test_invalid_flag() -> None: assert result.returncode != 0, f"CLI exited with error: {result.stderr}" assert ( "unrecognized arguments" in result.stderr - ), f"Expected 'unrecognized arguments' to appear in STDERR" - assert "SYNTAX" in result.stderr, f"Expected 'SYNTAX' to appear in STDERR" + ), "Expected 'unrecognized arguments' to appear in STDERR" + assert "SYNTAX" in result.stderr, "Expected 'SYNTAX' to appear in STDERR" if __name__ == "__main__": diff --git a/packages/markitdown/tests/test_cli_vectors.py b/packages/markitdown/tests/test_cli_vectors.py index 6030482..5add530 100644 --- a/packages/markitdown/tests/test_cli_vectors.py +++ b/packages/markitdown/tests/test_cli_vectors.py @@ -19,13 +19,6 @@ else: FileTestVector, ) -from markitdown import ( - MarkItDown, - UnsupportedFormatException, - FileConversionException, - StreamInfo, -) - skip_remote = ( True if os.environ.get("GITHUB_ACTIONS") else False ) # Don't run these tests in CI @@ -140,8 +133,6 @@ def test_convert_url(shared_tmp_dir, test_vector): """Test the conversion of a stream with no stream info.""" # Note: tmp_dir is not used here, but is needed to match the signature - markitdown = MarkItDown() - time.sleep(1) # Ensure we don't hit rate limits result = subprocess.run( ["python", "-m", "markitdown", TEST_FILES_URL + "/" + test_vector.filename], @@ -191,7 +182,6 @@ def test_output_to_file_with_data_uris(shared_tmp_dir, test_vector) -> None: if __name__ == "__main__": - import sys import tempfile """Runs this file's tests from the command line.""" diff --git a/packages/markitdown/tests/test_module_misc.py b/packages/markitdown/tests/test_module_misc.py index 1819183..447e28a 100644 --- a/packages/markitdown/tests/test_module_misc.py +++ b/packages/markitdown/tests/test_module_misc.py @@ -3,7 +3,6 @@ import io import os import re import shutil -import openai import pytest from markitdown._uri_utils import parse_data_uri, file_uri_to_path @@ -253,8 +252,6 @@ def test_file_uris() -> None: def test_docx_comments() -> None: - markitdown = MarkItDown() - # Test DOCX processing, with comments and setting style_map on init markitdown_with_style_map = MarkItDown(style_map="comment-reference => ") result = markitdown_with_style_map.convert( diff --git a/packages/markitdown/tests/test_module_vectors.py b/packages/markitdown/tests/test_module_vectors.py index 98fd0c7..d6d9d68 100644 --- a/packages/markitdown/tests/test_module_vectors.py +++ b/packages/markitdown/tests/test_module_vectors.py @@ -2,7 +2,6 @@ import os import time import pytest -import codecs import base64 from pathlib import Path @@ -14,8 +13,6 @@ else: from markitdown import ( MarkItDown, - UnsupportedFormatException, - FileConversionException, StreamInfo, ) @@ -203,8 +200,6 @@ def test_convert_stream_keep_data_uris(test_vector): if __name__ == "__main__": - import sys - """Runs this file's tests from the command line.""" # General tests