From 6ebef5af0cc672619c4127a7d1019dbce174c603 Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Fri, 13 Dec 2024 11:06:11 -0800
Subject: [PATCH 1/3] CLI usage instructions

Plus added a PyPI badge
---
 README.md | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/README.md b/README.md
index 5034d03..6b62356 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,7 @@
 # MarkItDown
 
+[![PyPI](https://img.shields.io/pypi/v/markitdown.svg)](https://pypi.org/project/markitdown/)
+
 The MarkItDown library is a utility tool for converting various files to Markdown (e.g., for indexing, text analysis, etc.)
 
 It presently supports:
@@ -23,6 +25,16 @@ result = markitdown.convert("test.xlsx")
 print(result.text_content)
 ```
 
+To use this as a command-line utility, install it and then run it like this:
+
+```bash
+markitdown path-to-file.pdf
+```
+This will output Markdown to standard output. You can save it like this:
+```bash
+markitdown path-to-file.pdf > document.md
+```
+
 ## Contributing
 
 This project welcomes contributions and suggestions.  Most contributions require you to agree to a

From 33ce17954dea8a0a127d96817b6d1dac8e50fb9b Mon Sep 17 00:00:00 2001
From: Simon Willison <swillison@gmail.com>
Date: Fri, 13 Dec 2024 11:09:03 -0800
Subject: [PATCH 2/3] Note about piping

---
 README.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/README.md b/README.md
index 6b62356..851611e 100644
--- a/README.md
+++ b/README.md
@@ -34,6 +34,10 @@ This will output Markdown to standard output. You can save it like this:
 ```bash
 markitdown path-to-file.pdf > document.md
 ```
+You can pipe content to standard input by omitting the argument:
+```bash
+cat path-to-file.pdf | markitdown
+```
 
 ## Contributing
 

From 52b723724c33b76cf3a2ee1e4d636ee81312e388 Mon Sep 17 00:00:00 2001
From: Divyansh Singh <40380293+brc-dd@users.noreply.github.com>
Date: Sun, 15 Dec 2024 10:37:15 +0530
Subject: [PATCH 3/3] Fix character decoding issues with text-like files

---
 pyproject.toml                    |  1 +
 src/markitdown/_markitdown.py     |  5 ++---
 tests/test_files/test_mskanji.csv |  4 ++++
 tests/test_markitdown.py          | 13 +++++++++++++
 4 files changed, 20 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_files/test_mskanji.csv

diff --git a/pyproject.toml b/pyproject.toml
index 74df032..756380a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -38,6 +38,7 @@ dependencies = [
   "youtube-transcript-api",
   "SpeechRecognition",
   "pathvalidate",
+  "charset-normalizer",
 ]
 
 [project.urls]
diff --git a/src/markitdown/_markitdown.py b/src/markitdown/_markitdown.py
index 96997cf..25786f6 100644
--- a/src/markitdown/_markitdown.py
+++ b/src/markitdown/_markitdown.py
@@ -26,6 +26,7 @@ import pptx
 import puremagic
 import requests
 from bs4 import BeautifulSoup
+from charset_normalizer import from_path
 
 # Optional Transcription support
 try:
@@ -161,9 +162,7 @@ class PlainTextConverter(DocumentConverter):
         elif "text/" not in content_type.lower():
             return None
 
-        text_content = ""
-        with open(local_path, "rt", encoding="utf-8") as fh:
-            text_content = fh.read()
+        text_content = str(from_path(local_path).best())
         return DocumentConverterResult(
             title=None,
             text_content=text_content,
diff --git a/tests/test_files/test_mskanji.csv b/tests/test_files/test_mskanji.csv
new file mode 100644
index 0000000..d67f5a3
--- /dev/null
+++ b/tests/test_files/test_mskanji.csv
@@ -0,0 +1,4 @@
+–¼‘O,”N—î,ZŠ
+²“¡‘¾˜Y,30,“Œ‹ž
+ŽO–Ø‰pŽq,25,‘åã
+îà‹´~,35,–¼ŒÃ‰®
diff --git a/tests/test_markitdown.py b/tests/test_markitdown.py
index 94fd886..ac08820 100644
--- a/tests/test_markitdown.py
+++ b/tests/test_markitdown.py
@@ -87,6 +87,13 @@ SERP_TEST_EXCLUDES = [
     "data:image/svg+xml,%3Csvg%20width%3D",
 ]
 
+CSV_CP932_TEST_STRINGS = [
+    "名前,年齢,住所",
+    "佐藤太郎,30,東京",
+    "三木英子,25,大阪",
+    "髙橋淳,35,名古屋",
+]
+
 
 @pytest.mark.skipif(
     skip_remote,
@@ -164,6 +171,12 @@ def test_markitdown_local() -> None:
     for test_string in SERP_TEST_STRINGS:
         assert test_string in text_content
 
+    ## Test non-UTF-8 encoding
+    result = markitdown.convert(os.path.join(TEST_FILES_DIR, "test_mskanji.csv"))
+    text_content = result.text_content.replace("\\", "")
+    for test_string in CSV_CP932_TEST_STRINGS:
+        assert test_string in text_content
+
 
 @pytest.mark.skipif(
     skip_exiftool,