Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
 help / color / mirror / Atom feed
From: Nil Admirari <nil-admirari@mailo.com>
To: ffmpeg-devel@ffmpeg.org
Subject: [FFmpeg-devel] [PATCH v16 1/5] libavutil: Add wchartoutf8(), wchartoansi(), utf8toansi() and getenv_utf8()
Date: Wed, 15 Jun 2022 23:03:40 +0300
Message-ID: <20220615200344.17211-1-nil-admirari@mailo.com> (raw)

wchartoutf8() converts strings returned by WinAPI into UTF-8,
which is FFmpeg's preffered encoding.

Some external dependencies, such as AviSynth, are still
not Unicode-enabled. utf8toansi() converts UTF-8 strings
into ANSI in two steps: UTF-8 -> wchar_t -> ANSI.
wchartoansi() is responsible for the second step of the conversion.
Conversion in just one step is not supported by WinAPI.

Since these character converting functions allocate the buffer
of necessary size, they also facilitate the removal of MAX_PATH limit
in places where fixed-size ANSI/WCHAR strings were used
as filename buffers.

getenv_utf8() wraps _wgetenv() converting its input from
and its output to UTF-8. Compared to plain getenv(),
getenv_utf8() requires a cleanup.

Because of that, in places that only test the existence of
an environment variable or compare its value with a string
consisting entirely of ASCII characters, the use of plain getenv()
is still preferred. (libavutil/log.c check_color_terminal()
is an example of such a place.)

Plain getenv() is also preffered in UNIX-only code,
such as bktr.c, fbdev_common.c, oss.c in libavdevice
or af_ladspa.c in libavfilter.
---
 configure                  |  1 +
 libavutil/getenv_utf8.h    | 71 ++++++++++++++++++++++++++++++++++++++
 libavutil/wchar_filename.h | 51 +++++++++++++++++++++++++++
 3 files changed, 123 insertions(+)
 create mode 100644 libavutil/getenv_utf8.h

diff --git a/configure b/configure
index 3dca1c4bd3..fa37a74531 100755
--- a/configure
+++ b/configure
@@ -2272,6 +2272,7 @@ SYSTEM_FUNCS="
     fcntl
     getaddrinfo
     getauxval
+    getenv
     gethrtime
     getopt
     GetModuleHandle
diff --git a/libavutil/getenv_utf8.h b/libavutil/getenv_utf8.h
new file mode 100644
index 0000000000..161e3e6202
--- /dev/null
+++ b/libavutil/getenv_utf8.h
@@ -0,0 +1,71 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVUTIL_GETENV_UTF8_H
+#define AVUTIL_GETENV_UTF8_H
+
+#include <stdlib.h>
+
+#include "mem.h"
+
+#ifdef HAVE_GETENV
+
+#ifdef _WIN32
+
+#include "libavutil/wchar_filename.h"
+
+static inline char *getenv_utf8(const char *varname)
+{
+    wchar_t *varname_w, *var_w;
+    char *var;
+
+    if (utf8towchar(varname, &varname_w))
+        return NULL;
+    if (!varname_w)
+        return NULL;
+
+    var_w = _wgetenv(varname_w);
+    av_free(varname_w);
+
+    if (!var_w)
+        return NULL;
+    if (wchartoutf8(var_w, &var))
+        return NULL;
+
+    return var;
+
+    // No CP_ACP fallback compared to other *_utf8() functions:
+    // non UTF-8 strings must not be returned.
+}
+
+#else
+
+static inline char *getenv_utf8(const char *varname)
+{
+    return av_strdup(getenv(varname));
+}
+
+#endif // _WIN32
+
+#else
+
+#define getenv_utf8(x) NULL
+
+#endif // HAVE_GETENV
+
+#endif // AVUTIL_GETENV_UTF8_H
diff --git a/libavutil/wchar_filename.h b/libavutil/wchar_filename.h
index f36d9dfea3..a6d71e52e5 100644
--- a/libavutil/wchar_filename.h
+++ b/libavutil/wchar_filename.h
@@ -41,6 +41,57 @@ static inline int utf8towchar(const char *filename_utf8, wchar_t **filename_w)
     return 0;
 }
 
+av_warn_unused_result
+static inline int wchartocp(unsigned int code_page, const wchar_t *filename_w,
+                            char **filename)
+{
+    DWORD flags = code_page == CP_UTF8 ? WC_ERR_INVALID_CHARS : 0;
+    int num_chars = WideCharToMultiByte(code_page, flags, filename_w, -1,
+                                        NULL, 0, NULL, NULL);
+    if (num_chars <= 0) {
+        *filename = NULL;
+        return 0;
+    }
+    *filename = av_malloc_array(num_chars, sizeof *filename);
+    if (!*filename) {
+        errno = ENOMEM;
+        return -1;
+    }
+    WideCharToMultiByte(code_page, flags, filename_w, -1,
+                        *filename, num_chars, NULL, NULL);
+    return 0;
+}
+
+av_warn_unused_result
+static inline int wchartoutf8(const wchar_t *filename_w, char **filename)
+{
+    return wchartocp(CP_UTF8, filename_w, filename);
+}
+
+av_warn_unused_result
+static inline int wchartoansi(const wchar_t *filename_w, char **filename)
+{
+    return wchartocp(CP_ACP, filename_w, filename);
+}
+
+av_warn_unused_result
+static inline int utf8toansi(const char *filename_utf8, char **filename)
+{
+    wchar_t *filename_w = NULL;
+    int ret = -1;
+    if (utf8towchar(filename_utf8, &filename_w))
+        return -1;
+
+    if (!filename_w) {
+        *filename = NULL;
+        return 0;
+    }
+
+    ret = wchartoansi(filename_w, filename);
+    av_free(filename_w);
+    return ret;
+}
+
 /**
  * Checks for extended path prefixes for which normalization needs to be skipped.
  * see .NET6: PathInternal.IsExtended()
-- 
2.34.1



_______________________________________________
ffmpeg-devel mailing list
ffmpeg-devel@ffmpeg.org
https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

To unsubscribe, visit link above, or email
ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".

             reply	other threads:[~2022-06-15 20:04 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-15 20:03 Nil Admirari [this message]
2022-06-15 20:03 ` [FFmpeg-devel] [PATCH v16 2/5] compat/w32dlfcn.h: Remove MAX_PATH limit and replace LoadLibraryExA with LoadLibraryExW Nil Admirari
2022-06-15 20:03 ` [FFmpeg-devel] [PATCH v16 3/5] fftools: Remove MAX_PATH limit and switch to UTF-8 versions of fopen() and getenv() Nil Admirari
2022-06-15 20:03 ` [FFmpeg-devel] [PATCH v16 4/5] libavformat: Remove MAX_PATH limit and use UTF-8 version of getenv() Nil Admirari
2022-06-15 20:03 ` [FFmpeg-devel] [PATCH v16 5/5] libavfilter/vf_frei0r.c: Use " Nil Admirari

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220615200344.17211-1-nil-admirari@mailo.com \
    --to=nil-admirari@mailo.com \
    --cc=ffmpeg-devel@ffmpeg.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Git Inbox Mirror of the ffmpeg-devel mailing list - see https://ffmpeg.org/mailman/listinfo/ffmpeg-devel

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://master.gitmailbox.com/ffmpegdev/0 ffmpegdev/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 ffmpegdev ffmpegdev/ https://master.gitmailbox.com/ffmpegdev \
		ffmpegdev@gitmailbox.com
	public-inbox-index ffmpegdev

Example config snippet for mirrors.


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git