mirror of
https://github.com/kovidgoyal/kitty.git
synced 2026-05-13 16:37:27 +00:00
Build only the SIMD code with SIMD compiler flags
This commit is contained in:
parent
465616223c
commit
7e77a196e6
8 changed files with 172 additions and 55 deletions
30
glfw/glfw.py
30
glfw/glfw.py
|
|
@ -6,7 +6,8 @@ import json
|
|||
import os
|
||||
import re
|
||||
import sys
|
||||
from typing import Callable, Dict, List, NamedTuple, Optional, Sequence, Tuple
|
||||
from enum import Enum
|
||||
from typing import Any, Callable, Dict, List, NamedTuple, Optional, Sequence, Tuple
|
||||
|
||||
_plat = sys.platform.lower()
|
||||
is_linux = 'linux' in _plat
|
||||
|
|
@ -32,6 +33,19 @@ class Command(NamedTuple):
|
|||
keyfile: Optional[str] = None
|
||||
|
||||
|
||||
class ISA(Enum):
    '''
    Instruction set architectures the build system distinguishes.

    Each member's value is the number that is matched against the 16-bit
    machine field of an ELF header when identifying a compiled binary
    (EM_386, EM_X86_64 and EM_AARCH64 respectively). Anything unrecognised
    is represented by Other.
    '''

    X86 = 0x03     # 32-bit Intel
    AMD64 = 0x3e   # 64-bit Intel/AMD
    ARM64 = 0xb7   # 64-bit ARM
    Other = 0x0    # catch-all for every other architecture
|
||||
|
||||
|
||||
class BinaryArch(NamedTuple):
    '''Word size and instruction set of a compiled binary; defaults to 64-bit AMD64.'''

    # Width of the binary's word size in bits
    bits: int = 64
    # Instruction set the binary was built for
    isa: ISA = ISA.AMD64
|
||||
|
||||
|
||||
|
||||
class Env:
|
||||
|
||||
cc: List[str] = []
|
||||
|
|
@ -42,6 +56,9 @@ class Env:
|
|||
ldpaths: List[str] = []
|
||||
ccver: Tuple[int, int]
|
||||
vcs_rev: str = ''
|
||||
build_universal_binary: bool = False
|
||||
binary_arch: BinaryArch = BinaryArch()
|
||||
native_optimizations: bool = False
|
||||
|
||||
# glfw stuff
|
||||
all_headers: List[str] = []
|
||||
|
|
@ -54,12 +71,16 @@ class Env:
|
|||
def __init__(
|
||||
self, cc: List[str] = [], cppflags: List[str] = [], cflags: List[str] = [], ldflags: List[str] = [],
|
||||
library_paths: Dict[str, List[str]] = {}, ldpaths: Optional[List[str]] = None, ccver: Tuple[int, int] = (0, 0),
|
||||
vcs_rev: str = ''
|
||||
vcs_rev: str = '', build_universal_binary: bool = False, binary_arch: BinaryArch = BinaryArch(),
|
||||
native_optimizations: bool = False,
|
||||
):
|
||||
self.cc, self.cppflags, self.cflags, self.ldflags, self.library_paths = cc, cppflags, cflags, ldflags, library_paths
|
||||
self.ldpaths = ldpaths or []
|
||||
self.ccver = ccver
|
||||
self.vcs_rev = vcs_rev
|
||||
self.build_universal_binary = build_universal_binary
|
||||
self.binary_arch = binary_arch
|
||||
self.native_optimizations = native_optimizations
|
||||
|
||||
def copy(self) -> 'Env':
|
||||
ans = Env(self.cc, list(self.cppflags), list(self.cflags), list(self.ldflags), dict(self.library_paths), list(self.ldpaths), self.ccver)
|
||||
|
|
@ -70,6 +91,9 @@ class Env:
|
|||
ans.wayland_scanner_code = self.wayland_scanner_code
|
||||
ans.wayland_protocols = self.wayland_protocols
|
||||
ans.vcs_rev = self.vcs_rev
|
||||
ans.build_universal_binary = self.build_universal_binary
|
||||
ans.binary_arch = self.binary_arch
|
||||
ans.native_optimizations = self.native_optimizations
|
||||
return ans
|
||||
|
||||
|
||||
|
|
@ -83,7 +107,7 @@ def init_env(
|
|||
pkg_config: Callable[..., List[str]],
|
||||
pkg_version: Callable[[str], Tuple[int, int]],
|
||||
at_least_version: Callable[..., None],
|
||||
test_compile: Callable[..., bool],
|
||||
test_compile: Callable[..., Any],
|
||||
module: str = 'x11'
|
||||
) -> Env:
|
||||
ans = env.copy()
|
||||
|
|
|
|||
29
kitty/arches.h
Normal file
29
kitty/arches.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
* Copyright (C) 2024 Kovid Goyal <kovid at kovidgoyal.net>
|
||||
*
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
|
||||
#ifdef __aarch64__
|
||||
#define KITTY_TARGET_CPU_IS_ARM64
|
||||
#define KITTY_128BIT_ALLOWED
|
||||
#define KITTY_256BIT_ALLOWED
|
||||
#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
|
||||
#define KITTY_TARGET_CPU_IS_X86
|
||||
#define KITTY_128BIT_ALLOWED
|
||||
#elif defined(__amd64__)
|
||||
#define KITTY_TARGET_CPU_IS_AMD64
|
||||
#define KITTY_128BIT_ALLOWED
|
||||
#define KITTY_256BIT_ALLOWED
|
||||
#endif
|
||||
|
||||
#if defined(__clang__) && defined(KITTY_128BIT_ALLOWED)
|
||||
#define KITTY_START_128BIT_CODE
|
||||
#elif defined(KITTY_128BIT_ALLOWED)
|
||||
#define KITTY_START_128BIT_CODE
|
||||
#else
|
||||
#define KITTY_START_128BIT_CODE
|
||||
#endif
|
||||
9
kitty/simd-string-128.c
Normal file
9
kitty/simd-string-128.c
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
/*
|
||||
* simd-string-128.c
|
||||
* Copyright (C) 2024 Kovid Goyal <kovid at kovidgoyal.net>
|
||||
*
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#define KITTY_SIMD_LEVEL 128
|
||||
#include "simd-string-impl.h"
|
||||
9
kitty/simd-string-256.c
Normal file
9
kitty/simd-string-256.c
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
/*
|
||||
* simd-string-256.c
|
||||
* Copyright (C) 2024 Kovid Goyal <kovid at kovidgoyal.net>
|
||||
*
|
||||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#define KITTY_SIMD_LEVEL 256
|
||||
#include "simd-string-impl.h"
|
||||
|
|
@ -4,8 +4,8 @@
|
|||
* Distributed under terms of the GPL3 license.
|
||||
*/
|
||||
|
||||
#ifndef BITS
|
||||
#define BITS 128
|
||||
#ifndef KITTY_SIMD_LEVEL
|
||||
#define KITTY_SIMD_LEVEL 128
|
||||
#endif
|
||||
|
||||
#include "simd-string.h"
|
||||
|
|
@ -26,13 +26,13 @@ _Pragma("clang diagnostic pop")
|
|||
#endif
|
||||
#define CONCAT(A, B) A##B
|
||||
#define CONCAT_EXPAND(A, B) CONCAT(A,B)
|
||||
#define FUNC(name) CONCAT_EXPAND(name##_, BITS)
|
||||
#define integer_t CONCAT_EXPAND(CONCAT_EXPAND(simde__m, BITS), i)
|
||||
#define FUNC(name) CONCAT_EXPAND(name##_, KITTY_SIMD_LEVEL)
|
||||
#define integer_t CONCAT_EXPAND(CONCAT_EXPAND(simde__m, KITTY_SIMD_LEVEL), i)
|
||||
#define shift_right_by_bytes128 simde_mm_srli_si128
|
||||
#define zero_last_n_bytes FUNC(zero_last_n_bytes)
|
||||
#define is_zero FUNC(is_zero)
|
||||
|
||||
#if BITS == 128
|
||||
#if KITTY_SIMD_LEVEL == 128
|
||||
#define set1_epi8(x) simde_mm_set1_epi8((char)(x))
|
||||
#define set_epi8 simde_mm_set_epi8
|
||||
#define add_epi8 simde_mm_add_epi8
|
||||
|
|
@ -199,7 +199,7 @@ FUNC(zero_last_n_bytes)(integer_t vec, char n) {
|
|||
return andnot_si(mask, vec);
|
||||
}
|
||||
|
||||
static inline const uint8_t*
|
||||
const uint8_t*
|
||||
FUNC(find_either_of_two_bytes)(const uint8_t *haystack, const size_t sz, const uint8_t a, const uint8_t b) {
|
||||
const integer_t a_vec = set1_epi8(a), b_vec = set1_epi8(b);
|
||||
for (const uint8_t* limit = haystack + sz; haystack < limit; haystack += sizeof(integer_t)) {
|
||||
|
|
@ -220,7 +220,7 @@ FUNC(find_either_of_two_bytes)(const uint8_t *haystack, const size_t sz, const u
|
|||
|
||||
static inline void
|
||||
FUNC(output_plain_ascii)(UTF8Decoder *d, integer_t vec, size_t src_sz) {
|
||||
#if BITS == 128
|
||||
#if KITTY_SIMD_LEVEL == 128
|
||||
for (const uint32_t *limit = d->output + src_sz, *p = d->output; p < limit; p += output_increment) {
|
||||
const integer_t unpacked = extract_lower_quarter_as_chars(vec);
|
||||
store_aligned((integer_t*)p, unpacked);
|
||||
|
|
@ -252,7 +252,7 @@ FUNC(output_plain_ascii)(UTF8Decoder *d, integer_t vec, size_t src_sz) {
|
|||
|
||||
static inline void
|
||||
FUNC(output_unicode)(UTF8Decoder *d, integer_t output1, integer_t output2, integer_t output3, const size_t num_codepoints) {
|
||||
#if BITS == 128
|
||||
#if KITTY_SIMD_LEVEL == 128
|
||||
for (const uint32_t *limit = d->output + num_codepoints, *p = d->output; p < limit; p += output_increment) {
|
||||
const integer_t unpacked1 = extract_lower_quarter_as_chars(output1);
|
||||
const integer_t unpacked2 = shift_right_by_one_byte(extract_lower_quarter_as_chars(output2));
|
||||
|
|
@ -294,7 +294,6 @@ FUNC(output_unicode)(UTF8Decoder *d, integer_t output1, integer_t output2, integ
|
|||
}
|
||||
#undef output_increment
|
||||
|
||||
#ifndef SIMD_STRING_IMPL_INCLUDED_ONCE
|
||||
static inline unsigned
|
||||
sum_bytes_128(simde__m128i v) {
|
||||
// Use _mm_sad_epu8 to perform a sum of absolute differences against zero
|
||||
|
|
@ -345,9 +344,8 @@ scalar_decode_all(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
|||
return pos;
|
||||
}
|
||||
#undef do_one_byte
|
||||
#endif
|
||||
|
||||
static inline bool
|
||||
bool
|
||||
FUNC(utf8_decode_to_esc)(UTF8Decoder *d, const uint8_t *src, size_t src_sz) {
|
||||
// Based on the algorithm described in: https://woboq.com/blog/utf-8-processing-using-simd.html
|
||||
|
||||
|
|
@ -489,7 +487,7 @@ start_classification:
|
|||
shifts = add_epi8(shifts, shift_right_by_two_bytes(shifts));
|
||||
shifts = add_epi8(shifts, shift_right_by_four_bytes(shifts));
|
||||
shifts = add_epi8(shifts, shift_right_by_eight_bytes(shifts));
|
||||
#if BITS == 256
|
||||
#if KITTY_SIMD_LEVEL == 256
|
||||
shifts = add_epi8(shifts, shift_right_by_sixteen_bytes(shifts));
|
||||
#endif
|
||||
// zero the shifts for discarded continuation bytes
|
||||
|
|
@ -505,7 +503,7 @@ start_classification:
|
|||
shifts = move(shifts, two_bytes, 2);
|
||||
shifts = move(shifts, four_bytes, 3);
|
||||
shifts = move(shifts, eight_bytes, 4);
|
||||
#if BITS == 256
|
||||
#if KITTY_SIMD_LEVEL == 256
|
||||
shifts = move(shifts, sixteen_bytes, 5);
|
||||
#endif
|
||||
#undef move
|
||||
|
|
@ -547,7 +545,7 @@ invalid_utf8:
|
|||
#undef movemask_epi8
|
||||
#undef CONCAT
|
||||
#undef CONCAT_EXPAND
|
||||
#undef BITS
|
||||
#undef KITTY_SIMD_LEVEL
|
||||
#undef shift_right_by_one_byte
|
||||
#undef shift_right_by_two_bytes
|
||||
#undef shift_right_by_four_bytes
|
||||
|
|
@ -575,6 +573,3 @@ invalid_utf8:
|
|||
#undef sum_bytes
|
||||
#undef is_zero
|
||||
#undef print_register_as_bytes
|
||||
#ifndef SIMD_STRING_IMPL_INCLUDED_ONCE
|
||||
#define SIMD_STRING_IMPL_INCLUDED_ONCE
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -8,12 +8,6 @@
|
|||
#include "data-types.h"
|
||||
#include "charsets.h"
|
||||
#include "simd-string.h"
|
||||
#undef BITS
|
||||
#define BITS 128
|
||||
#include "simd-string-impl.h"
|
||||
#define BITS 256
|
||||
#include "simd-string-impl.h"
|
||||
#undef BITS
|
||||
static bool has_sse4_2 = false, has_avx2 = false;
|
||||
|
||||
// find_either_of_two_bytes {{{
|
||||
|
|
|
|||
|
|
@ -6,11 +6,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdalign.h>
|
||||
#include "data-types.h"
|
||||
#include <stddef.h>
|
||||
#include <stdalign.h>
|
||||
|
||||
typedef void (*control_byte_callback)(void *data, uint8_t ch);
|
||||
typedef void (*output_chars_callback)(void *data, const uint32_t *chars, unsigned count);
|
||||
|
|
@ -36,3 +34,9 @@ const uint8_t* find_either_of_two_bytes(const uint8_t *haystack, const size_t sz
|
|||
// first position in haystack that contains a char that is not in [a, b].
|
||||
// a must be <= b
|
||||
const uint8_t* find_byte_not_in_range(const uint8_t *haystack, const size_t sz, const uint8_t a1, const uint8_t b);
|
||||
|
||||
// SIMD implementations, internal use
|
||||
bool utf8_decode_to_esc_128(UTF8Decoder *d, const uint8_t *src, size_t src_sz);
|
||||
bool utf8_decode_to_esc_256(UTF8Decoder *d, const uint8_t *src, size_t src_sz);
|
||||
const uint8_t* find_either_of_two_bytes_128(const uint8_t *haystack, const size_t sz, const uint8_t a, const uint8_t b);
|
||||
const uint8_t* find_either_of_two_bytes_256(const uint8_t *haystack, const size_t sz, const uint8_t a, const uint8_t b);
|
||||
|
|
|
|||
103
setup.py
103
setup.py
|
|
@ -10,6 +10,7 @@ import re
|
|||
import runpy
|
||||
import shlex
|
||||
import shutil
|
||||
import struct
|
||||
import subprocess
|
||||
import sys
|
||||
import sysconfig
|
||||
|
|
@ -22,7 +23,7 @@ from pathlib import Path
|
|||
from typing import Callable, Dict, FrozenSet, Iterable, Iterator, List, Optional, Sequence, Set, Tuple, Union, cast
|
||||
|
||||
from glfw import glfw
|
||||
from glfw.glfw import Command, CompileKey
|
||||
from glfw.glfw import ISA, BinaryArch, Command, CompileKey
|
||||
|
||||
if sys.version_info[:2] < (3, 8):
|
||||
raise SystemExit('kitty requires python >= 3.8')
|
||||
|
|
@ -362,6 +363,24 @@ def get_sanitize_args(cc: List[str], ccver: Tuple[int, int]) -> List[str]:
|
|||
return sanitize_args
|
||||
|
||||
|
||||
def get_binary_arch(path: str) -> BinaryArch:
    '''
    Inspect the header of the binary at ``path`` and report its architecture.

    Only the first 64 bytes of the file are read. Two container formats are
    recognised: ELF (either byte order) and thin little-endian Mach-O.

    :param path: Filesystem path of an object file or executable.
    :return: A BinaryArch carrying the word size in bits and the ISA. An
        architecture that is not one of the known ISA members is reported
        as ISA.Other for both formats.
    :raises SystemExit: If the file is in neither format, or its header is
        truncated or carries invalid field values.
    '''
    with open(path, 'rb') as f:
        sig = f.read(64)
    if sig.startswith(b'\x7fELF'):  # ELF
        try:
            bits = {1: 32, 2: 64}[sig[4]]  # EI_CLASS
            endian = {1: '<', 2: '>'}[sig[5]]  # EI_DATA
            machine, = struct.unpack_from(endian + 'H', sig, 0x12)  # e_machine
        except (KeyError, IndexError, struct.error) as err:
            raise SystemExit(f'Malformed ELF header in {path}: {err!r}') from err
        isa = {i.value: i for i in ISA}.get(machine, ISA.Other)
    elif sig[:4] in (b'\xcf\xfa\xed\xfe', b'\xce\xfa\xed\xfe'):  # Mach-O
        try:
            magic, cpu_type = struct.unpack_from('<II', sig, 0)
        except struct.error as err:
            raise SystemExit(f'Malformed Mach-O header in {path}: {err!r}') from err
        bits = {0xfeedface: 32, 0xfeedfacf: 64}[magic]
        # The CPU type is a family number in the low byte (7 = x86, 12 = ARM)
        # combined with ABI flags in the top byte (0x01000000 = 64-bit ABI,
        # 0x02000000 = 64-bit hardware with 32-bit pointers). The flags must be
        # kept, otherwise a 32-bit i386 binary is mistaken for AMD64.
        isa = {
            0x00000007: ISA.X86,
            0x01000007: ISA.AMD64,
            0x0100000c: ISA.ARM64,
            0x0200000c: ISA.ARM64,
        }.get(cpu_type, ISA.Other)
    else:
        raise SystemExit(f'Unknown binary format with signature: {sig[:4]!r}')
    return BinaryArch(bits=bits, isa=isa)
|
||||
|
||||
|
||||
def test_compile(
|
||||
cc: List[str], *cflags: str,
|
||||
src: str = '',
|
||||
|
|
@ -370,18 +389,25 @@ def test_compile(
|
|||
show_stderr: bool = False,
|
||||
libraries: Iterable[str] = (),
|
||||
ldflags: Iterable[str] = (),
|
||||
) -> bool:
|
||||
get_output_arch: bool = False,
|
||||
) -> Union[bool, BinaryArch]:
|
||||
src = src or 'int main(void) { return 0; }'
|
||||
with tempfile.TemporaryDirectory(prefix='kitty-test-compile-') as tdir:
|
||||
with open(os.path.join(tdir, f'source.{source_ext}'), 'w', encoding='utf-8') as srcf:
|
||||
print(src, file=srcf)
|
||||
return subprocess.Popen(
|
||||
output = os.path.join(tdir, 'source.output')
|
||||
ret = subprocess.Popen(
|
||||
cc + ['-Werror=implicit-function-declaration'] + list(cflags) + ([] if link_also else ['-c']) +
|
||||
['-o', os.path.join(tdir, 'source.output'), srcf.name] +
|
||||
['-o', output, srcf.name] +
|
||||
[f'-l{x}' for x in libraries] + list(ldflags),
|
||||
stdout=subprocess.DEVNULL, stdin=subprocess.DEVNULL,
|
||||
stderr=None if show_stderr else subprocess.DEVNULL
|
||||
).wait() == 0
|
||||
).wait()
|
||||
if get_output_arch:
|
||||
if ret != 0:
|
||||
raise SystemExit(f'Failed to determine target architecture compiling test program failed with exit code: {ret}')
|
||||
return get_binary_arch(output)
|
||||
return ret == 0
|
||||
|
||||
|
||||
def first_successful_compile(cc: List[str], *cflags: str, src: str = '', source_ext: str = 'c') -> str:
|
||||
|
|
@ -432,6 +458,7 @@ def init_env(
|
|||
vcs_rev: str = '',
|
||||
) -> Env:
|
||||
native_optimizations = native_optimizations and not sanitize
|
||||
build_universal_binary = build_universal_binary and is_macos
|
||||
cc, ccver = cc_version()
|
||||
if verbose:
|
||||
print('CC:', cc, ccver)
|
||||
|
|
@ -463,7 +490,7 @@ def init_env(
|
|||
# in https://github.com/kovidgoyal/kitty/issues/6845#issuecomment-1835886938
|
||||
arm_control_flow_protection = '-mbranch-protection=standard' if is_macos else ''
|
||||
# Universal build fails with -fcf-protection clang is not smart enough to filter it out for the ARM part
|
||||
intel_control_flow_protection = '-fcf-protection=full' if ccver >= (9, 0) and not build_universal_binary else ''
|
||||
intel_control_flow_protection = '-fcf-protection=full' if ccver >= (9, 0) else ''
|
||||
control_flow_protection = arm_control_flow_protection if is_arm else intel_control_flow_protection
|
||||
env_cflags = shlex.split(os.environ.get('CFLAGS', ''))
|
||||
env_cppflags = shlex.split(os.environ.get('CPPFLAGS', ''))
|
||||
|
|
@ -471,11 +498,10 @@ def init_env(
|
|||
if control_flow_protection and not test_compile(cc, control_flow_protection, *env_cppflags, *env_cflags, ldflags=env_ldflags):
|
||||
control_flow_protection = ''
|
||||
march = ''
|
||||
if not (is_macos and is_arm) and not build_universal_binary:
|
||||
if native_optimizations and not build_universal_binary and not (is_macos and is_arm):
|
||||
# see https://github.com/kovidgoyal/kitty/issues/3126
|
||||
# -march=native is not supported when targeting Apple Silicon
|
||||
if native_optimizations:
|
||||
march = '-march=native -mtune=native'
|
||||
march = '-march=native -mtune=native'
|
||||
cflags_ = os.environ.get(
|
||||
'OVERRIDE_CFLAGS', (
|
||||
f'-Wextra {float_conversion} -Wno-missing-field-initializers -Wall -Wstrict-prototypes {std}'
|
||||
|
|
@ -508,13 +534,6 @@ def init_env(
|
|||
cflags.append('-g3')
|
||||
ldflags.append('-lprofiler')
|
||||
|
||||
# SIMD instructions
|
||||
if is_arm:
|
||||
if not is_macos:
|
||||
cflags.append('-mfpu=neon')
|
||||
else:
|
||||
cflags.append('-msse4.2')
|
||||
cflags.append('-mavx2')
|
||||
library_paths: Dict[str, List[str]] = {}
|
||||
|
||||
def add_lpath(which: str, name: str, val: Optional[str]) -> None:
|
||||
|
|
@ -539,11 +558,13 @@ def init_env(
|
|||
cflags.insert(0, f'-I{os.environ["DEVELOP_ROOT"]}/include')
|
||||
ldpaths.insert(0, f'-L{os.environ["DEVELOP_ROOT"]}/lib')
|
||||
|
||||
if build_universal_binary:
|
||||
set_arches(cflags)
|
||||
set_arches(ldflags)
|
||||
ba = test_compile(cc, *(cppflags + cflags), ldflags=ldflags, get_output_arch=True)
|
||||
assert isinstance(ba, BinaryArch)
|
||||
|
||||
return Env(cc, cppflags, cflags, ldflags, library_paths, ccver=ccver, ldpaths=ldpaths, vcs_rev=vcs_rev)
|
||||
return Env(
|
||||
cc, cppflags, cflags, ldflags, library_paths, binary_arch=ba, native_optimizations=native_optimizations,
|
||||
ccver=ccver, ldpaths=ldpaths, vcs_rev=vcs_rev, build_universal_binary=build_universal_binary
|
||||
)
|
||||
|
||||
|
||||
def kitty_env(args: Options) -> Env:
|
||||
|
|
@ -638,7 +659,7 @@ def get_vcs_rev() -> str:
|
|||
|
||||
|
||||
@lru_cache
|
||||
def base64_defines() -> List[str]:
|
||||
def base64_defines(isa: ISA) -> List[str]:
|
||||
defs = {
|
||||
'HAVE_AVX512': 0,
|
||||
'HAVE_AVX2': 0,
|
||||
|
|
@ -649,14 +670,18 @@ def base64_defines() -> List[str]:
|
|||
'HAVE_SSE42': 0,
|
||||
'HAVE_AVX': 0,
|
||||
}
|
||||
if is_arm:
|
||||
if isa == ISA.ARM64:
|
||||
defs['HAVE_NEON64'] = 1
|
||||
else:
|
||||
elif isa == ISA.AMD64:
|
||||
defs['HAVE_AVX2'] = 1
|
||||
defs['HAVE_AVX'] = 1
|
||||
defs['HAVE_SSE42'] = 1
|
||||
defs['HAVE_SSE41'] = 1
|
||||
defs['HAVE_SSE3'] = 1
|
||||
elif isa == ISA.X86:
|
||||
defs['HAVE_SSE42'] = 1
|
||||
defs['HAVE_SSE41'] = 1
|
||||
defs['HAVE_SSE3'] = 1
|
||||
return [f'{k}={v}' for k, v in defs.items()]
|
||||
|
||||
|
||||
|
|
@ -668,13 +693,40 @@ def get_source_specific_defines(env: Env, src: str) -> Tuple[str, List[str], Opt
|
|||
env.vcs_rev = get_vcs_rev()
|
||||
return src, [], [f'KITTY_VCS_REV="{env.vcs_rev}"', f'WRAPPED_KITTENS="{wrapped_kittens()}"']
|
||||
if src.startswith('3rdparty/base64/'):
|
||||
return src, ['3rdparty/base64',], base64_defines()
|
||||
return src, ['3rdparty/base64',], base64_defines(env.binary_arch.isa)
|
||||
try:
|
||||
return src, [], env.library_paths[src]
|
||||
except KeyError:
|
||||
return src, [], None
|
||||
|
||||
|
||||
def get_source_specific_cflags(env: Env, src: str) -> List[str]:
    '''
    Return the compiler flags to use for the single source file ``src``.

    The result is always a fresh copy of ``env.cflags``. For the SIMD
    translation units (the two simd-string files and the per-architecture
    codecs of the bundled base64 library) the matching x86 instruction-set
    flag is appended, so that only those files are built with SIMD enabled.

    No flag is appended when native optimizations are on, as those give
    slightly better performance on their own, nor when the target is not an
    x86 family ISA, since the flags are x86 specific.

    :param env: Build environment supplying cflags, binary_arch and
        native_optimizations.
    :param src: Path of the source file, relative to the project root.
    :return: The list of C compiler flags for this file.
    '''
    ans = list(env.cflags)
    if env.native_optimizations or env.binary_arch.isa not in (ISA.AMD64, ISA.X86):
        return ans
    if src == 'kitty/simd-string-128.c':
        ans.append('-msse4.2')
    elif src == 'kitty/simd-string-256.c':
        ans.append('-mavx2')
    elif src.startswith('3rdparty/base64/lib/arch/'):
        components = src.split(os.path.sep)
        # Checked in order, the first directory name present in the path wins
        for dirname, flag in (
            ('sse3', '-msse3'),
            ('sse41', '-msse4.1'),
            ('sse42', '-msse4.2'),
            ('avx', '-mavx'),
            ('avx2', '-mavx2'),
        ):
            if dirname in components:
                ans.append(flag)
                break
    return ans
|
||||
|
||||
|
||||
def newer(dest: str, *sources: str) -> bool:
|
||||
try:
|
||||
dtime = os.path.getmtime(dest)
|
||||
|
|
@ -782,7 +834,8 @@ def compile_c_extension(
|
|||
src, include_paths, defines = get_source_specific_defines(kenv, src)
|
||||
if defines is not None:
|
||||
cppflags.extend(map(define, defines))
|
||||
cmd = kenv.cc + ['-MMD'] + cppflags + [f'-I{x}' for x in include_paths] + kenv.cflags
|
||||
cflags = get_source_specific_cflags(kenv, src)
|
||||
cmd = kenv.cc + ['-MMD'] + cppflags + [f'-I{x}' for x in include_paths] + cflags
|
||||
cmd += ['-c', src] + ['-o', dest]
|
||||
key = CompileKey(original_src, os.path.basename(dest))
|
||||
desc = f'Compiling {emphasis(desc_prefix + src)} ...'
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue