Hans, here is what I'm playing with right now against current
trunk.

I looked at the use cases for the scale factor in the VIS %gsr
register, and it is used much the way rounding modes are modified in
the FPU control register.

You have a function, or family of functions, that wants to operate with
a certain scale factor, and at the top level the first thing you do
is set %gsr to the value you want.

So I've added a GSR register to the sparc backend and then added
__vis_write_gsr() and __vis_read_gsr() functions to facilitate the
use cases I've seen.

This allowed me to describe to the compiler exactly what the alignaddr
instructions do, and thus the unspecs for them are now gone.

The pack and faligndata intrinsics still need to be unspecs, so I
merely added GSR uses to those patterns, which is enough to let the
compiler get the dataflow right.

This all seems sufficient for what things like Sun's medialib and your
RAPP project want to do.

I'll look into your other suggestion in PR48974, namely making use of
fone VIS instructions.

Thanks.

diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index d62d5a1..f38ecda 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -329,7 +329,7 @@ char leaf_reg_remap[] =
   72, 73, 74, 75, 76, 77, 78, 79,
   80, 81, 82, 83, 84, 85, 86, 87,
   88, 89, 90, 91, 92, 93, 94, 95,
-  96, 97, 98, 99, 100};
+  96, 97, 98, 99, 100, 101, 102};
 
 /* Vector, indexed by hard register number, which contains 1
    for a register that is allowable in a candidate for leaf
@@ -347,7 +347,7 @@ char sparc_leaf_regs[] =
   1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1,
   1, 1, 1, 1, 1, 1, 1, 1,
-  1, 1, 1, 1, 1};
+  1, 1, 1, 1, 1, 1, 1};
 
 struct GTY(()) machine_function
 {
@@ -4036,8 +4036,8 @@ static const int hard_32bit_mode_classes[] = {
   /* %fcc[0123] */
   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
 
-  /* %icc */
-  CC_MODES
+  /* %icc, %sfp, %gsr */
+  CC_MODES, 0, S_MODES
 };
 
 static const int hard_64bit_mode_classes[] = {
@@ -4061,8 +4061,8 @@ static const int hard_64bit_mode_classes[] = {
   /* %fcc[0123] */
   CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
 
-  /* %icc */
-  CC_MODES
+  /* %icc, %sfp, %gsr */
+  CC_MODES, 0, S_MODES
 };
 
 int sparc_mode_class [NUM_MACHINE_MODES];
@@ -9168,14 +9168,18 @@ sparc_vis_init_builtins (void)
                                                      v4hi, v4hi, 0);
   tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
                                                      v2si, v2si, 0);
+  tree void_ftype_si = build_function_type_list (void_type_node,
+                                                intSI_type_node, 0);
+  tree si_ftype_void = build_function_type_list (intSI_type_node,
+                                                void_type_node, 0);
 
   /* Packing and expanding vectors.  */
-  def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
-              v4qi_ftype_v4hi);
-  def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
-              v8qi_ftype_v2si_v8qi);
-  def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
-              v2hi_ftype_v2si);
+  def_builtin_const ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
+                    v4qi_ftype_v4hi);
+  def_builtin_const ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
+                    v8qi_ftype_v2si_v8qi);
+  def_builtin_const ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
+                    v2hi_ftype_v2si);
   def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
                     v4hi_ftype_v4qi);
   def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
@@ -9198,27 +9202,33 @@ sparc_vis_init_builtins (void)
                     v2si_ftype_v4qi_v2hi);
 
   /* Data aligning.  */
-  def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
-              v4hi_ftype_v4hi_v4hi);
-  def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
-              v8qi_ftype_v8qi_v8qi);
-  def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
-              v2si_ftype_v2si_v2si);
-  def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
-              di_ftype_di_di);
+  def_builtin_const ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
+                    v4hi_ftype_v4hi_v4hi);
+  def_builtin_const ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
+                    v8qi_ftype_v8qi_v8qi);
+  def_builtin_const ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
+                    v2si_ftype_v2si_v2si);
+  def_builtin_const ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
+                    di_ftype_di_di);
+
+  def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
+              void_ftype_si);
+  def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
+              si_ftype_void);
+
   if (TARGET_ARCH64)
     {
-      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
-                  ptr_ftype_ptr_di);
-      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
-                  ptr_ftype_ptr_di);
+      def_builtin_const ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
+                        ptr_ftype_ptr_di);
+      def_builtin_const ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
+                        ptr_ftype_ptr_di);
     }
   else
     {
-      def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
-                  ptr_ftype_ptr_si);
-      def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
-                  ptr_ftype_ptr_si);
+      def_builtin_const ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
+                        ptr_ftype_ptr_si);
+      def_builtin_const ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
+                        ptr_ftype_ptr_si);
     }
 
   /* Pixel distance.  */
@@ -9289,32 +9299,47 @@ sparc_expand_builtin (tree exp, rtx target,
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
   unsigned int icode = DECL_FUNCTION_CODE (fndecl);
   rtx pat, op[4];
-  enum machine_mode mode[4];
   int arg_count = 0;
+  bool nonvoid;
 
-  mode[0] = insn_data[icode].operand[0].mode;
-  if (!target
-      || GET_MODE (target) != mode[0]
-      || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
-    op[0] = gen_reg_rtx (mode[0]);
-  else
-    op[0] = target;
+  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
 
+  if (nonvoid)
+    {
+      enum machine_mode tmode = insn_data[icode].operand[0].mode;
+      if (!target
+         || GET_MODE (target) != tmode
+         || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+       op[0] = gen_reg_rtx (tmode);
+      else
+       op[0] = target;
+    }
   FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
     {
+      const struct insn_operand_data *insn_op;
+
+      if (arg == error_mark_node)
+       return NULL_RTX;
+
       arg_count++;
-      mode[arg_count] = insn_data[icode].operand[arg_count].mode;
+      insn_op = &insn_data[icode].operand[arg_count - !nonvoid];
       op[arg_count] = expand_normal (arg);
 
       if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
-                                                             mode[arg_count]))
-       op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
+                                                             insn_op->mode))
+       op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
     }
 
   switch (arg_count)
     {
+    case 0:
+      pat = GEN_FCN (icode) (op[0]);
+      break;
     case 1:
-      pat = GEN_FCN (icode) (op[0], op[1]);
+      if (nonvoid)
+       pat = GEN_FCN (icode) (op[0], op[1]);
+      else
+       pat = GEN_FCN (icode) (op[1]);
       break;
     case 2:
       pat = GEN_FCN (icode) (op[0], op[1], op[2]);
@@ -9331,7 +9356,10 @@ sparc_expand_builtin (tree exp, rtx target,
 
   emit_insn (pat);
 
-  return op[0];
+  if (nonvoid)
+    return op[0];
+  else
+    return const0_rtx;
 }
 
 static int
@@ -9416,7 +9444,8 @@ sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
 
   if (ignore
       && icode != CODE_FOR_alignaddrsi_vis
-      && icode != CODE_FOR_alignaddrdi_vis)
+      && icode != CODE_FOR_alignaddrdi_vis
+      && icode != CODE_FOR_wrgsr_vis)
     return build_zero_cst (rtype);
 
   switch (icode)
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index afdca1e..77eff2e 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -691,7 +691,7 @@ extern enum cmodel sparc_cmodel;
    Register 100 is used as the integer condition code register.
    Register 101 is used as the soft frame pointer register.  */
 
-#define FIRST_PSEUDO_REGISTER 102
+#define FIRST_PSEUDO_REGISTER 103
 
 #define SPARC_FIRST_FP_REG     32
 /* Additional V9 fp regs.  */
@@ -704,6 +704,7 @@ extern enum cmodel sparc_cmodel;
 #define SPARC_FCC_REG 96
 /* Integer CC reg.  We don't distinguish %icc from %xcc.  */
 #define SPARC_ICC_REG 100
+#define SPARC_GSR_REG 102
 
 /* Nonzero if REGNO is an fp reg.  */
 #define SPARC_FP_REG_P(REGNO) \
@@ -757,7 +758,7 @@ extern enum cmodel sparc_cmodel;
   0, 0, 0, 0, 0, 0, 0, 0,      \
   0, 0, 0, 0, 0, 0, 0, 0,      \
                                \
-  0, 0, 0, 0, 0, 1}
+  0, 0, 0, 0, 0, 1, 1}
 
 /* 1 for registers not available across function calls.
    These must include the FIXED_REGISTERS and also any
@@ -782,7 +783,7 @@ extern enum cmodel sparc_cmodel;
   1, 1, 1, 1, 1, 1, 1, 1,      \
   1, 1, 1, 1, 1, 1, 1, 1,      \
                                \
-  1, 1, 1, 1, 1, 1}
+  1, 1, 1, 1, 1, 1, 1}
 
 /* Return number of consecutive hard regs needed starting at reg REGNO
    to hold something of mode MODE.
@@ -796,11 +797,12 @@ extern enum cmodel sparc_cmodel;
    included in the hard register count).  */
 
 #define HARD_REGNO_NREGS(REGNO, MODE) \
-  (TARGET_ARCH64                                                       \
-   ? ((REGNO) < 32 || (REGNO) == FRAME_POINTER_REGNUM                  \
-      ? (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD   \
-      : (GET_MODE_SIZE (MODE) + 3) / 4)                                        \
-   : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+  ((REGNO) == SPARC_GSR_REG ? 1 :                                      \
+   (TARGET_ARCH64                                                      \
+    ? ((REGNO) < 32 || (REGNO) == FRAME_POINTER_REGNUM                 \
+       ? (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD  \
+       : (GET_MODE_SIZE (MODE) + 3) / 4)                               \
+    : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)))
 
 /* Due to the ARCH64 discrepancy above we must override this next
    macro too.  */
@@ -985,7 +987,7 @@ enum reg_class { NO_REGS, FPCC_REGS, I64_REGS, GENERAL_REGS, FP_REGS,
    {0, -1, -1, 0},     /* EXTRA_FP_REGS */             \
    {-1, -1, 0, 0x20},  /* GENERAL_OR_FP_REGS */        \
    {-1, -1, -1, 0x20}, /* GENERAL_OR_EXTRA_FP_REGS */  \
-   {-1, -1, -1, 0x3f}} /* ALL_REGS */
+   {-1, -1, -1, 0x7f}} /* ALL_REGS */
 
 /* The same information, inverted:
    Return the class number of the smallest class containing
@@ -1046,7 +1048,7 @@ extern enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
   88, 89, 90, 91, 92, 93, 94, 95,      /* %f56-%f63 */ \
   39, 38, 37, 36, 35, 34, 33, 32,      /* %f7-%f0 */   \
   96, 97, 98, 99,                      /* %fcc0-3 */   \
-  100, 0, 14, 30, 101}                 /* %icc, %g0, %o6, %i6, %sfp */
+  100, 0, 14, 30, 101, 102 }           /* %icc, %g0, %o6, %i6, %sfp, %gsr */
 
 /* This is the order in which to allocate registers for
    leaf functions.  If all registers can fit in the global and
@@ -1085,7 +1087,7 @@ extern enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
   88, 89, 90, 91, 92, 93, 94, 95,      /* %f56-%f63 */ \
   39, 38, 37, 36, 35, 34, 33, 32,      /* %f7-%f0 */   \
   96, 97, 98, 99,                      /* %fcc0-3 */   \
-  100, 0, 14, 30, 31, 101}             /* %icc, %g0, %o6, %i6, %i7, %sfp */
+  100, 0, 14, 30, 31, 101, 102 }       /* %icc, %g0, %o6, %i6, %i7, %sfp, %gsr */
 
 #define ADJUST_REG_ALLOC_ORDER order_regs_for_local_alloc ()
 
@@ -1724,7 +1726,7 @@ do {                                                                        \
  "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",       \
  "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",       \
  "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",       \
- "%fcc0", "%fcc1", "%fcc2", "%fcc3", "%icc", "%sfp" }
+ "%fcc0", "%fcc1", "%fcc2", "%fcc3", "%icc", "%sfp", "%gsr" }
 
 /* Define additional names for use in asm clobbers and asm declarations.  */
 
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 588caf3..200846e 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -58,7 +58,7 @@
    (UNSPEC_MUL8UL              46)
    (UNSPEC_MULDUL              47)
    (UNSPEC_ALIGNDATA           48)
-   (UNSPEC_ALIGNADDR           49)
+
    (UNSPEC_PDIST               50)
    (UNSPEC_EDGE8               51)
    (UNSPEC_EDGE8L              52)
@@ -66,7 +66,6 @@
    (UNSPEC_EDGE16L             54)
    (UNSPEC_EDGE32              55)
    (UNSPEC_EDGE32L             56)
-   (UNSPEC_ALIGNADDRL          57)
 
    (UNSPEC_SP_SET              60)
    (UNSPEC_SP_TEST             61)
@@ -176,6 +175,7 @@
   (FCC3_REG                    99)
   (CC_REG                      100)
   (SFP_REG                     101)
+  (GSR_REG                     102)
  ])
 
 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
@@ -7752,7 +7752,8 @@
 (define_insn "fpack16_vis"
   [(set (match_operand:V4QI 0 "register_operand" "=f")
         (unspec:V4QI [(match_operand:V4HI 1 "register_operand" "e")]
-                     UNSPEC_FPACK16))]
+                     UNSPEC_FPACK16))
+   (use (reg:SI GSR_REG))]
   "TARGET_VIS"
   "fpack16\t%1, %0"
   [(set_attr "type" "fga")
@@ -7761,7 +7762,8 @@
 (define_insn "fpackfix_vis"
   [(set (match_operand:V2HI 0 "register_operand" "=f")
         (unspec:V2HI [(match_operand:V2SI 1 "register_operand" "e")]
-                     UNSPEC_FPACKFIX))]
+                     UNSPEC_FPACKFIX))
+   (use (reg:SI GSR_REG))]
   "TARGET_VIS"
   "fpackfix\t%1, %0"
   [(set_attr "type" "fga")
@@ -7771,7 +7773,8 @@
   [(set (match_operand:V8QI 0 "register_operand" "=e")
         (unspec:V8QI [(match_operand:V2SI 1 "register_operand" "e")
                      (match_operand:V8QI 2 "register_operand" "e")]
-                     UNSPEC_FPACK32))]
+                     UNSPEC_FPACK32))
+   (use (reg:SI GSR_REG))]
   "TARGET_VIS"
   "fpack32\t%1, %2, %0"
   [(set_attr "type" "fga")
@@ -7871,6 +7874,18 @@
   [(set_attr "type" "fpmul")
    (set_attr "fptype" "double")])
 
+(define_insn "wrgsr_vis"
+  [(set (reg:SI GSR_REG) (match_operand:SI 0 "arith_operand" "rI"))]
+  "TARGET_VIS"
+  "wr\t%%g0, %0, %%gsr"
+  [(set_attr "type" "multi")])
+
+(define_insn "rdgsr_vis"
+  [(set (match_operand:SI 0 "register_operand" "=r") (reg:SI GSR_REG))]
+  "TARGET_VIS"
+  "rd\t%%gsr, %0"
+  [(set_attr "type" "multi")])
+
 ;; Using faligndata only makes sense after an alignaddr since the choice of
 ;; bytes to take out of each operand is dependent on the results of the last
 ;; alignaddr.
@@ -7878,25 +7893,57 @@
   [(set (match_operand:V64I 0 "register_operand" "=e")
         (unspec:V64I [(match_operand:V64I 1 "register_operand" "e")
                       (match_operand:V64I 2 "register_operand" "e")]
-         UNSPEC_ALIGNDATA))]
+         UNSPEC_ALIGNDATA))
+   (use (reg:SI GSR_REG))]
   "TARGET_VIS"
   "faligndata\t%1, %2, %0"
   [(set_attr "type" "fga")
    (set_attr "fptype" "double")])
 
-(define_insn "alignaddr<P:mode>_vis"
-  [(set (match_operand:P 0 "register_operand" "=r")
-        (unspec:P [(match_operand:P 1 "register_or_zero_operand" "rJ")
-                   (match_operand:P 2 "register_or_zero_operand" "rJ")]
-         UNSPEC_ALIGNADDR))]
+(define_insn "alignaddrsi_vis"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (plus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+                 (match_operand:SI 2 "register_or_zero_operand" "rJ")))
+   (set (reg:SI GSR_REG)
+        (ior:SI (and:SI (reg:SI GSR_REG) (const_int -8))
+                (and:SI (plus:SI (match_dup 1) (match_dup 2))
+                        (const_int 7))))]
   "TARGET_VIS"
   "alignaddr\t%r1, %r2, %0")
 
-(define_insn "alignaddrl<P:mode>_vis"
-  [(set (match_operand:P 0 "register_operand" "=r")
-        (unspec:P [(match_operand:P 1 "register_or_zero_operand" "rJ")
-                   (match_operand:P 2 "register_or_zero_operand" "rJ")]
-         UNSPEC_ALIGNADDRL))]
+(define_insn "alignaddrdi_vis"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (plus:DI (match_operand:DI 1 "register_or_zero_operand" "rJ")
+                 (match_operand:DI 2 "register_or_zero_operand" "rJ")))
+   (set (reg:SI GSR_REG)
+        (ior:SI (and:SI (reg:SI GSR_REG) (const_int -8))
+                (and:SI (truncate:SI (plus:DI (match_dup 1) (match_dup 2)))
+                        (const_int 7))))]
+  "TARGET_VIS"
+  "alignaddr\t%r1, %r2, %0")
+
+(define_insn "alignaddrlsi_vis"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (plus:SI (match_operand:SI 1 "register_or_zero_operand" "rJ")
+                 (match_operand:SI 2 "register_or_zero_operand" "rJ")))
+   (set (reg:SI GSR_REG)
+        (ior:SI (and:SI (reg:SI GSR_REG) (const_int -8))
+                (xor:SI (and:SI (plus:SI (match_dup 1) (match_dup 2))
+                                (const_int 7))
+                        (const_int 7))))]
+  "TARGET_VIS"
+  "alignaddrl\t%r1, %r2, %0")
+
+(define_insn "alignaddrldi_vis"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (plus:DI (match_operand:DI 1 "register_or_zero_operand" "rJ")
+                 (match_operand:DI 2 "register_or_zero_operand" "rJ")))
+   (set (reg:SI GSR_REG)
+        (ior:SI (and:SI (reg:SI GSR_REG) (const_int -8))
+                (xor:SI (and:SI (truncate:SI (plus:DI (match_dup 1)
+                                                      (match_dup 2)))
+                                (const_int 7))
+                        (const_int 7))))]
   "TARGET_VIS"
   "alignaddrl\t%r1, %r2, %0")
 
diff --git a/gcc/config/sparc/visintrin.h b/gcc/config/sparc/visintrin.h
index 4c2fa18..37c1113 100644
--- a/gcc/config/sparc/visintrin.h
+++ b/gcc/config/sparc/visintrin.h
@@ -31,6 +31,20 @@ typedef unsigned char __v8qi __attribute__ ((__vector_size__ (8)));
 typedef unsigned char __v4qi __attribute__ ((__vector_size__ (4)));
 typedef int __i64 __attribute__ ((__mode__ (DI)));
 
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_write_gsr (int __A)
+{
+  __builtin_vis_write_gsr (__A);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__vis_read_gsr (void)
+{
+  return __builtin_vis_read_gsr ();
+}
+
 extern __inline void *
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 __vis_alignaddr (void *__A, long __B)

Reply via email to